From 5d410557dea452f6231a7c66155e29a37e168528 Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Tue, 9 May 2023 16:48:17 +0800 Subject: [PATCH 01/40] vhost: fix possible wrap in SVQ descriptor ring MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit QEMU invokes vhost_svq_add() when adding a guest's element into SVQ. In vhost_svq_add(), it uses vhost_svq_available_slots() to check whether QEMU can add the element into SVQ. If there is enough space, then QEMU combines some out descriptors and some in descriptors into one descriptor chain, and adds it into `svq->vring.desc` by vhost_svq_vring_write_descs(). Yet the problem is that, `svq->shadow_avail_idx - svq->shadow_used_idx` in vhost_svq_available_slots() returns the number of occupied elements, or the number of descriptor chains, instead of the number of occupied descriptors, which may cause wrapping in SVQ descriptor ring. Here is an example. In vhost_handle_guest_kick(), QEMU forwards as many available buffers to device by virtqueue_pop() and vhost_svq_add_element(). virtqueue_pop() returns a guest's element, and then this element is added into SVQ by vhost_svq_add_element(), a wrapper to vhost_svq_add(). If QEMU invokes virtqueue_pop() and vhost_svq_add_element() `svq->vring.num` times, vhost_svq_available_slots() thinks QEMU just ran out of slots and everything should work fine. But in fact, virtqueue_pop() returns `svq->vring.num` elements or descriptor chains, more than `svq->vring.num` descriptors due to guest memory fragmentation, and this causes wrapping in SVQ descriptor ring. This bug is valid even before marking the descriptors used. If the guest memory is fragmented, SVQ must add chains so it can try to add more descriptors than possible. This patch solves it by adding `num_free` field in VhostShadowVirtqueue structure and updating this field in vhost_svq_add() and vhost_svq_get_buf(), to record the number of free descriptors. Fixes: 100890f7ca ("vhost: Shadow virtqueue buffers forwarding") Signed-off-by: Hawkins Jiawei Acked-by: Eugenio Pérez Message-Id: <20230509084817.3973-1-yin31149@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Tested-by: Lei Yang --- hw/virtio/vhost-shadow-virtqueue.c | 5 ++++- hw/virtio/vhost-shadow-virtqueue.h | 3 +++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c index 8361e70d1b..bd7c12b6d3 100644 --- a/hw/virtio/vhost-shadow-virtqueue.c +++ b/hw/virtio/vhost-shadow-virtqueue.c @@ -68,7 +68,7 @@ bool vhost_svq_valid_features(uint64_t features, Error **errp) */ static uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq) { - return svq->vring.num - (svq->shadow_avail_idx - svq->shadow_used_idx); + return svq->num_free; } /** @@ -263,6 +263,7 @@ int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, return -EINVAL; } + svq->num_free -= ndescs; svq->desc_state[qemu_head].elem = elem; svq->desc_state[qemu_head].ndescs = ndescs; vhost_svq_kick(svq); @@ -449,6 +450,7 @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id); svq->desc_next[last_used_chain] = svq->free_head; svq->free_head = used_elem.id; + svq->num_free += num; *len = used_elem.len; return g_steal_pointer(&svq->desc_state[used_elem.id].elem); @@ -659,6 +661,7 @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, svq->iova_tree = iova_tree; svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq)); + svq->num_free = svq->vring.num; driver_size = vhost_svq_driver_area_size(svq); device_size = vhost_svq_device_area_size(svq); svq->vring.desc = qemu_memalign(qemu_real_host_page_size(), driver_size); diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h index 926a4897b1..6efe051a70 100644 --- a/hw/virtio/vhost-shadow-virtqueue.h +++ b/hw/virtio/vhost-shadow-virtqueue.h @@ -107,6 +107,9 @@ typedef struct VhostShadowVirtqueue { /* Next head to consume from the device */ uint16_t last_used_idx; + + /* Size of SVQ vring free descriptors */ + uint16_t num_free; } VhostShadowVirtqueue; bool vhost_svq_valid_features(uint64_t features, Error **errp); From 71ba92f3488b64bd5c81e2872c56e88cea21bb95 Mon Sep 17 00:00:00 2001 From: Hao Zeng Date: Fri, 21 Apr 2023 14:20:19 +0100 Subject: [PATCH 02/40] hw/cxl: cdat: Fix open file not closed in ct3_load_cdat() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Open file descriptor not closed in error paths. Fix by replace open coded handling of read of whole file into a buffer with g_file_get_contents() Fixes: aba578bdac ("hw/cxl: CDAT Data Object Exchange implementation") Signed-off-by: Zeng Hao Suggested-by: Philippe Mathieu-Daudé Suggested-by: Peter Maydell Suggested-by: Jonathan Cameron via Signed-off-by: Jonathan Cameron -- Changes since v5: - Drop if guard on g_free() as per checkpatch warning. Message-Id: <20230421132020.7408-2-Jonathan.Cameron@huawei.com> Reviewed-by: Fan Ni Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-cdat.c | 29 ++++++++--------------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/hw/cxl/cxl-cdat.c b/hw/cxl/cxl-cdat.c index 137abd0992..056711d63d 100644 --- a/hw/cxl/cxl-cdat.c +++ b/hw/cxl/cxl-cdat.c @@ -110,29 +110,18 @@ static void ct3_load_cdat(CDATObject *cdat, Error **errp) g_autofree CDATEntry *cdat_st = NULL; uint8_t sum = 0; int num_ent; - int i = 0, ent = 1, file_size = 0; + int i = 0, ent = 1; + gsize file_size = 0; CDATSubHeader *hdr; - FILE *fp = NULL; + GError *error = NULL; /* Read CDAT file and create its cache */ - fp = fopen(cdat->filename, "r"); - if (!fp) { - error_setg(errp, "CDAT: Unable to open file"); + if (!g_file_get_contents(cdat->filename, (gchar **)&cdat->buf, + &file_size, &error)) { + error_setg(errp, "CDAT: File read failed: %s", error->message); + g_error_free(error); return; } - - fseek(fp, 0, SEEK_END); - file_size = ftell(fp); - fseek(fp, 0, SEEK_SET); - cdat->buf = g_malloc0(file_size); - - if (fread(cdat->buf, file_size, 1, fp) == 0) { - error_setg(errp, "CDAT: File read failed"); - return; - } - - fclose(fp); - if (file_size < sizeof(CDATTableHeader)) { error_setg(errp, "CDAT: File too short"); return; @@ -218,7 +207,5 @@ void cxl_doe_cdat_release(CXLComponentState *cxl_cstate) cdat->free_cdat_table(cdat->built_buf, cdat->built_buf_len, cdat->private); } - if (cdat->buf) { - free(cdat->buf); - } + g_free(cdat->buf); } From 7b22a3218ad0b8388c8bf20d394e3220b2fc8798 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 21 Apr 2023 14:20:20 +0100 Subject: [PATCH 03/40] hw/cxl: cdat: Fix failure to free buffer in erorr paths The failure paths in CDAT file loading did not clear up properly. Change to using g_auto_free and a local pointer for the buffer to ensure this function has no side effects on error. Also drop some unnecessary checks that can not fail. Cleanup properly after a failure to load a CDAT file. Suggested-by: Peter Maydell Signed-off-by: Jonathan Cameron Message-Id: <20230421132020.7408-3-Jonathan.Cameron@huawei.com> Reviewed-by: Fan Ni Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-cdat.c | 33 ++++++++++++++++++--------------- hw/mem/cxl_type3.c | 4 ++++ hw/pci-bridge/cxl_upstream.c | 3 +++ 3 files changed, 25 insertions(+), 15 deletions(-) diff --git a/hw/cxl/cxl-cdat.c b/hw/cxl/cxl-cdat.c index 056711d63d..d246d6885b 100644 --- a/hw/cxl/cxl-cdat.c +++ b/hw/cxl/cxl-cdat.c @@ -108,6 +108,7 @@ static void ct3_build_cdat(CDATObject *cdat, Error **errp) static void ct3_load_cdat(CDATObject *cdat, Error **errp) { g_autofree CDATEntry *cdat_st = NULL; + g_autofree char *buf = NULL; uint8_t sum = 0; int num_ent; int i = 0, ent = 1; @@ -116,7 +117,7 @@ static void ct3_load_cdat(CDATObject *cdat, Error **errp) GError *error = NULL; /* Read CDAT file and create its cache */ - if (!g_file_get_contents(cdat->filename, (gchar **)&cdat->buf, + if (!g_file_get_contents(cdat->filename, (gchar **)&buf, &file_size, &error)) { error_setg(errp, "CDAT: File read failed: %s", error->message); g_error_free(error); @@ -129,9 +130,17 @@ static void ct3_load_cdat(CDATObject *cdat, Error **errp) i = sizeof(CDATTableHeader); num_ent = 1; while (i < file_size) { - hdr = (CDATSubHeader *)(cdat->buf + i); + hdr = (CDATSubHeader *)(buf + i); + if (i + sizeof(CDATSubHeader) > file_size) { + error_setg(errp, "CDAT: Truncated table"); + return; + } cdat_len_check(hdr, errp); i += hdr->length; + if (i > file_size) { + error_setg(errp, "CDAT: Truncated table"); + return; + } num_ent++; } if (i != file_size) { @@ -139,33 +148,26 @@ static void ct3_load_cdat(CDATObject *cdat, Error **errp) return; } - cdat_st = g_malloc0(sizeof(*cdat_st) * num_ent); - if (!cdat_st) { - error_setg(errp, "CDAT: Failed to allocate entry array"); - return; - } + cdat_st = g_new0(CDATEntry, num_ent); /* Set CDAT header, Entry = 0 */ - cdat_st[0].base = cdat->buf; + cdat_st[0].base = buf; cdat_st[0].length = sizeof(CDATTableHeader); i = 0; while (i < cdat_st[0].length) { - sum += cdat->buf[i++]; + sum += buf[i++]; } /* Read CDAT structures */ while (i < file_size) { - hdr = (CDATSubHeader *)(cdat->buf + i); - cdat_len_check(hdr, errp); - + hdr = (CDATSubHeader *)(buf + i); cdat_st[ent].base = hdr; cdat_st[ent].length = hdr->length; - while (cdat->buf + i < - (uint8_t *)cdat_st[ent].base + cdat_st[ent].length) { + while (buf + i < (char *)cdat_st[ent].base + cdat_st[ent].length) { assert(i < file_size); - sum += cdat->buf[i++]; + sum += buf[i++]; } ent++; @@ -176,6 +178,7 @@ static void ct3_load_cdat(CDATObject *cdat, Error **errp) } cdat->entry_len = num_ent; cdat->entry = g_steal_pointer(&cdat_st); + cdat->buf = g_steal_pointer(&buf); } void cxl_doe_cdat_init(CXLComponentState *cxl_cstate, Error **errp) diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index abe60b362c..7647122cc6 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -593,6 +593,9 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp) cxl_cstate->cdat.free_cdat_table = ct3_free_cdat_table; cxl_cstate->cdat.private = ct3d; cxl_doe_cdat_init(cxl_cstate, errp); + if (*errp) { + goto err_free_special_ops; + } pcie_cap_deverr_init(pci_dev); /* Leave a bit of room for expansion */ @@ -605,6 +608,7 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp) err_release_cdat: cxl_doe_cdat_release(cxl_cstate); +err_free_special_ops: g_free(regs->special_ops); err_address_space_free: address_space_destroy(&ct3d->hostmem_as); diff --git a/hw/pci-bridge/cxl_upstream.c b/hw/pci-bridge/cxl_upstream.c index 9df436cb73..ef47e5d625 100644 --- a/hw/pci-bridge/cxl_upstream.c +++ b/hw/pci-bridge/cxl_upstream.c @@ -346,6 +346,9 @@ static void cxl_usp_realize(PCIDevice *d, Error **errp) cxl_cstate->cdat.free_cdat_table = free_default_cdat_table; cxl_cstate->cdat.private = d; cxl_doe_cdat_init(cxl_cstate, errp); + if (*errp) { + goto err_cap; + } return; From ca4750583a597e97cbf8cec008d228f95d22c426 Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Fri, 21 Apr 2023 14:45:05 +0100 Subject: [PATCH 04/40] docs/cxl: fix some typos Signed-off-by: Brice Goglin Signed-off-by: Jonathan Cameron Message-Id: <20230421134507.26842-2-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- docs/system/devices/cxl.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/system/devices/cxl.rst b/docs/system/devices/cxl.rst index 4c38223069..9618becdab 100644 --- a/docs/system/devices/cxl.rst +++ b/docs/system/devices/cxl.rst @@ -162,7 +162,7 @@ Example system Topology. x marks the match in each decoder level:: |<------------------SYSTEM PHYSICAL ADDRESS MAP (1)----------------->| | __________ __________________________________ __________ | | | | | | | | | - | | CFMW 0 | | CXL Fixed Memory Window 1 | | CFMW 1 | | + | | CFMW 0 | | CXL Fixed Memory Window 1 | | CFMW 2 | | | | HB0 only | | Configured to interleave memory | | HB1 only | | | | | | memory accesses across HB0/HB1 | | | | | |__________| |_____x____________________________| |__________| | @@ -208,8 +208,8 @@ Notes: (1) **3 CXL Fixed Memory Windows (CFMW)** corresponding to different ranges of the system physical address map. Each CFMW has particular interleave setup across the CXL Host Bridges (HB) - CFMW0 provides uninterleaved access to HB0, CFW2 provides - uninterleaved access to HB1. CFW1 provides interleaved memory access + CFMW0 provides uninterleaved access to HB0, CFMW2 provides + uninterleaved access to HB1. CFMW1 provides interleaved memory access across HB0 and HB1. (2) **Two CXL Host Bridges**. Each of these has 2 CXL Root Ports and @@ -247,7 +247,7 @@ Example topology involving a switch:: |<------------------SYSTEM PHYSICAL ADDRESS MAP (1)----------------->| | __________ __________________________________ __________ | | | | | | | | | - | | CFMW 0 | | CXL Fixed Memory Window 1 | | CFMW 1 | | + | | CFMW 0 | | CXL Fixed Memory Window 1 | | CFMW 2 | | | | HB0 only | | Configured to interleave memory | | HB1 only | | | | | | memory accesses across HB0/HB1 | | | | | |____x_____| |__________________________________| |__________| | From 9830ea6126c6a3b3ea7af720d98aa7717dba59eb Mon Sep 17 00:00:00 2001 From: Raghu H Date: Fri, 21 Apr 2023 14:45:06 +0100 Subject: [PATCH 05/40] docs/cxl: Remove incorrect CXL type 3 size parameter cxl-type3 memory size is read directly from the provided memory backed end device. Remove non existent size option Signed-off-by: Raghu H Signed-off-by: Jonathan Cameron Message-Id: <20230421134507.26842-3-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- docs/system/devices/cxl.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/system/devices/cxl.rst b/docs/system/devices/cxl.rst index 9618becdab..dd1a62bd57 100644 --- a/docs/system/devices/cxl.rst +++ b/docs/system/devices/cxl.rst @@ -354,13 +354,13 @@ An example of 4 devices below a switch suitable for 1, 2 or 4 way interleave:: -device cxl-rp,port=1,bus=cxl.1,id=root_port1,chassis=0,slot=1 \ -device cxl-upstream,bus=root_port0,id=us0 \ -device cxl-downstream,port=0,bus=us0,id=swport0,chassis=0,slot=4 \ - -device cxl-type3,bus=swport0,memdev=cxl-mem0,lsa=cxl-lsa0,id=cxl-pmem0,size=256M \ + -device cxl-type3,bus=swport0,memdev=cxl-mem0,lsa=cxl-lsa0,id=cxl-pmem0 \ -device cxl-downstream,port=1,bus=us0,id=swport1,chassis=0,slot=5 \ - -device cxl-type3,bus=swport1,memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem1,size=256M \ + -device cxl-type3,bus=swport1,memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem1 \ -device cxl-downstream,port=2,bus=us0,id=swport2,chassis=0,slot=6 \ - -device cxl-type3,bus=swport2,memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem2,size=256M \ + -device cxl-type3,bus=swport2,memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem2 \ -device cxl-downstream,port=3,bus=us0,id=swport3,chassis=0,slot=7 \ - -device cxl-type3,bus=swport3,memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3,size=256M \ + -device cxl-type3,bus=swport3,memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3 \ -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k Kernel Configuration Options From 0795b98f096b876a36e2c45adb42c2004655011e Mon Sep 17 00:00:00 2001 From: Raghu H Date: Fri, 21 Apr 2023 14:45:07 +0100 Subject: [PATCH 06/40] docs/cxl: Replace unsupported AARCH64 with x86_64 Currently Qemu CXL emulation support is not availabe on AARCH64 but its available with qemu x86_64 architecture, updating the document to reflect the supported platform. Signed-off-by: Raghu H Signed-off-by: Jonathan Cameron Message-Id: <20230421134507.26842-4-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- docs/system/devices/cxl.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/system/devices/cxl.rst b/docs/system/devices/cxl.rst index dd1a62bd57..8f2885aba1 100644 --- a/docs/system/devices/cxl.rst +++ b/docs/system/devices/cxl.rst @@ -302,7 +302,7 @@ Example command lines --------------------- A very simple setup with just one directly attached CXL Type 3 device:: - qemu-system-aarch64 -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 -cpu max \ + qemu-system-x86_64 -M q35,cxl=on -m 4G,maxmem=8G,slots=8 -smp 4 \ ... -object memory-backend-file,id=cxl-mem1,share=on,mem-path=/tmp/cxltest.raw,size=256M \ -object memory-backend-file,id=cxl-lsa1,share=on,mem-path=/tmp/lsa.raw,size=256M \ @@ -315,7 +315,7 @@ A setup suitable for 4 way interleave. Only one fixed window provided, to enable interleave across 2 CXL host bridges. Each host bridge has 2 CXL Root Ports, with the CXL Type3 device directly attached (no switches).:: - qemu-system-aarch64 -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 -cpu max \ + qemu-system-x86_64 -M q35,cxl=on -m 4G,maxmem=8G,slots=8 -smp 4 \ ... -object memory-backend-file,id=cxl-mem1,share=on,mem-path=/tmp/cxltest.raw,size=256M \ -object memory-backend-file,id=cxl-mem2,share=on,mem-path=/tmp/cxltest2.raw,size=256M \ @@ -339,7 +339,7 @@ the CXL Type3 device directly attached (no switches).:: An example of 4 devices below a switch suitable for 1, 2 or 4 way interleave:: - qemu-system-aarch64 -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 -cpu max \ + qemu-system-x86_64 -M q35,cxl=on -m 4G,maxmem=8G,slots=8 -smp 4 \ ... -object memory-backend-file,id=cxl-mem0,share=on,mem-path=/tmp/cxltest.raw,size=256M \ -object memory-backend-file,id=cxl-mem1,share=on,mem-path=/tmp/cxltest1.raw,size=256M \ From 23e1248d7e3cb7331a1cee13ff109a5c52471ec2 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 21 Apr 2023 14:59:04 +0100 Subject: [PATCH 07/40] hw/cxl: drop pointless memory_region_transaction_guards MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not clear what intent was here, but probably based on a misunderstanding of what these guards are for. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Jonathan Cameron Message-Id: <20230421135906.3515-2-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-component-utils.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c index b665d4f565..324be79b11 100644 --- a/hw/cxl/cxl-component-utils.c +++ b/hw/cxl/cxl-component-utils.c @@ -47,14 +47,12 @@ static void dumb_hdm_handler(CXLComponentState *cxl_cstate, hwaddr offset, break; } - memory_region_transaction_begin(); stl_le_p((uint8_t *)cache_mem + offset, value); if (should_commit) { ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, COMMIT, 0); ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, ERR, 0); ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, COMMITTED, 1); } - memory_region_transaction_commit(); } static void cxl_cache_mem_write_reg(void *opaque, hwaddr offset, uint64_t value, From 92ff7cabf97d9942ebaeafed6747dc18c8c1f697 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 21 Apr 2023 14:59:05 +0100 Subject: [PATCH 08/40] hw/cxl: Fix endian handling for decoder commit. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not a real problem yet as all supported architectures are little endian, but continue to tidy these up when touching code for other reasons. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Jonathan Cameron Message-Id: <20230421135906.3515-3-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-component-utils.c | 8 ++++---- hw/mem/cxl_type3.c | 9 ++++++--- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c index 324be79b11..a3e6cf75cf 100644 --- a/hw/cxl/cxl-component-utils.c +++ b/hw/cxl/cxl-component-utils.c @@ -47,12 +47,12 @@ static void dumb_hdm_handler(CXLComponentState *cxl_cstate, hwaddr offset, break; } - stl_le_p((uint8_t *)cache_mem + offset, value); if (should_commit) { - ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, COMMIT, 0); - ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, ERR, 0); - ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, COMMITTED, 1); + value = FIELD_DP32(value, CXL_HDM_DECODER0_CTRL, COMMIT, 0); + value = FIELD_DP32(value, CXL_HDM_DECODER0_CTRL, ERR, 0); + value = FIELD_DP32(value, CXL_HDM_DECODER0_CTRL, COMMITTED, 1); } + stl_le_p((uint8_t *)cache_mem + offset, value); } static void cxl_cache_mem_write_reg(void *opaque, hwaddr offset, uint64_t value, diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index 7647122cc6..a2a9b17dbb 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -314,14 +314,17 @@ static void hdm_decoder_commit(CXLType3Dev *ct3d, int which) { ComponentRegisters *cregs = &ct3d->cxl_cstate.crb; uint32_t *cache_mem = cregs->cache_mem_registers; + uint32_t ctrl; assert(which == 0); + ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL); /* TODO: Sanity checks that the decoder is possible */ - ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, COMMIT, 0); - ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, ERR, 0); + ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, COMMIT, 0); + ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, ERR, 0); + ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED, 1); - ARRAY_FIELD_DP32(cache_mem, CXL_HDM_DECODER0_CTRL, COMMITTED, 1); + stl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL, ctrl); } static int ct3d_qmp_uncor_err_to_cxl(CxlUncorErrorType qmp_err) From 823371a630599346fd04d541f19b52e72ee84f7e Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 21 Apr 2023 14:59:06 +0100 Subject: [PATCH 09/40] hw/cxl: Fix incorrect reset of commit and associated clearing of committed. The hardware clearing the commit bit is not spec compliant. Clearing of committed bit when commit is cleared is not specifically stated in the CXL spec, but is the expected (and simplest) permitted behaviour so use that for QEMU emulation. Reviewed-by: Fan Ni Tested-by: Fan Ni Reviewed-by: Dave Jiang Signed-off-by: Jonathan Cameron -- v2: Picked up tags. Message-Id: <20230421135906.3515-4-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-component-utils.c | 6 +++++- hw/mem/cxl_type3.c | 21 ++++++++++++++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/hw/cxl/cxl-component-utils.c b/hw/cxl/cxl-component-utils.c index a3e6cf75cf..378f1082ce 100644 --- a/hw/cxl/cxl-component-utils.c +++ b/hw/cxl/cxl-component-utils.c @@ -38,19 +38,23 @@ static void dumb_hdm_handler(CXLComponentState *cxl_cstate, hwaddr offset, ComponentRegisters *cregs = &cxl_cstate->crb; uint32_t *cache_mem = cregs->cache_mem_registers; bool should_commit = false; + bool should_uncommit = false; switch (offset) { case A_CXL_HDM_DECODER0_CTRL: should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT); + should_uncommit = !should_commit; break; default: break; } if (should_commit) { - value = FIELD_DP32(value, CXL_HDM_DECODER0_CTRL, COMMIT, 0); value = FIELD_DP32(value, CXL_HDM_DECODER0_CTRL, ERR, 0); value = FIELD_DP32(value, CXL_HDM_DECODER0_CTRL, COMMITTED, 1); + } else if (should_uncommit) { + value = FIELD_DP32(value, CXL_HDM_DECODER0_CTRL, ERR, 0); + value = FIELD_DP32(value, CXL_HDM_DECODER0_CTRL, COMMITTED, 0); } stl_le_p((uint8_t *)cache_mem + offset, value); } diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index a2a9b17dbb..1bd5963a3f 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -320,13 +320,28 @@ static void hdm_decoder_commit(CXLType3Dev *ct3d, int which) ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL); /* TODO: Sanity checks that the decoder is possible */ - ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, COMMIT, 0); ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, ERR, 0); ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED, 1); stl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL, ctrl); } +static void hdm_decoder_uncommit(CXLType3Dev *ct3d, int which) +{ + ComponentRegisters *cregs = &ct3d->cxl_cstate.crb; + uint32_t *cache_mem = cregs->cache_mem_registers; + uint32_t ctrl; + + assert(which == 0); + + ctrl = ldl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL); + + ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, ERR, 0); + ctrl = FIELD_DP32(ctrl, CXL_HDM_DECODER0_CTRL, COMMITTED, 0); + + stl_le_p(cache_mem + R_CXL_HDM_DECODER0_CTRL, ctrl); +} + static int ct3d_qmp_uncor_err_to_cxl(CxlUncorErrorType qmp_err) { switch (qmp_err) { @@ -395,6 +410,7 @@ static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value, CXLType3Dev *ct3d = container_of(cxl_cstate, CXLType3Dev, cxl_cstate); uint32_t *cache_mem = cregs->cache_mem_registers; bool should_commit = false; + bool should_uncommit = false; int which_hdm = -1; assert(size == 4); @@ -403,6 +419,7 @@ static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value, switch (offset) { case A_CXL_HDM_DECODER0_CTRL: should_commit = FIELD_EX32(value, CXL_HDM_DECODER0_CTRL, COMMIT); + should_uncommit = !should_commit; which_hdm = 0; break; case A_CXL_RAS_UNC_ERR_STATUS: @@ -489,6 +506,8 @@ static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value, stl_le_p((uint8_t *)cache_mem + offset, value); if (should_commit) { hdm_decoder_commit(ct3d, which_hdm); + } else if (should_uncommit) { + hdm_decoder_uncommit(ct3d, which_hdm); } } From 847ea4e746a1cac861ffe6b8256052131e8e3b93 Mon Sep 17 00:00:00 2001 From: Gregory Price Date: Fri, 21 Apr 2023 17:08:25 +0100 Subject: [PATCH 10/40] tests/qtest/cxl-test: whitespace, line ending cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Defines are starting to exceed line length limits, align them for cleanliness before making modifications. Signed-off-by: Gregory Price Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Jonathan Cameron Message-Id: <20230421160827.2227-2-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/qtest/cxl-test.c | 78 +++++++++++++++++++++++------------------- 1 file changed, 43 insertions(+), 35 deletions(-) diff --git a/tests/qtest/cxl-test.c b/tests/qtest/cxl-test.c index 61f25a72b6..eda2bbbbe6 100644 --- a/tests/qtest/cxl-test.c +++ b/tests/qtest/cxl-test.c @@ -8,50 +8,58 @@ #include "qemu/osdep.h" #include "libqtest-single.h" -#define QEMU_PXB_CMD "-machine q35,cxl=on " \ - "-device pxb-cxl,id=cxl.0,bus=pcie.0,bus_nr=52 " \ - "-M cxl-fmw.0.targets.0=cxl.0,cxl-fmw.0.size=4G " +#define QEMU_PXB_CMD \ + "-machine q35,cxl=on " \ + "-device pxb-cxl,id=cxl.0,bus=pcie.0,bus_nr=52 " \ + "-M cxl-fmw.0.targets.0=cxl.0,cxl-fmw.0.size=4G " -#define QEMU_2PXB_CMD "-machine q35,cxl=on " \ - "-device pxb-cxl,id=cxl.0,bus=pcie.0,bus_nr=52 " \ - "-device pxb-cxl,id=cxl.1,bus=pcie.0,bus_nr=53 " \ - "-M cxl-fmw.0.targets.0=cxl.0,cxl-fmw.0.targets.1=cxl.1,cxl-fmw.0.size=4G " +#define QEMU_2PXB_CMD \ + "-machine q35,cxl=on " \ + "-device pxb-cxl,id=cxl.0,bus=pcie.0,bus_nr=52 " \ + "-device pxb-cxl,id=cxl.1,bus=pcie.0,bus_nr=53 " \ + "-M cxl-fmw.0.targets.0=cxl.0,cxl-fmw.0.targets.1=cxl.1,cxl-fmw.0.size=4G " -#define QEMU_RP "-device cxl-rp,id=rp0,bus=cxl.0,chassis=0,slot=0 " +#define QEMU_RP \ + "-device cxl-rp,id=rp0,bus=cxl.0,chassis=0,slot=0 " /* Dual ports on first pxb */ -#define QEMU_2RP "-device cxl-rp,id=rp0,bus=cxl.0,chassis=0,slot=0 " \ - "-device cxl-rp,id=rp1,bus=cxl.0,chassis=0,slot=1 " +#define QEMU_2RP \ + "-device cxl-rp,id=rp0,bus=cxl.0,chassis=0,slot=0 " \ + "-device cxl-rp,id=rp1,bus=cxl.0,chassis=0,slot=1 " /* Dual ports on each of the pxb instances */ -#define QEMU_4RP "-device cxl-rp,id=rp0,bus=cxl.0,chassis=0,slot=0 " \ - "-device cxl-rp,id=rp1,bus=cxl.0,chassis=0,slot=1 " \ - "-device cxl-rp,id=rp2,bus=cxl.1,chassis=0,slot=2 " \ - "-device cxl-rp,id=rp3,bus=cxl.1,chassis=0,slot=3 " +#define QEMU_4RP \ + "-device cxl-rp,id=rp0,bus=cxl.0,chassis=0,slot=0 " \ + "-device cxl-rp,id=rp1,bus=cxl.0,chassis=0,slot=1 " \ + "-device cxl-rp,id=rp2,bus=cxl.1,chassis=0,slot=2 " \ + "-device cxl-rp,id=rp3,bus=cxl.1,chassis=0,slot=3 " -#define QEMU_T3D "-object memory-backend-file,id=cxl-mem0,mem-path=%s,size=256M " \ - "-object memory-backend-file,id=lsa0,mem-path=%s,size=256M " \ - "-device cxl-type3,bus=rp0,memdev=cxl-mem0,lsa=lsa0,id=cxl-pmem0 " +#define QEMU_T3D \ + "-object memory-backend-file,id=cxl-mem0,mem-path=%s,size=256M " \ + "-object memory-backend-file,id=lsa0,mem-path=%s,size=256M " \ + "-device cxl-type3,bus=rp0,memdev=cxl-mem0,lsa=lsa0,id=cxl-pmem0 " -#define QEMU_2T3D "-object memory-backend-file,id=cxl-mem0,mem-path=%s,size=256M " \ - "-object memory-backend-file,id=lsa0,mem-path=%s,size=256M " \ - "-device cxl-type3,bus=rp0,memdev=cxl-mem0,lsa=lsa0,id=cxl-pmem0 " \ - "-object memory-backend-file,id=cxl-mem1,mem-path=%s,size=256M " \ - "-object memory-backend-file,id=lsa1,mem-path=%s,size=256M " \ - "-device cxl-type3,bus=rp1,memdev=cxl-mem1,lsa=lsa1,id=cxl-pmem1 " +#define QEMU_2T3D \ + "-object memory-backend-file,id=cxl-mem0,mem-path=%s,size=256M " \ + "-object memory-backend-file,id=lsa0,mem-path=%s,size=256M " \ + "-device cxl-type3,bus=rp0,memdev=cxl-mem0,lsa=lsa0,id=cxl-pmem0 " \ + "-object memory-backend-file,id=cxl-mem1,mem-path=%s,size=256M " \ + "-object memory-backend-file,id=lsa1,mem-path=%s,size=256M " \ + "-device cxl-type3,bus=rp1,memdev=cxl-mem1,lsa=lsa1,id=cxl-pmem1 " -#define QEMU_4T3D "-object memory-backend-file,id=cxl-mem0,mem-path=%s,size=256M " \ - "-object memory-backend-file,id=lsa0,mem-path=%s,size=256M " \ - "-device cxl-type3,bus=rp0,memdev=cxl-mem0,lsa=lsa0,id=cxl-pmem0 " \ - "-object memory-backend-file,id=cxl-mem1,mem-path=%s,size=256M " \ - "-object memory-backend-file,id=lsa1,mem-path=%s,size=256M " \ - "-device cxl-type3,bus=rp1,memdev=cxl-mem1,lsa=lsa1,id=cxl-pmem1 " \ - "-object memory-backend-file,id=cxl-mem2,mem-path=%s,size=256M " \ - "-object memory-backend-file,id=lsa2,mem-path=%s,size=256M " \ - "-device cxl-type3,bus=rp2,memdev=cxl-mem2,lsa=lsa2,id=cxl-pmem2 " \ - "-object memory-backend-file,id=cxl-mem3,mem-path=%s,size=256M " \ - "-object memory-backend-file,id=lsa3,mem-path=%s,size=256M " \ - "-device cxl-type3,bus=rp3,memdev=cxl-mem3,lsa=lsa3,id=cxl-pmem3 " +#define QEMU_4T3D \ + "-object memory-backend-file,id=cxl-mem0,mem-path=%s,size=256M " \ + "-object memory-backend-file,id=lsa0,mem-path=%s,size=256M " \ + "-device cxl-type3,bus=rp0,memdev=cxl-mem0,lsa=lsa0,id=cxl-pmem0 " \ + "-object memory-backend-file,id=cxl-mem1,mem-path=%s,size=256M " \ + "-object memory-backend-file,id=lsa1,mem-path=%s,size=256M " \ + "-device cxl-type3,bus=rp1,memdev=cxl-mem1,lsa=lsa1,id=cxl-pmem1 " \ + "-object memory-backend-file,id=cxl-mem2,mem-path=%s,size=256M " \ + "-object memory-backend-file,id=lsa2,mem-path=%s,size=256M " \ + "-device cxl-type3,bus=rp2,memdev=cxl-mem2,lsa=lsa2,id=cxl-pmem2 " \ + "-object memory-backend-file,id=cxl-mem3,mem-path=%s,size=256M " \ + "-object memory-backend-file,id=lsa3,mem-path=%s,size=256M " \ + "-device cxl-type3,bus=rp3,memdev=cxl-mem3,lsa=lsa3,id=cxl-pmem3 " static void cxl_basic_hb(void) { From 3521176526a901bd2a8418ce6470df0e38ca4e11 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 21 Apr 2023 17:08:26 +0100 Subject: [PATCH 11/40] hw/mem: Use memory_region_size() in cxl_type3 Accessors prefered over direct use of int128_get64() as they clamp out of range values. None are expected here but cleaner to always use the accessor than mix and match. Signed-off-by: Jonathan Cameron Message-Id: <20230421160827.2227-3-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Gregory Price --- hw/mem/cxl_type3.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index 1bd5963a3f..2db756851c 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -52,7 +52,7 @@ static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table, .DSMADhandle = dsmad_handle, .flags = CDAT_DSMAS_FLAG_NV, .DPA_base = 0, - .DPA_length = int128_get64(mr->size), + .DPA_length = memory_region_size(mr), }; /* For now, no memory side cache, plausiblish numbers */ @@ -133,7 +133,7 @@ static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table, /* Reserved - the non volatile from DSMAS matters */ .EFI_memory_type_attr = 2, .DPA_offset = 0, - .DPA_length = int128_get64(mr->size), + .DPA_length = memory_region_size(mr), }; /* Header always at start of structure */ @@ -698,7 +698,7 @@ MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data, return MEMTX_ERROR; } - if (dpa_offset > int128_get64(mr->size)) { + if (dpa_offset > memory_region_size(mr)) { return MEMTX_ERROR; } @@ -721,7 +721,7 @@ MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data, return MEMTX_OK; } - if (dpa_offset > int128_get64(mr->size)) { + if (dpa_offset > memory_region_size(mr)) { return MEMTX_OK; } return address_space_write(&ct3d->hostmem_as, dpa_offset, attrs, From adacc814f541af9281c922e750d8ba4b90c1a73e Mon Sep 17 00:00:00 2001 From: Gregory Price Date: Fri, 21 Apr 2023 17:08:27 +0100 Subject: [PATCH 12/40] hw/cxl: Multi-Region CXL Type-3 Devices (Volatile and Persistent) This commit enables each CXL Type-3 device to contain one volatile memory region and one persistent region. Two new properties have been added to cxl-type3 device initialization: [volatile-memdev] and [persistent-memdev] The existing [memdev] property has been deprecated and will default the memory region to a persistent memory region (although a user may assign the region to a ram or file backed region). It cannot be used in combination with the new [persistent-memdev] property. Partitioning volatile memory from persistent memory is not yet supported. Volatile memory is mapped at DPA(0x0), while Persistent memory is mapped at DPA(vmem->size), per CXL Spec 8.2.9.8.2.0 - Get Partition Info. Signed-off-by: Gregory Price Reviewed-by: Davidlohr Bueso Reviewed-by: Fan Ni Tested-by: Fan Ni Signed-off-by: Jonathan Cameron Message-Id: <20230421160827.2227-4-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- docs/about/deprecated.rst | 8 + docs/system/devices/cxl.rst | 49 ++++-- hw/cxl/cxl-mailbox-utils.c | 32 ++-- hw/mem/cxl_type3.c | 306 ++++++++++++++++++++++++--------- include/hw/cxl/cxl_device.h | 11 +- tests/qtest/bios-tables-test.c | 8 +- tests/qtest/cxl-test.c | 76 ++++++-- 7 files changed, 371 insertions(+), 119 deletions(-) diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst index 7bb4d2f4f6..e934e0a13a 100644 --- a/docs/about/deprecated.rst +++ b/docs/about/deprecated.rst @@ -328,6 +328,14 @@ from Intel that was not properly allocated. Since version 5.2, the controller has used a properly allocated identifier. Deprecate the ``use-intel-id`` machine compatibility parameter. +``-device cxl-type3,memdev=xxxx`` (since 8.0) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``cxl-type3`` device initially only used a single memory backend. With +the addition of volatile memory support, it is now necessary to distinguish +between persistent and volatile memory backends. As such, memdev is deprecated +in favor of persistent-memdev. + Block device options '''''''''''''''''''' diff --git a/docs/system/devices/cxl.rst b/docs/system/devices/cxl.rst index 8f2885aba1..f12011e230 100644 --- a/docs/system/devices/cxl.rst +++ b/docs/system/devices/cxl.rst @@ -300,7 +300,7 @@ Example topology involving a switch:: Example command lines --------------------- -A very simple setup with just one directly attached CXL Type 3 device:: +A very simple setup with just one directly attached CXL Type 3 Persistent Memory device:: qemu-system-x86_64 -M q35,cxl=on -m 4G,maxmem=8G,slots=8 -smp 4 \ ... @@ -308,7 +308,28 @@ A very simple setup with just one directly attached CXL Type 3 device:: -object memory-backend-file,id=cxl-lsa1,share=on,mem-path=/tmp/lsa.raw,size=256M \ -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \ -device cxl-rp,port=0,bus=cxl.1,id=root_port13,chassis=0,slot=2 \ - -device cxl-type3,bus=root_port13,memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem0 \ + -device cxl-type3,bus=root_port13,persistent-memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem0 \ + -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G + +A very simple setup with just one directly attached CXL Type 3 Volatile Memory device:: + + qemu-system-aarch64 -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 -cpu max \ + ... + -object memory-backend-ram,id=vmem0,share=on,size=256M \ + -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \ + -device cxl-rp,port=0,bus=cxl.1,id=root_port13,chassis=0,slot=2 \ + -device cxl-type3,bus=root_port13,volatile-memdev=vmem0,id=cxl-vmem0 \ + -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G + +The same volatile setup may optionally include an LSA region:: + + qemu-system-aarch64 -M virt,gic-version=3,cxl=on -m 4g,maxmem=8G,slots=8 -cpu max \ + ... + -object memory-backend-ram,id=vmem0,share=on,size=256M \ + -object memory-backend-file,id=cxl-lsa0,share=on,mem-path=/tmp/lsa.raw,size=256M \ + -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \ + -device cxl-rp,port=0,bus=cxl.1,id=root_port13,chassis=0,slot=2 \ + -device cxl-type3,bus=root_port13,volatile-memdev=vmem0,lsa=cxl-lsa0,id=cxl-vmem0 \ -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G A setup suitable for 4 way interleave. Only one fixed window provided, to enable 2 way @@ -328,13 +349,13 @@ the CXL Type3 device directly attached (no switches).:: -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1 \ -device pxb-cxl,bus_nr=222,bus=pcie.0,id=cxl.2 \ -device cxl-rp,port=0,bus=cxl.1,id=root_port13,chassis=0,slot=2 \ - -device cxl-type3,bus=root_port13,memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem0 \ + -device cxl-type3,bus=root_port13,persistent-memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem0 \ -device cxl-rp,port=1,bus=cxl.1,id=root_port14,chassis=0,slot=3 \ - -device cxl-type3,bus=root_port14,memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem1 \ + -device cxl-type3,bus=root_port14,persistent-memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem1 \ -device cxl-rp,port=0,bus=cxl.2,id=root_port15,chassis=0,slot=5 \ - -device cxl-type3,bus=root_port15,memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem2 \ + -device cxl-type3,bus=root_port15,persistent-memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem2 \ -device cxl-rp,port=1,bus=cxl.2,id=root_port16,chassis=0,slot=6 \ - -device cxl-type3,bus=root_port16,memdev=cxl-mem4,lsa=cxl-lsa4,id=cxl-pmem3 \ + -device cxl-type3,bus=root_port16,persistent-memdev=cxl-mem4,lsa=cxl-lsa4,id=cxl-pmem3 \ -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.targets.1=cxl.2,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=8k An example of 4 devices below a switch suitable for 1, 2 or 4 way interleave:: @@ -354,15 +375,23 @@ An example of 4 devices below a switch suitable for 1, 2 or 4 way interleave:: -device cxl-rp,port=1,bus=cxl.1,id=root_port1,chassis=0,slot=1 \ -device cxl-upstream,bus=root_port0,id=us0 \ -device cxl-downstream,port=0,bus=us0,id=swport0,chassis=0,slot=4 \ - -device cxl-type3,bus=swport0,memdev=cxl-mem0,lsa=cxl-lsa0,id=cxl-pmem0 \ + -device cxl-type3,bus=swport0,persistent-memdev=cxl-mem0,lsa=cxl-lsa0,id=cxl-pmem0 \ -device cxl-downstream,port=1,bus=us0,id=swport1,chassis=0,slot=5 \ - -device cxl-type3,bus=swport1,memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem1 \ + -device cxl-type3,bus=swport1,persistent-memdev=cxl-mem1,lsa=cxl-lsa1,id=cxl-pmem1 \ -device cxl-downstream,port=2,bus=us0,id=swport2,chassis=0,slot=6 \ - -device cxl-type3,bus=swport2,memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem2 \ + -device cxl-type3,bus=swport2,persistent-memdev=cxl-mem2,lsa=cxl-lsa2,id=cxl-pmem2 \ -device cxl-downstream,port=3,bus=us0,id=swport3,chassis=0,slot=7 \ - -device cxl-type3,bus=swport3,memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3 \ + -device cxl-type3,bus=swport3,persistent-memdev=cxl-mem3,lsa=cxl-lsa3,id=cxl-pmem3 \ -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=4k +Deprecations +------------ + +The Type 3 device [memdev] attribute has been deprecated in favor of the +[persistent-memdev] attributes. [memdev] will default to a persistent memory +device for backward compatibility and is incapable of being used in combination +with [persistent-memdev]. + Kernel Configuration Options ---------------------------- diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index 206e04a4b8..ed663cc04a 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -141,7 +141,8 @@ static ret_code cmd_firmware_update_get_info(struct cxl_cmd *cmd, } QEMU_PACKED *fw_info; QEMU_BUILD_BUG_ON(sizeof(*fw_info) != 0x50); - if (cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER) { + if ((cxl_dstate->vmem_size < CXL_CAPACITY_MULTIPLIER) || + (cxl_dstate->pmem_size < CXL_CAPACITY_MULTIPLIER)) { return CXL_MBOX_INTERNAL_ERROR; } @@ -288,21 +289,21 @@ static ret_code cmd_identify_memory_device(struct cxl_cmd *cmd, CXLType3Dev *ct3d = container_of(cxl_dstate, CXLType3Dev, cxl_dstate); CXLType3Class *cvc = CXL_TYPE3_GET_CLASS(ct3d); - uint64_t size = cxl_dstate->pmem_size; - if (!QEMU_IS_ALIGNED(size, CXL_CAPACITY_MULTIPLIER)) { + if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) || + (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) { return CXL_MBOX_INTERNAL_ERROR; } id = (void *)cmd->payload; memset(id, 0, sizeof(*id)); - /* PMEM only */ snprintf(id->fw_revision, 0x10, "BWFW VERSION %02d", 0); - id->total_capacity = size / CXL_CAPACITY_MULTIPLIER; - id->persistent_capacity = size / CXL_CAPACITY_MULTIPLIER; - id->lsa_size = cvc->get_lsa_size(ct3d); + stq_le_p(&id->total_capacity, cxl_dstate->mem_size / CXL_CAPACITY_MULTIPLIER); + stq_le_p(&id->persistent_capacity, cxl_dstate->pmem_size / CXL_CAPACITY_MULTIPLIER); + stq_le_p(&id->volatile_capacity, cxl_dstate->vmem_size / CXL_CAPACITY_MULTIPLIER); + stl_le_p(&id->lsa_size, cvc->get_lsa_size(ct3d)); *len = sizeof(*id); return CXL_MBOX_SUCCESS; @@ -319,17 +320,20 @@ static ret_code cmd_ccls_get_partition_info(struct cxl_cmd *cmd, uint64_t next_pmem; } QEMU_PACKED *part_info = (void *)cmd->payload; QEMU_BUILD_BUG_ON(sizeof(*part_info) != 0x20); - uint64_t size = cxl_dstate->pmem_size; - if (!QEMU_IS_ALIGNED(size, CXL_CAPACITY_MULTIPLIER)) { + if ((!QEMU_IS_ALIGNED(cxl_dstate->vmem_size, CXL_CAPACITY_MULTIPLIER)) || + (!QEMU_IS_ALIGNED(cxl_dstate->pmem_size, CXL_CAPACITY_MULTIPLIER))) { return CXL_MBOX_INTERNAL_ERROR; } - /* PMEM only */ - part_info->active_vmem = 0; - part_info->next_vmem = 0; - part_info->active_pmem = size / CXL_CAPACITY_MULTIPLIER; - part_info->next_pmem = 0; + stq_le_p(&part_info->active_vmem, cxl_dstate->vmem_size / CXL_CAPACITY_MULTIPLIER); + /* + * When both next_vmem and next_pmem are 0, there is no pending change to + * partitioning. + */ + stq_le_p(&part_info->next_vmem, 0); + stq_le_p(&part_info->active_pmem, cxl_dstate->pmem_size / CXL_CAPACITY_MULTIPLIER); + stq_le_p(&part_info->next_pmem, 0); *len = sizeof(*part_info); return CXL_MBOX_SUCCESS; diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index 2db756851c..2adacbd01b 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -31,7 +31,8 @@ enum { }; static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table, - int dsmad_handle, MemoryRegion *mr) + int dsmad_handle, MemoryRegion *mr, + bool is_pmem, uint64_t dpa_base) { g_autofree CDATDsmas *dsmas = NULL; g_autofree CDATDslbis *dslbis0 = NULL; @@ -50,8 +51,8 @@ static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table, .length = sizeof(*dsmas), }, .DSMADhandle = dsmad_handle, - .flags = CDAT_DSMAS_FLAG_NV, - .DPA_base = 0, + .flags = is_pmem ? CDAT_DSMAS_FLAG_NV : 0, + .DPA_base = dpa_base, .DPA_length = memory_region_size(mr), }; @@ -130,8 +131,11 @@ static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table, .length = sizeof(*dsemts), }, .DSMAS_handle = dsmad_handle, - /* Reserved - the non volatile from DSMAS matters */ - .EFI_memory_type_attr = 2, + /* + * NV: Reserved - the non volatile from DSMAS matters + * V: EFI_MEMORY_SP + */ + .EFI_memory_type_attr = is_pmem ? 2 : 1, .DPA_offset = 0, .DPA_length = memory_region_size(mr), }; @@ -150,33 +154,68 @@ static int ct3_build_cdat_entries_for_mr(CDATSubHeader **cdat_table, static int ct3_build_cdat_table(CDATSubHeader ***cdat_table, void *priv) { g_autofree CDATSubHeader **table = NULL; - MemoryRegion *nonvolatile_mr; CXLType3Dev *ct3d = priv; + MemoryRegion *volatile_mr = NULL, *nonvolatile_mr = NULL; int dsmad_handle = 0; - int rc; + int cur_ent = 0; + int len = 0; + int rc, i; - if (!ct3d->hostmem) { + if (!ct3d->hostpmem && !ct3d->hostvmem) { return 0; } - nonvolatile_mr = host_memory_backend_get_memory(ct3d->hostmem); - if (!nonvolatile_mr) { - return -EINVAL; + if (ct3d->hostvmem) { + volatile_mr = host_memory_backend_get_memory(ct3d->hostvmem); + if (!volatile_mr) { + return -EINVAL; + } + len += CT3_CDAT_NUM_ENTRIES; } - table = g_malloc0(CT3_CDAT_NUM_ENTRIES * sizeof(*table)); + if (ct3d->hostpmem) { + nonvolatile_mr = host_memory_backend_get_memory(ct3d->hostpmem); + if (!nonvolatile_mr) { + return -EINVAL; + } + len += CT3_CDAT_NUM_ENTRIES; + } + + table = g_malloc0(len * sizeof(*table)); if (!table) { return -ENOMEM; } - rc = ct3_build_cdat_entries_for_mr(table, dsmad_handle++, nonvolatile_mr); - if (rc < 0) { - return rc; + /* Now fill them in */ + if (volatile_mr) { + rc = ct3_build_cdat_entries_for_mr(table, dsmad_handle++, volatile_mr, + false, 0); + if (rc < 0) { + return rc; + } + cur_ent = CT3_CDAT_NUM_ENTRIES; } + if (nonvolatile_mr) { + rc = ct3_build_cdat_entries_for_mr(&(table[cur_ent]), dsmad_handle++, + nonvolatile_mr, true, + (volatile_mr ? + memory_region_size(volatile_mr) : 0)); + if (rc < 0) { + goto error_cleanup; + } + cur_ent += CT3_CDAT_NUM_ENTRIES; + } + assert(len == cur_ent); + *cdat_table = g_steal_pointer(&table); - return CT3_CDAT_NUM_ENTRIES; + return len; +error_cleanup: + for (i = 0; i < cur_ent; i++) { + g_free(table[i]); + } + return rc; } static void ct3_free_cdat_table(CDATSubHeader **cdat_table, int num, void *priv) @@ -264,16 +303,42 @@ static void build_dvsecs(CXLType3Dev *ct3d) { CXLComponentState *cxl_cstate = &ct3d->cxl_cstate; uint8_t *dvsec; + uint32_t range1_size_hi, range1_size_lo, + range1_base_hi = 0, range1_base_lo = 0, + range2_size_hi = 0, range2_size_lo = 0, + range2_base_hi = 0, range2_base_lo = 0; + + /* + * Volatile memory is mapped as (0x0) + * Persistent memory is mapped at (volatile->size) + */ + if (ct3d->hostvmem) { + range1_size_hi = ct3d->hostvmem->size >> 32; + range1_size_lo = (2 << 5) | (2 << 2) | 0x3 | + (ct3d->hostvmem->size & 0xF0000000); + if (ct3d->hostpmem) { + range2_size_hi = ct3d->hostpmem->size >> 32; + range2_size_lo = (2 << 5) | (2 << 2) | 0x3 | + (ct3d->hostpmem->size & 0xF0000000); + } + } else { + range1_size_hi = ct3d->hostpmem->size >> 32; + range1_size_lo = (2 << 5) | (2 << 2) | 0x3 | + (ct3d->hostpmem->size & 0xF0000000); + } dvsec = (uint8_t *)&(CXLDVSECDevice){ .cap = 0x1e, .ctrl = 0x2, .status2 = 0x2, - .range1_size_hi = ct3d->hostmem->size >> 32, - .range1_size_lo = (2 << 5) | (2 << 2) | 0x3 | - (ct3d->hostmem->size & 0xF0000000), - .range1_base_hi = 0, - .range1_base_lo = 0, + .range1_size_hi = range1_size_hi, + .range1_size_lo = range1_size_lo, + .range1_base_hi = range1_base_hi, + .range1_base_lo = range1_base_lo, + .range2_size_hi = range2_size_hi, + .range2_size_lo = range2_size_lo, + .range2_base_hi = range2_base_hi, + .range2_base_lo = range2_base_lo, }; cxl_component_create_dvsec(cxl_cstate, CXL2_TYPE3_DEVICE, PCIE_CXL_DEVICE_DVSEC_LENGTH, @@ -514,36 +579,69 @@ static void ct3d_reg_write(void *opaque, hwaddr offset, uint64_t value, static bool cxl_setup_memory(CXLType3Dev *ct3d, Error **errp) { DeviceState *ds = DEVICE(ct3d); - MemoryRegion *mr; - char *name; - if (!ct3d->hostmem) { - error_setg(errp, "memdev property must be set"); + if (!ct3d->hostmem && !ct3d->hostvmem && !ct3d->hostpmem) { + error_setg(errp, "at least one memdev property must be set"); + return false; + } else if (ct3d->hostmem && ct3d->hostpmem) { + error_setg(errp, "[memdev] cannot be used with new " + "[persistent-memdev] property"); + return false; + } else if (ct3d->hostmem) { + /* Use of hostmem property implies pmem */ + ct3d->hostpmem = ct3d->hostmem; + ct3d->hostmem = NULL; + } + + if (ct3d->hostpmem && !ct3d->lsa) { + error_setg(errp, "lsa property must be set for persistent devices"); return false; } - mr = host_memory_backend_get_memory(ct3d->hostmem); - if (!mr) { - error_setg(errp, "memdev property must be set"); - return false; + if (ct3d->hostvmem) { + MemoryRegion *vmr; + char *v_name; + + vmr = host_memory_backend_get_memory(ct3d->hostvmem); + if (!vmr) { + error_setg(errp, "volatile memdev must have backing device"); + return false; + } + memory_region_set_nonvolatile(vmr, false); + memory_region_set_enabled(vmr, true); + host_memory_backend_set_mapped(ct3d->hostvmem, true); + if (ds->id) { + v_name = g_strdup_printf("cxl-type3-dpa-vmem-space:%s", ds->id); + } else { + v_name = g_strdup("cxl-type3-dpa-vmem-space"); + } + address_space_init(&ct3d->hostvmem_as, vmr, v_name); + ct3d->cxl_dstate.vmem_size = memory_region_size(vmr); + ct3d->cxl_dstate.mem_size += memory_region_size(vmr); + g_free(v_name); } - memory_region_set_nonvolatile(mr, true); - memory_region_set_enabled(mr, true); - host_memory_backend_set_mapped(ct3d->hostmem, true); - if (ds->id) { - name = g_strdup_printf("cxl-type3-dpa-space:%s", ds->id); - } else { - name = g_strdup("cxl-type3-dpa-space"); - } - address_space_init(&ct3d->hostmem_as, mr, name); - g_free(name); + if (ct3d->hostpmem) { + MemoryRegion *pmr; + char *p_name; - ct3d->cxl_dstate.pmem_size = ct3d->hostmem->size; - - if (!ct3d->lsa) { - error_setg(errp, "lsa property must be set"); - return false; + pmr = host_memory_backend_get_memory(ct3d->hostpmem); + if (!pmr) { + error_setg(errp, "persistent memdev must have backing device"); + return false; + } + memory_region_set_nonvolatile(pmr, true); + memory_region_set_enabled(pmr, true); + host_memory_backend_set_mapped(ct3d->hostpmem, true); + if (ds->id) { + p_name = g_strdup_printf("cxl-type3-dpa-pmem-space:%s", ds->id); + } else { + p_name = g_strdup("cxl-type3-dpa-pmem-space"); + } + address_space_init(&ct3d->hostpmem_as, pmr, p_name); + ct3d->cxl_dstate.pmem_size = memory_region_size(pmr); + ct3d->cxl_dstate.mem_size += memory_region_size(pmr); + g_free(p_name); } return true; @@ -633,7 +731,12 @@ err_release_cdat: err_free_special_ops: g_free(regs->special_ops); err_address_space_free: - address_space_destroy(&ct3d->hostmem_as); + if (ct3d->hostpmem) { + address_space_destroy(&ct3d->hostpmem_as); + } + if (ct3d->hostvmem) { + address_space_destroy(&ct3d->hostvmem_as); + } return; } @@ -646,7 +749,12 @@ static void ct3_exit(PCIDevice *pci_dev) pcie_aer_exit(pci_dev); cxl_doe_cdat_release(cxl_cstate); g_free(regs->special_ops); - address_space_destroy(&ct3d->hostmem_as); + if (ct3d->hostpmem) { + address_space_destroy(&ct3d->hostpmem_as); + } + if (ct3d->hostvmem) { + address_space_destroy(&ct3d->hostvmem_as); + } } /* TODO: Support multiple HDM decoders and DPA skip */ @@ -681,51 +789,77 @@ static bool cxl_type3_dpa(CXLType3Dev *ct3d, hwaddr host_addr, uint64_t *dpa) return true; } +static int cxl_type3_hpa_to_as_and_dpa(CXLType3Dev *ct3d, + hwaddr host_addr, + unsigned int size, + AddressSpace **as, + uint64_t *dpa_offset) +{ + MemoryRegion *vmr = NULL, *pmr = NULL; + + if (ct3d->hostvmem) { + vmr = host_memory_backend_get_memory(ct3d->hostvmem); + } + if (ct3d->hostpmem) { + pmr = host_memory_backend_get_memory(ct3d->hostpmem); + } + + if (!vmr && !pmr) { + return -ENODEV; + } + + if (!cxl_type3_dpa(ct3d, host_addr, dpa_offset)) { + return -EINVAL; + } + + if (*dpa_offset > ct3d->cxl_dstate.mem_size) { + return -EINVAL; + } + + if (vmr) { + if (*dpa_offset < memory_region_size(vmr)) { + *as = &ct3d->hostvmem_as; + } else { + *as = &ct3d->hostpmem_as; + *dpa_offset -= memory_region_size(vmr); + } + } else { + *as = &ct3d->hostpmem_as; + } + + return 0; +} + MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data, unsigned size, MemTxAttrs attrs) { - CXLType3Dev *ct3d = CXL_TYPE3(d); - uint64_t dpa_offset; - MemoryRegion *mr; + uint64_t dpa_offset = 0; + AddressSpace *as = NULL; + int res; - /* TODO support volatile region */ - mr = host_memory_backend_get_memory(ct3d->hostmem); - if (!mr) { + res = cxl_type3_hpa_to_as_and_dpa(CXL_TYPE3(d), host_addr, size, + &as, &dpa_offset); + if (res) { return MEMTX_ERROR; } - if (!cxl_type3_dpa(ct3d, host_addr, &dpa_offset)) { - return MEMTX_ERROR; - } - - if (dpa_offset > memory_region_size(mr)) { - return MEMTX_ERROR; - } - - return address_space_read(&ct3d->hostmem_as, dpa_offset, attrs, data, size); + return address_space_read(as, dpa_offset, attrs, data, size); } MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data, unsigned size, MemTxAttrs attrs) { - CXLType3Dev *ct3d = CXL_TYPE3(d); - uint64_t dpa_offset; - MemoryRegion *mr; + uint64_t dpa_offset = 0; + AddressSpace *as = NULL; + int res; - mr = host_memory_backend_get_memory(ct3d->hostmem); - if (!mr) { - return MEMTX_OK; + res = cxl_type3_hpa_to_as_and_dpa(CXL_TYPE3(d), host_addr, size, + &as, &dpa_offset); + if (res) { + return MEMTX_ERROR; } - if (!cxl_type3_dpa(ct3d, host_addr, &dpa_offset)) { - return MEMTX_OK; - } - - if (dpa_offset > memory_region_size(mr)) { - return MEMTX_OK; - } - return address_space_write(&ct3d->hostmem_as, dpa_offset, attrs, - &data, size); + return address_space_write(as, dpa_offset, attrs, &data, size); } static void ct3d_reset(DeviceState *dev) @@ -740,7 +874,11 @@ static void ct3d_reset(DeviceState *dev) static Property ct3_props[] = { DEFINE_PROP_LINK("memdev", CXLType3Dev, hostmem, TYPE_MEMORY_BACKEND, - HostMemoryBackend *), + HostMemoryBackend *), /* for backward compatibility */ + DEFINE_PROP_LINK("persistent-memdev", CXLType3Dev, hostpmem, + TYPE_MEMORY_BACKEND, HostMemoryBackend *), + DEFINE_PROP_LINK("volatile-memdev", CXLType3Dev, hostvmem, + TYPE_MEMORY_BACKEND, HostMemoryBackend *), DEFINE_PROP_LINK("lsa", CXLType3Dev, lsa, TYPE_MEMORY_BACKEND, HostMemoryBackend *), DEFINE_PROP_UINT64("sn", CXLType3Dev, sn, UI64_NULL), @@ -752,6 +890,10 @@ static uint64_t get_lsa_size(CXLType3Dev *ct3d) { MemoryRegion *mr; + if (!ct3d->lsa) { + return 0; + } + mr = host_memory_backend_get_memory(ct3d->lsa); return memory_region_size(mr); } @@ -769,6 +911,10 @@ static uint64_t get_lsa(CXLType3Dev *ct3d, void *buf, uint64_t size, MemoryRegion *mr; void *lsa; + if (!ct3d->lsa) { + return 0; + } + mr = host_memory_backend_get_memory(ct3d->lsa); validate_lsa_access(mr, size, offset); @@ -784,6 +930,10 @@ static void set_lsa(CXLType3Dev *ct3d, const void *buf, uint64_t size, MemoryRegion *mr; void *lsa; + if (!ct3d->lsa) { + return; + } + mr = host_memory_backend_get_memory(ct3d->lsa); validate_lsa_access(mr, size, offset); @@ -955,7 +1105,7 @@ static void ct3_class_init(ObjectClass *oc, void *data) pc->config_read = ct3d_config_read; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); - dc->desc = "CXL PMEM Device (Type 3)"; + dc->desc = "CXL Memory Device (Type 3)"; dc->reset = ct3d_reset; device_class_set_props(dc, ct3_props); diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h index d589f78202..edb9791bab 100644 --- a/include/hw/cxl/cxl_device.h +++ b/include/hw/cxl/cxl_device.h @@ -119,8 +119,10 @@ typedef struct cxl_device_state { uint64_t host_set; } timestamp; - /* memory region for persistent memory, HDM */ + /* memory region size, HDM */ + uint64_t mem_size; uint64_t pmem_size; + uint64_t vmem_size; } CXLDeviceState; /* Initialize the register block for a device */ @@ -245,12 +247,15 @@ struct CXLType3Dev { PCIDevice parent_obj; /* Properties */ - HostMemoryBackend *hostmem; + HostMemoryBackend *hostmem; /* deprecated */ + HostMemoryBackend *hostvmem; + HostMemoryBackend *hostpmem; HostMemoryBackend *lsa; uint64_t sn; /* State */ - AddressSpace hostmem_as; + AddressSpace hostvmem_as; + AddressSpace hostpmem_as; CXLComponentState cxl_cstate; CXLDeviceState cxl_dstate; diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c index 7fd88b0e9c..159e4edb8f 100644 --- a/tests/qtest/bios-tables-test.c +++ b/tests/qtest/bios-tables-test.c @@ -1867,13 +1867,13 @@ static void test_acpi_q35_cxl(void) " -device pxb-cxl,bus_nr=12,bus=pcie.0,id=cxl.1" " -device pxb-cxl,bus_nr=222,bus=pcie.0,id=cxl.2" " -device cxl-rp,port=0,bus=cxl.1,id=rp1,chassis=0,slot=2" - " -device cxl-type3,bus=rp1,memdev=cxl-mem1,lsa=lsa1" + " -device cxl-type3,bus=rp1,persistent-memdev=cxl-mem1,lsa=lsa1" " -device cxl-rp,port=1,bus=cxl.1,id=rp2,chassis=0,slot=3" - " -device cxl-type3,bus=rp2,memdev=cxl-mem2,lsa=lsa2" + " -device cxl-type3,bus=rp2,persistent-memdev=cxl-mem2,lsa=lsa2" " -device cxl-rp,port=0,bus=cxl.2,id=rp3,chassis=0,slot=5" - " -device cxl-type3,bus=rp3,memdev=cxl-mem3,lsa=lsa3" + " -device cxl-type3,bus=rp3,persistent-memdev=cxl-mem3,lsa=lsa3" " -device cxl-rp,port=1,bus=cxl.2,id=rp4,chassis=0,slot=6" - " -device cxl-type3,bus=rp4,memdev=cxl-mem4,lsa=lsa4" + " -device cxl-type3,bus=rp4,persistent-memdev=cxl-mem4,lsa=lsa4" " -M cxl-fmw.0.targets.0=cxl.1,cxl-fmw.0.size=4G,cxl-fmw.0.interleave-granularity=8k," "cxl-fmw.1.targets.0=cxl.1,cxl-fmw.1.targets.1=cxl.2,cxl-fmw.1.size=4G,cxl-fmw.1.interleave-granularity=8k", tmp_path, tmp_path, tmp_path, tmp_path, diff --git a/tests/qtest/cxl-test.c b/tests/qtest/cxl-test.c index eda2bbbbe6..edcad4a0ce 100644 --- a/tests/qtest/cxl-test.c +++ b/tests/qtest/cxl-test.c @@ -34,32 +34,46 @@ "-device cxl-rp,id=rp2,bus=cxl.1,chassis=0,slot=2 " \ "-device cxl-rp,id=rp3,bus=cxl.1,chassis=0,slot=3 " -#define QEMU_T3D \ +#define QEMU_T3D_DEPRECATED \ "-object memory-backend-file,id=cxl-mem0,mem-path=%s,size=256M " \ "-object memory-backend-file,id=lsa0,mem-path=%s,size=256M " \ "-device cxl-type3,bus=rp0,memdev=cxl-mem0,lsa=lsa0,id=cxl-pmem0 " +#define QEMU_T3D_PMEM \ + "-object memory-backend-file,id=cxl-mem0,mem-path=%s,size=256M " \ + "-object memory-backend-file,id=lsa0,mem-path=%s,size=256M " \ + "-device cxl-type3,bus=rp0,persistent-memdev=cxl-mem0,lsa=lsa0,id=pmem0 " + +#define QEMU_T3D_VMEM \ + "-object memory-backend-ram,id=cxl-mem0,size=256M " \ + "-device cxl-type3,bus=rp0,volatile-memdev=cxl-mem0,id=mem0 " + +#define QEMU_T3D_VMEM_LSA \ + "-object memory-backend-ram,id=cxl-mem0,size=256M " \ + "-object memory-backend-file,id=lsa0,mem-path=%s,size=256M " \ + "-device cxl-type3,bus=rp0,volatile-memdev=cxl-mem0,lsa=lsa0,id=mem0 " + #define QEMU_2T3D \ "-object memory-backend-file,id=cxl-mem0,mem-path=%s,size=256M " \ "-object memory-backend-file,id=lsa0,mem-path=%s,size=256M " \ - "-device cxl-type3,bus=rp0,memdev=cxl-mem0,lsa=lsa0,id=cxl-pmem0 " \ + "-device cxl-type3,bus=rp0,persistent-memdev=cxl-mem0,lsa=lsa0,id=pmem0 " \ "-object memory-backend-file,id=cxl-mem1,mem-path=%s,size=256M " \ "-object memory-backend-file,id=lsa1,mem-path=%s,size=256M " \ - "-device cxl-type3,bus=rp1,memdev=cxl-mem1,lsa=lsa1,id=cxl-pmem1 " + "-device cxl-type3,bus=rp1,persistent-memdev=cxl-mem1,lsa=lsa1,id=pmem1 " #define QEMU_4T3D \ "-object memory-backend-file,id=cxl-mem0,mem-path=%s,size=256M " \ "-object memory-backend-file,id=lsa0,mem-path=%s,size=256M " \ - "-device cxl-type3,bus=rp0,memdev=cxl-mem0,lsa=lsa0,id=cxl-pmem0 " \ + "-device cxl-type3,bus=rp0,persistent-memdev=cxl-mem0,lsa=lsa0,id=pmem0 " \ "-object memory-backend-file,id=cxl-mem1,mem-path=%s,size=256M " \ "-object memory-backend-file,id=lsa1,mem-path=%s,size=256M " \ - "-device cxl-type3,bus=rp1,memdev=cxl-mem1,lsa=lsa1,id=cxl-pmem1 " \ + "-device cxl-type3,bus=rp1,persistent-memdev=cxl-mem1,lsa=lsa1,id=pmem1 " \ "-object memory-backend-file,id=cxl-mem2,mem-path=%s,size=256M " \ "-object memory-backend-file,id=lsa2,mem-path=%s,size=256M " \ - "-device cxl-type3,bus=rp2,memdev=cxl-mem2,lsa=lsa2,id=cxl-pmem2 " \ + "-device cxl-type3,bus=rp2,persistent-memdev=cxl-mem2,lsa=lsa2,id=pmem2 " \ "-object memory-backend-file,id=cxl-mem3,mem-path=%s,size=256M " \ "-object memory-backend-file,id=lsa3,mem-path=%s,size=256M " \ - "-device cxl-type3,bus=rp3,memdev=cxl-mem3,lsa=lsa3,id=cxl-pmem3 " + "-device cxl-type3,bus=rp3,persistent-memdev=cxl-mem3,lsa=lsa3,id=pmem3 " static void cxl_basic_hb(void) { @@ -98,14 +112,53 @@ static void cxl_2root_port(void) } #ifdef CONFIG_POSIX -static void cxl_t3d(void) +static void cxl_t3d_deprecated(void) { g_autoptr(GString) cmdline = g_string_new(NULL); g_autofree const char *tmpfs = NULL; tmpfs = g_dir_make_tmp("cxl-test-XXXXXX", NULL); - g_string_printf(cmdline, QEMU_PXB_CMD QEMU_RP QEMU_T3D, tmpfs, tmpfs); + g_string_printf(cmdline, QEMU_PXB_CMD QEMU_RP QEMU_T3D_DEPRECATED, + tmpfs, tmpfs); + + qtest_start(cmdline->str); + qtest_end(); +} + +static void cxl_t3d_persistent(void) +{ + g_autoptr(GString) cmdline = g_string_new(NULL); + g_autofree const char *tmpfs = NULL; + + tmpfs = g_dir_make_tmp("cxl-test-XXXXXX", NULL); + + g_string_printf(cmdline, QEMU_PXB_CMD QEMU_RP QEMU_T3D_PMEM, + tmpfs, tmpfs); + + qtest_start(cmdline->str); + qtest_end(); +} + +static void cxl_t3d_volatile(void) +{ + g_autoptr(GString) cmdline = g_string_new(NULL); + + g_string_printf(cmdline, QEMU_PXB_CMD QEMU_RP QEMU_T3D_VMEM); + + qtest_start(cmdline->str); + qtest_end(); +} + +static void cxl_t3d_volatile_lsa(void) +{ + g_autoptr(GString) cmdline = g_string_new(NULL); + g_autofree const char *tmpfs = NULL; + + tmpfs = g_dir_make_tmp("cxl-test-XXXXXX", NULL); + + g_string_printf(cmdline, QEMU_PXB_CMD QEMU_RP QEMU_T3D_VMEM_LSA, + tmpfs); qtest_start(cmdline->str); qtest_end(); @@ -155,7 +208,10 @@ int main(int argc, char **argv) qtest_add_func("/pci/cxl/rp", cxl_root_port); qtest_add_func("/pci/cxl/rp_x2", cxl_2root_port); #ifdef CONFIG_POSIX - qtest_add_func("/pci/cxl/type3_device", cxl_t3d); + qtest_add_func("/pci/cxl/type3_device", cxl_t3d_deprecated); + qtest_add_func("/pci/cxl/type3_device_pmem", cxl_t3d_persistent); + qtest_add_func("/pci/cxl/type3_device_vmem", cxl_t3d_volatile); + qtest_add_func("/pci/cxl/type3_device_vmem_lsa", cxl_t3d_volatile_lsa); qtest_add_func("/pci/cxl/rp_x2_type3_x2", cxl_1pxb_2rp_2t3d); qtest_add_func("/pci/cxl/pxb_x2_root_port_x4_type3_x4", cxl_2pxb_4rp_4t3d); #endif From 354b09d228e1d04272e126a1edfcc70701af2958 Mon Sep 17 00:00:00 2001 From: Eric DeVolder Date: Wed, 17 May 2023 12:25:43 -0400 Subject: [PATCH 13/40] ACPI: bios-tables-test.c step 2 (allowed-diff entries) Following the guidelines in tests/qtest/bios-tables-test.c, set up bios-tables-test-allowed-diff.h to ignore the imminent changes to the APIC tables, per step 2. Signed-off-by: Eric DeVolder Message-Id: <20230517162545.2191-2-eric.devolder@oracle.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Igor Mammedov Acked-by: Ani Sinha --- tests/qtest/bios-tables-test-allowed-diff.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h index dfb8523c8b..66ae44e6b9 100644 --- a/tests/qtest/bios-tables-test-allowed-diff.h +++ b/tests/qtest/bios-tables-test-allowed-diff.h @@ -1 +1,15 @@ /* List of comma-separated changed AML files to ignore */ +"tests/data/acpi/microvm/APIC", +"tests/data/acpi/microvm/APIC.ioapic2", +"tests/data/acpi/microvm/APIC.pcie", +"tests/data/acpi/pc/APIC", +"tests/data/acpi/pc/APIC.acpihmat", +"tests/data/acpi/pc/APIC.cphp", +"tests/data/acpi/pc/APIC.dimmpxm", +"tests/data/acpi/q35/APIC", +"tests/data/acpi/q35/APIC.acpihmat", +"tests/data/acpi/q35/APIC.acpihmat-noinitiator", +"tests/data/acpi/q35/APIC.core-count2", +"tests/data/acpi/q35/APIC.cphp", +"tests/data/acpi/q35/APIC.dimmpxm", +"tests/data/acpi/q35/APIC.xapic", From 6da94e277cd6eaf627dcd2d50ca795c7a272b8aa Mon Sep 17 00:00:00 2001 From: Eric DeVolder Date: Wed, 17 May 2023 12:25:44 -0400 Subject: [PATCH 14/40] ACPI: i386: bump to MADT to revision 3 Currently i386 QEMU generates MADT revision 3, and reports MADT revision 1. Set .revision to 3 to match reality. Link: https://lore.kernel.org/linux-acpi/20230327191026.3454-1-eric.devolder@ora cle.com/T/#t Signed-off-by: Eric DeVolder Reviewed-by: Ani Sinha Message-Id: <20230517162545.2191-3-eric.devolder@oracle.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Igor Mammedov --- hw/i386/acpi-common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/i386/acpi-common.c b/hw/i386/acpi-common.c index 52e5c1439a..8a0932fe84 100644 --- a/hw/i386/acpi-common.c +++ b/hw/i386/acpi-common.c @@ -102,7 +102,7 @@ void acpi_build_madt(GArray *table_data, BIOSLinker *linker, MachineClass *mc = MACHINE_GET_CLASS(x86ms); const CPUArchIdList *apic_ids = mc->possible_cpu_arch_ids(MACHINE(x86ms)); AcpiDeviceIfClass *adevc = ACPI_DEVICE_IF_GET_CLASS(adev); - AcpiTable table = { .sig = "APIC", .rev = 1, .oem_id = oem_id, + AcpiTable table = { .sig = "APIC", .rev = 3, .oem_id = oem_id, .oem_table_id = oem_table_id }; acpi_table_begin(&table, table_data); From 1141159cb471b89a63c256902f315bcf8dcf7d5e Mon Sep 17 00:00:00 2001 From: Eric DeVolder Date: Wed, 17 May 2023 12:25:45 -0400 Subject: [PATCH 15/40] ACPI: bios-tables-test.c step 5 (update expected table binaries) Following the guidelines in tests/qtest/bios-tables-test.c, this is step 5 and 6. An examination of all the files impacted (as listed in bios-tables-test-allowe-diff.h) shows only the MADT/APIC tables bumping revision from 1 to 3, and a corresponding change to the checksum. The below diff is typical: --- /tmp/asl-1F9641.dsl 2023-05-16 15:18:31.292579156 -0400 +++ /tmp/asl-GVD741.dsl 2023-05-16 15:18:31.291579149 -0400 @@ -1,32 +1,32 @@ /* * Intel ACPI Component Architecture * AML/ASL+ Disassembler version 20230331 (64-bit version) * Copyright (c) 2000 - 2023 Intel Corporation * - * Disassembly of tests/data/acpi/pc/APIC, Tue May 16 15:18:31 2023 + * Disassembly of /tmp/aml-R4D741, Tue May 16 15:18:31 2023 * * ACPI Data Table [APIC] * * Format: [HexOffset DecimalOffset ByteLength] FieldName : FieldValue (in hex) */ [000h 0000 004h] Signature : "APIC" [Multiple APIC Description Table (MADT)] [004h 0004 004h] Table Length : 00000078 -[008h 0008 001h] Revision : 01 -[009h 0009 001h] Checksum : 8A +[008h 0008 001h] Revision : 03 +[009h 0009 001h] Checksum : 88 [00Ah 0010 006h] Oem ID : "BOCHS " [010h 0016 008h] Oem Table ID : "BXPC " [018h 0024 004h] Oem Revision : 00000001 [01Ch 0028 004h] Asl Compiler ID : "BXPC" [020h 0032 004h] Asl Compiler Revision : 00000001 [024h 0036 004h] Local Apic Address : FEE00000 [028h 0040 004h] Flags (decoded below) : 00000001 PC-AT Compatibility : 1 [02Ch 0044 001h] Subtable Type : 00 [Processor Local APIC] [02Dh 0045 001h] Length : 08 [02Eh 0046 001h] Processor ID : 00 [02Fh 0047 001h] Local Apic ID : 00 [030h 0048 004h] Flags (decoded below) : 00000001 Processor Enabled : 1 @@ -81,24 +81,24 @@ [06Bh 0107 001h] Source : 0B [06Ch 0108 004h] Interrupt : 0000000B [070h 0112 002h] Flags (decoded below) : 000D Polarity : 1 Trigger Mode : 3 [072h 0114 001h] Subtable Type : 04 [Local APIC NMI] [073h 0115 001h] Length : 06 [074h 0116 001h] Processor ID : FF [075h 0117 002h] Flags (decoded below) : 0000 Polarity : 0 Trigger Mode : 0 [077h 0119 001h] Interrupt Input LINT : 01 Raw Table Data: Length 120 (0x78) - 0000: 41 50 49 43 78 00 00 00 01 8A 42 4F 43 48 53 20 // APICx.....BOCHS + 0000: 41 50 49 43 78 00 00 00 03 88 42 4F 43 48 53 20 // APICx.....BOCHS 0010: 42 58 50 43 20 20 20 20 01 00 00 00 42 58 50 43 // BXPC ....BXPC 0020: 01 00 00 00 00 00 E0 FE 01 00 00 00 00 08 00 00 // ................ 0030: 01 00 00 00 01 0C 00 00 00 00 C0 FE 00 00 00 00 // ................ 0040: 02 0A 00 00 02 00 00 00 00 00 02 0A 00 05 05 00 // ................ 0050: 00 00 0D 00 02 0A 00 09 09 00 00 00 0D 00 02 0A // ................ 0060: 00 0A 0A 00 00 00 0D 00 02 0A 00 0B 0B 00 00 00 // ................ 0070: 0D 00 04 06 FF 00 00 01 // ........ Signed-off-by: Eric DeVolder Message-Id: <20230517162545.2191-4-eric.devolder@oracle.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Igor Mammedov Acked-by: Ani Sinha --- tests/data/acpi/microvm/APIC | Bin 70 -> 70 bytes tests/data/acpi/microvm/APIC.ioapic2 | Bin 82 -> 82 bytes tests/data/acpi/microvm/APIC.pcie | Bin 110 -> 110 bytes tests/data/acpi/pc/APIC | Bin 120 -> 120 bytes tests/data/acpi/pc/APIC.acpihmat | Bin 128 -> 128 bytes tests/data/acpi/pc/APIC.cphp | Bin 160 -> 160 bytes tests/data/acpi/pc/APIC.dimmpxm | Bin 144 -> 144 bytes tests/data/acpi/q35/APIC | Bin 120 -> 120 bytes tests/data/acpi/q35/APIC.acpihmat | Bin 128 -> 128 bytes tests/data/acpi/q35/APIC.acpihmat-noinitiator | Bin 144 -> 144 bytes tests/data/acpi/q35/APIC.core-count2 | Bin 2478 -> 2478 bytes tests/data/acpi/q35/APIC.cphp | Bin 160 -> 160 bytes tests/data/acpi/q35/APIC.dimmpxm | Bin 144 -> 144 bytes tests/data/acpi/q35/APIC.xapic | Bin 2686 -> 2686 bytes tests/qtest/bios-tables-test-allowed-diff.h | 14 -------------- 15 files changed, 14 deletions(-) diff --git a/tests/data/acpi/microvm/APIC b/tests/data/acpi/microvm/APIC index 68dbd44a7e35a356083f086df60f70e424c4249f..672764e711d80402890902ba9ded10915770e84c 100644 GIT binary patch delta 16 XcmZ>B<8ln}barE4U|=qq$Ylcn95e$) delta 16 XcmZ>B<8ln}barE4U|=kn$Ylcn95Mq& diff --git a/tests/data/acpi/microvm/APIC.ioapic2 b/tests/data/acpi/microvm/APIC.ioapic2 index 3063c52cd3e9bbed29c06031b375900f4a49b9e0..6f24fdb12ce3f1c13df7ff835e475d8023e20d4a 100644 GIT binary patch delta 16 XcmWFv;&Ke|bPi%*U|?>X$mIb59$W*3 delta 16 XcmWFv;&Ke|bPi%*U|?*X$mIb59$Ev1 diff --git a/tests/data/acpi/microvm/APIC.pcie b/tests/data/acpi/microvm/APIC.pcie index 4e8f6ed8d6a866429fc17aecdeafc3fb5ef65fa3..2239ca76a607fb1ff9d392298e2bd6461bba7ecf 100644 GIT binary patch delta 16 Xcmd1H<8ln}bk1X7U|_DA$dv*BBD@3c delta 16 Xcmd1H<8ln}bk1X7U|_77$dv*BBDw?a diff --git a/tests/data/acpi/pc/APIC b/tests/data/acpi/pc/APIC index 208331db53b7dd5c6205cce0e95427636b86dd64..868a3432f0295257393e45b75483ef4bec455d74 100644 GIT binary patch delta 16 Xcmb=Z;BpM`bgp1vU|{Z;$dv~GB#s0m delta 16 Xcmb=Z;BpM`bgp1vU|{T;$dv~GB#Z delta 18 ZcmbQhIDwJNF~HM#0s{jBqxVFvHUKF)1V;b> diff --git a/tests/data/acpi/q35/APIC b/tests/data/acpi/q35/APIC index 208331db53b7dd5c6205cce0e95427636b86dd64..868a3432f0295257393e45b75483ef4bec455d74 100644 GIT binary patch delta 16 Xcmb=Z;BpM`bgp1vU|{Z;$dv~GB#s0m delta 16 Xcmb=Z;BpM`bgp1vU|{T;$dv~GB#Z delta 18 ZcmbQhIDwJNF~HM#0s{jBqxVFvHUKF)1V;b> diff --git a/tests/data/acpi/q35/APIC.core-count2 b/tests/data/acpi/q35/APIC.core-count2 index a255082ef5bc39f0d92d3e372b91f09dd6d0d9a1..f5da2eb1e8a93d961b39f665f2e8b02acf1aeb3c 100644 GIT binary patch delta 19 acmZ1{yiS{I`URQ* delta 19 acmZ1{yiS{I_yw8( diff --git a/tests/data/acpi/q35/APIC.cphp b/tests/data/acpi/q35/APIC.cphp index 65cc4f4a9aa2676140a6525cdac1e838274b1e07..a2c2a24e5e3cf143b57a8932f78eeda6d7b8bbdb 100644 GIT binary patch delta 18 ZcmZ3$xPXz%F~HM#0RsaAv)DwgX#guQ1XKV3 delta 18 ZcmZ3$xPXz%F~HM#0RsaAqr^n6X#guO1XKV3 diff --git a/tests/data/acpi/q35/APIC.dimmpxm b/tests/data/acpi/q35/APIC.dimmpxm index d904d4a70ddecbb79a83a267af8e26f925e9f4c6..9b5922bc72db1fe64819a3970d6ca95543da799e 100644 GIT binary patch delta 18 ZcmbQhIDwJNF~HM#0s{jBv*$#vHUKF+1V;b> delta 18 ZcmbQhIDwJNF~HM#0s{jBqxVFvHUKF)1V;b> diff --git a/tests/data/acpi/q35/APIC.xapic b/tests/data/acpi/q35/APIC.xapic index c1969c35aa12b61d25e0134bbb8d2187ba42d663..83bd28325af9d6d7619015a9701866b8f3f1d754 100644 GIT binary patch delta 19 acmew-@=t`zF~HNgj*EeTS#2X%2^Ro9-UT)Q delta 19 acmew-@=t`zF~HNgj*EeTQDY-l2^Ro9+yyoO diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h index 66ae44e6b9..dfb8523c8b 100644 --- a/tests/qtest/bios-tables-test-allowed-diff.h +++ b/tests/qtest/bios-tables-test-allowed-diff.h @@ -1,15 +1 @@ /* List of comma-separated changed AML files to ignore */ -"tests/data/acpi/microvm/APIC", -"tests/data/acpi/microvm/APIC.ioapic2", -"tests/data/acpi/microvm/APIC.pcie", -"tests/data/acpi/pc/APIC", -"tests/data/acpi/pc/APIC.acpihmat", -"tests/data/acpi/pc/APIC.cphp", -"tests/data/acpi/pc/APIC.dimmpxm", -"tests/data/acpi/q35/APIC", -"tests/data/acpi/q35/APIC.acpihmat", -"tests/data/acpi/q35/APIC.acpihmat-noinitiator", -"tests/data/acpi/q35/APIC.core-count2", -"tests/data/acpi/q35/APIC.cphp", -"tests/data/acpi/q35/APIC.dimmpxm", -"tests/data/acpi/q35/APIC.xapic", From 4ab049c7e68919b92a5ece5ad5baa52d0a963676 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 15 May 2023 15:52:27 +0300 Subject: [PATCH 16/40] pci: pci_add_option_rom(): improve style Fix over-80 lines and missing curly brackets for if-operators, which are required by QEMU coding style. Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: David Hildenbrand Message-Id: <20230515125229.44836-2-vsementsov@yandex-team.ru> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Juan Quintela --- hw/pci/pci.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 8a87ccc8b0..e26e2a7e65 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -2312,10 +2312,9 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, char name[32]; const VMStateDescription *vmsd; - if (!pdev->romfile) - return; - if (strlen(pdev->romfile) == 0) + if (!pdev->romfile || !strlen(pdev->romfile)) { return; + } if (!pdev->rom_bar) { /* @@ -2364,7 +2363,8 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, } if (pdev->romsize != -1) { if (size > pdev->romsize) { - error_setg(errp, "romfile \"%s\" (%u bytes) is too large for ROM size %u", + error_setg(errp, "romfile \"%s\" (%u bytes) " + "is too large for ROM size %u", pdev->romfile, (uint32_t)size, pdev->romsize); g_free(path); return; @@ -2374,14 +2374,13 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, } vmsd = qdev_get_vmsd(DEVICE(pdev)); + snprintf(name, sizeof(name), "%s.rom", + vmsd ? vmsd->name : object_get_typename(OBJECT(pdev))); - if (vmsd) { - snprintf(name, sizeof(name), "%s.rom", vmsd->name); - } else { - snprintf(name, sizeof(name), "%s.rom", object_get_typename(OBJECT(pdev))); - } pdev->has_rom = true; - memory_region_init_rom(&pdev->rom, OBJECT(pdev), name, pdev->romsize, &error_fatal); + memory_region_init_rom(&pdev->rom, OBJECT(pdev), name, pdev->romsize, + &error_fatal); + ptr = memory_region_get_ram_ptr(&pdev->rom); if (load_image_size(path, ptr, size) < 0) { error_setg(errp, "failed to load romfile \"%s\"", pdev->romfile); From 5b52692f9d258d0e2637234832dc00042de03a4d Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Mon, 15 May 2023 15:52:28 +0300 Subject: [PATCH 17/40] pci: pci_add_option_rom(): refactor: use g_autofree for path variable Signed-off-by: Vladimir Sementsov-Ogievskiy Reviewed-by: David Hildenbrand Message-Id: <20230515125229.44836-3-vsementsov@yandex-team.ru> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Juan Quintela --- hw/pci/pci.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index e26e2a7e65..3a0107758c 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -2307,7 +2307,7 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, Error **errp) { int64_t size; - char *path; + g_autofree char *path = NULL; void *ptr; char name[32]; const VMStateDescription *vmsd; @@ -2349,16 +2349,13 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, size = get_image_size(path); if (size < 0) { error_setg(errp, "failed to find romfile \"%s\"", pdev->romfile); - g_free(path); return; } else if (size == 0) { error_setg(errp, "romfile \"%s\" is empty", pdev->romfile); - g_free(path); return; } else if (size > 2 * GiB) { error_setg(errp, "romfile \"%s\" too large (size cannot exceed 2 GiB)", pdev->romfile); - g_free(path); return; } if (pdev->romsize != -1) { @@ -2366,7 +2363,6 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, error_setg(errp, "romfile \"%s\" (%u bytes) " "is too large for ROM size %u", pdev->romfile, (uint32_t)size, pdev->romsize); - g_free(path); return; } } else { @@ -2384,10 +2380,8 @@ static void pci_add_option_rom(PCIDevice *pdev, bool is_default_rom, ptr = memory_region_get_ram_ptr(&pdev->rom); if (load_image_size(path, ptr, size) < 0) { error_setg(errp, "failed to load romfile \"%s\"", pdev->romfile); - g_free(path); return; } - g_free(path); if (is_default_rom) { /* Only the default rom images will be patched (if needed). */ From 6f8be29ec17d44496b9ed67599bceaaba72d1864 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 1 May 2023 19:04:09 -0400 Subject: [PATCH 18/40] vhost-user: send SET_STATUS 0 after GET_VRING_BASE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Setting the VIRTIO Device Status Field to 0 resets the device. The device's state is lost, including the vring configuration. vhost-user.c currently sends SET_STATUS 0 before GET_VRING_BASE. This risks confusion about the lifetime of the vhost-user state (e.g. vring last_avail_idx) across VIRTIO device reset. Eugenio Pérez adjusted the order for vhost-vdpa.c in commit c3716f260bff ("vdpa: move vhost reset after get vring base") and in that commit description suggested doing the same for vhost-user in the future. Go ahead and adjust vhost-user.c now. I ran various online code searches to identify vhost-user backends implementing SET_STATUS. It seems only DPDK implements SET_STATUS and Yajun Wu has confirmed that it is safe to make this change. Fixes: commit 923b8921d210763359e96246a58658ac0db6c645 ("vhost-user: Support vhost_dev_start") Cc: Michael S. Tsirkin Cc: Cindy Lu Cc: Yajun Wu Signed-off-by: Stefan Hajnoczi Message-Id: <20230501230409.274178-1-stefanha@redhat.com> Reviewed-by: Maxime Coquelin Reviewed-by: Yajun Wu  Acked-by: Eugenio Pérez Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-user.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index e5285df4ba..40974afd06 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -2677,7 +2677,20 @@ static int vhost_user_dev_start(struct vhost_dev *dev, bool started) VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK); } else { - return vhost_user_set_status(dev, 0); + return 0; + } +} + +static void vhost_user_reset_status(struct vhost_dev *dev) +{ + /* Set device status only for last queue pair */ + if (dev->vq_index + dev->nvqs != dev->vq_index_end) { + return; + } + + if (virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_STATUS)) { + vhost_user_set_status(dev, 0); } } @@ -2716,4 +2729,5 @@ const VhostOps user_ops = { .vhost_get_inflight_fd = vhost_user_get_inflight_fd, .vhost_set_inflight_fd = vhost_user_set_inflight_fd, .vhost_dev_start = vhost_user_dev_start, + .vhost_reset_status = vhost_user_reset_status, }; From 5ed3dabe57dd9f4c007404345e5f5bf0e347317f Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Tue, 2 May 2023 21:27:02 -0300 Subject: [PATCH 19/40] hw/pci: Disable PCI_ERR_UNCOR_MASK register for machine type < 8.0 Since it's implementation on v8.0.0-rc0, having the PCI_ERR_UNCOR_MASK set for machine types < 8.0 will cause migration to fail if the target QEMU version is < 8.0.0 : qemu-system-x86_64: get_pci_config_device: Bad config data: i=0x10a read: 40 device: 0 cmask: ff wmask: 0 w1cmask:0 qemu-system-x86_64: Failed to load PCIDevice:config qemu-system-x86_64: Failed to load e1000e:parent_obj qemu-system-x86_64: error while loading state for instance 0x0 of device '0000:00:02.0/e1000e' qemu-system-x86_64: load of migration failed: Invalid argument The above test migrated a 7.2 machine type from QEMU master to QEMU 7.2.0, with this cmdline: ./qemu-system-x86_64 -M pc-q35-7.2 [-incoming XXX] In order to fix this, property x-pcie-err-unc-mask was introduced to control when PCI_ERR_UNCOR_MASK is enabled. This property is enabled by default, but is disabled if machine type <= 7.2. Fixes: 010746ae1d ("hw/pci/aer: Implement PCI_ERR_UNCOR_MASK register") Suggested-by: Michael S. Tsirkin Signed-off-by: Leonardo Bras Message-Id: <20230503002701.854329-1-leobras@redhat.com> Reviewed-by: Jonathan Cameron Reviewed-by: Peter Xu Reviewed-by: Juan Quintela Fixes: https://gitlab.com/qemu-project/qemu/-/issues/1576 Tested-by: Fiona Ebner Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/core/machine.c | 1 + hw/pci/pci.c | 2 ++ hw/pci/pcie_aer.c | 11 +++++++---- include/hw/pci/pci.h | 2 ++ 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/hw/core/machine.c b/hw/core/machine.c index 47a34841a5..07f763eb2e 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -48,6 +48,7 @@ GlobalProperty hw_compat_7_2[] = { { "e1000e", "migrate-timadj", "off" }, { "virtio-mem", "x-early-migration", "false" }, { "migration", "x-preempt-pre-7-2", "true" }, + { TYPE_PCI_DEVICE, "x-pcie-err-unc-mask", "off" }, }; const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2); diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 3a0107758c..1cc7c89036 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -79,6 +79,8 @@ static Property pci_props[] = { DEFINE_PROP_STRING("failover_pair_id", PCIDevice, failover_pair_id), DEFINE_PROP_UINT32("acpi-index", PCIDevice, acpi_index, 0), + DEFINE_PROP_BIT("x-pcie-err-unc-mask", PCIDevice, cap_present, + QEMU_PCIE_ERR_UNC_MASK_BITNR, true), DEFINE_PROP_END_OF_LIST() }; diff --git a/hw/pci/pcie_aer.c b/hw/pci/pcie_aer.c index 103667c368..374d593ead 100644 --- a/hw/pci/pcie_aer.c +++ b/hw/pci/pcie_aer.c @@ -112,10 +112,13 @@ int pcie_aer_init(PCIDevice *dev, uint8_t cap_ver, uint16_t offset, pci_set_long(dev->w1cmask + offset + PCI_ERR_UNCOR_STATUS, PCI_ERR_UNC_SUPPORTED); - pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK, - PCI_ERR_UNC_MASK_DEFAULT); - pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK, - PCI_ERR_UNC_SUPPORTED); + + if (dev->cap_present & QEMU_PCIE_ERR_UNC_MASK) { + pci_set_long(dev->config + offset + PCI_ERR_UNCOR_MASK, + PCI_ERR_UNC_MASK_DEFAULT); + pci_set_long(dev->wmask + offset + PCI_ERR_UNCOR_MASK, + PCI_ERR_UNC_SUPPORTED); + } pci_set_long(dev->config + offset + PCI_ERR_UNCOR_SEVER, PCI_ERR_UNC_SEVERITY_DEFAULT); diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 935b4b91b4..e6d0574a29 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -207,6 +207,8 @@ enum { QEMU_PCIE_EXTCAP_INIT = (1 << QEMU_PCIE_EXTCAP_INIT_BITNR), #define QEMU_PCIE_CXL_BITNR 10 QEMU_PCIE_CAP_CXL = (1 << QEMU_PCIE_CXL_BITNR), +#define QEMU_PCIE_ERR_UNC_MASK_BITNR 11 + QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR), }; typedef struct PCIINTxRoute { From d5cef02574c126327a6f673c12b8718ce55f80e7 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 3 May 2023 20:23:52 +0200 Subject: [PATCH 20/40] virtio-mem: Default to "unplugged-inaccessible=on" with 8.1 on x86-64 Allowing guests to read unplugged memory simplified the bring-up of virtio-mem in Linux guests -- which was limited to x86-64 only. On arm64 (which was added later), we never had legacy guests and don't even allow to configure it, essentially always having "unplugged-inaccessible=on". At this point, all guests we care about should be supporting VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE, so let's change the default for the 8.1 machine. This change implies that also memory that supports the shared zeropage (private anonymous memory) will now require VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE in the driver in order to be usable by the guest -- as default, one can still manually set the unplugged-inaccessible property. Disallowing the guest to read unplugged memory will be important for some future features, such as memslot optimizations or protection of unplugged memory, whereby we'll actually no longer allow the guest to even read from unplugged memory. At some point, we might want to deprecate and remove that property. Cc: "Michael S. Tsirkin" Cc: Marcel Apfelbaum Cc: Paolo Bonzini Cc: Richard Henderson Cc: Eduardo Habkost Signed-off-by: David Hildenbrand Message-Id: <20230503182352.792458-1-david@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc.c | 4 +++- hw/virtio/virtio-mem.c | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index d761c8c775..7802dc21d9 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -116,7 +116,9 @@ { "qemu64-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, },\ { "athlon-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, }, -GlobalProperty pc_compat_8_0[] = {}; +GlobalProperty pc_compat_8_0[] = { + { "virtio-mem", "unplugged-inaccessible", "auto" }, +}; const size_t pc_compat_8_0_len = G_N_ELEMENTS(pc_compat_8_0); GlobalProperty pc_compat_7_2[] = { diff --git a/hw/virtio/virtio-mem.c b/hw/virtio/virtio-mem.c index 957fe77dc0..538b695c29 100644 --- a/hw/virtio/virtio-mem.c +++ b/hw/virtio/virtio-mem.c @@ -1341,7 +1341,7 @@ static Property virtio_mem_properties[] = { TYPE_MEMORY_BACKEND, HostMemoryBackend *), #if defined(VIRTIO_MEM_HAS_LEGACY_GUESTS) DEFINE_PROP_ON_OFF_AUTO(VIRTIO_MEM_UNPLUGGED_INACCESSIBLE_PROP, VirtIOMEM, - unplugged_inaccessible, ON_OFF_AUTO_AUTO), + unplugged_inaccessible, ON_OFF_AUTO_ON), #endif DEFINE_PROP_BOOL(VIRTIO_MEM_EARLY_MIGRATION_PROP, VirtIOMEM, early_migration, true), From bab105300bdeb7b651dd1b1f666ecb8e44be6d71 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 3 May 2023 20:41:44 +0200 Subject: [PATCH 21/40] vhost-user: Remove acpi-specific memslot limit Let's just support 512 memslots on x86-64 and aarch64 as well. The maximum number of ACPI slots (256) is no longer completely expressive ever since we supported virtio-based memory devices. Further, we're completely ignoring other memslots used outside of memory device context, such as memslots used for boot memory. Note that the vhost memslot limit in the kernel is usually configured to be 509. With this change, we prepare vhost-user on the QEMU side to be closer to that limit, to eventually support ~512 memslots in most vhost implementations and have less "surprises" when cold/hotplugging vhost devices while also consuming more memslots than we're currently used to by memory devices (e.g., once virtio-mem starts using multiple memslots). Note that most vhost-user implementations only support a small number of memslots so far, which we can hopefully improve in the near future. We'll leave the PPC special-case as is for now. Cc: "Michael S. Tsirkin" Cc: Igor Mammedov Cc: Stefan Hajnoczi Signed-off-by: David Hildenbrand Message-Id: <20230503184144.808478-1-david@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-user.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 40974afd06..e3ec8727da 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -42,17 +42,7 @@ #define VHOST_USER_F_PROTOCOL_FEATURES 30 #define VHOST_USER_BACKEND_MAX_FDS 8 -/* - * Set maximum number of RAM slots supported to - * the maximum number supported by the target - * hardware plaform. - */ -#if defined(TARGET_X86) || defined(TARGET_X86_64) || \ - defined(TARGET_ARM) || defined(TARGET_AARCH64) -#include "hw/acpi/acpi.h" -#define VHOST_USER_MAX_RAM_SLOTS ACPI_MAX_RAM_SLOTS - -#elif defined(TARGET_PPC) || defined(TARGET_PPC64) +#if defined(TARGET_PPC) || defined(TARGET_PPC64) #include "hw/ppc/spapr.h" #define VHOST_USER_MAX_RAM_SLOTS SPAPR_MAX_RAM_SLOTS From 1fac00f70b3261050af5564b20ca55c1b2a3059a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Thu, 4 May 2023 12:14:47 +0200 Subject: [PATCH 22/40] virtio-net: not enable vq reset feature unconditionally MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The commit 93a97dc5200a ("virtio-net: enable vq reset feature") enables unconditionally vq reset feature as long as the device is emulated. This makes impossible to actually disable the feature, and it causes migration problems from qemu version previous than 7.2. The entire final commit is unneeded as device system already enable or disable the feature properly. This reverts commit 93a97dc5200a95e63b99cb625f20b7ae802ba413. Fixes: 93a97dc5200a ("virtio-net: enable vq reset feature") Signed-off-by: Eugenio Pérez Message-Id: <20230504101447.389398-1-eperezma@redhat.com> Reviewed-by: Xuan Zhuo Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/net/virtio-net.c | 1 - 1 file changed, 1 deletion(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 447f669921..8ce239a34d 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -805,7 +805,6 @@ static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features, } if (!get_vhost_net(nc->peer)) { - virtio_add_feature(&features, VIRTIO_F_RING_RESET); return features; } From 3e69908907f8d3dd20d5753b0777a6e3824ba824 Mon Sep 17 00:00:00 2001 From: Mauro Matteo Cascella Date: Tue, 9 May 2023 09:53:17 +0200 Subject: [PATCH 23/40] virtio-crypto: fix NULL pointer dereference in virtio_crypto_free_request Ensure op_info is not NULL in case of QCRYPTODEV_BACKEND_ALG_SYM algtype. Fixes: 0e660a6f90a ("crypto: Introduce RSA algorithm") Signed-off-by: Mauro Matteo Cascella Reported-by: Yiming Tao Message-Id: <20230509075317.1132301-1-mcascell@redhat.com> Reviewed-by: Gonglei Reviewed-by: zhenwei pi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-crypto.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c index 2fe804510f..c729a1f79e 100644 --- a/hw/virtio/virtio-crypto.c +++ b/hw/virtio/virtio-crypto.c @@ -476,15 +476,17 @@ static void virtio_crypto_free_request(VirtIOCryptoReq *req) size_t max_len; CryptoDevBackendSymOpInfo *op_info = req->op_info.u.sym_op_info; - max_len = op_info->iv_len + - op_info->aad_len + - op_info->src_len + - op_info->dst_len + - op_info->digest_result_len; + if (op_info) { + max_len = op_info->iv_len + + op_info->aad_len + + op_info->src_len + + op_info->dst_len + + op_info->digest_result_len; - /* Zeroize and free request data structure */ - memset(op_info, 0, sizeof(*op_info) + max_len); - g_free(op_info); + /* Zeroize and free request data structure */ + memset(op_info, 0, sizeof(*op_info) + max_len); + g_free(op_info); + } } else if (req->flags == QCRYPTODEV_BACKEND_ALG_ASYM) { CryptoDevBackendAsymOpInfo *op_info = req->op_info.u.asym_op_info; if (op_info) { From 74b5d2b56c85515f1559ee0dfa8289bbb9832d51 Mon Sep 17 00:00:00 2001 From: Cindy Lu Date: Wed, 10 May 2023 13:46:28 +0800 Subject: [PATCH 24/40] vhost: expose function vhost_dev_has_iommu() To support vIOMMU in vdpa, need to exposed the function vhost_dev_has_iommu, vdpa will use this function to check if vIOMMU enable. Signed-off-by: Cindy Lu Message-Id: <20230510054631.2951812-2-lulu@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost.c | 2 +- include/hw/virtio/vhost.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 746d130c74..23da579ce2 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -107,7 +107,7 @@ static void vhost_dev_sync_region(struct vhost_dev *dev, } } -static bool vhost_dev_has_iommu(struct vhost_dev *dev) +bool vhost_dev_has_iommu(struct vhost_dev *dev) { VirtIODevice *vdev = dev->vdev; diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index a52f273347..f7f10c8fb7 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -336,4 +336,5 @@ int vhost_dev_set_inflight(struct vhost_dev *dev, struct vhost_inflight *inflight); int vhost_dev_get_inflight(struct vhost_dev *dev, uint16_t queue_size, struct vhost_inflight *inflight); +bool vhost_dev_has_iommu(struct vhost_dev *dev); #endif From 3d1e4d34a81a212e234f674e57e73c824d4b131a Mon Sep 17 00:00:00 2001 From: Cindy Lu Date: Wed, 10 May 2023 13:46:29 +0800 Subject: [PATCH 25/40] vhost_vdpa: fix the input in trace_vhost_vdpa_listener_region_del() In trace_vhost_vdpa_listener_region_del, the value for llend should change to int128_get64(int128_sub(llend, int128_one())) Signed-off-by: Cindy Lu Message-Id: <20230510054631.2951812-3-lulu@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-vdpa.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index bc6bad23d5..92c2413c76 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -288,7 +288,8 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, iova = TARGET_PAGE_ALIGN(section->offset_within_address_space); llend = vhost_vdpa_section_end(section); - trace_vhost_vdpa_listener_region_del(v, iova, int128_get64(llend)); + trace_vhost_vdpa_listener_region_del(v, iova, + int128_get64(int128_sub(llend, int128_one()))); if (int128_ge(int128_make64(iova), llend)) { return; From 2fbef6aad892a3e784041fc5a6d5f5bda0565464 Mon Sep 17 00:00:00 2001 From: Cindy Lu Date: Wed, 10 May 2023 13:46:30 +0800 Subject: [PATCH 26/40] vhost-vdpa: Add check for full 64-bit in region delete The unmap ioctl doesn't accept a full 64-bit span. So need to add check for the section's size in vhost_vdpa_listener_region_del(). Signed-off-by: Cindy Lu Message-Id: <20230510054631.2951812-4-lulu@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-vdpa.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 92c2413c76..0c8c37e786 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -316,10 +316,28 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, vhost_iova_tree_remove(v->iova_tree, *result); } vhost_vdpa_iotlb_batch_begin_once(v); + /* + * The unmap ioctl doesn't accept a full 64-bit. need to check it + */ + if (int128_eq(llsize, int128_2_64())) { + llsize = int128_rshift(llsize, 1); + ret = vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova, + int128_get64(llsize)); + + if (ret) { + error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", " + "0x%" HWADDR_PRIx ") = %d (%m)", + v, iova, int128_get64(llsize), ret); + } + iova += int128_get64(llsize); + } ret = vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova, int128_get64(llsize)); + if (ret) { - error_report("vhost_vdpa dma unmap error!"); + error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", " + "0x%" HWADDR_PRIx ") = %d (%m)", + v, iova, int128_get64(llsize), ret); } memory_region_unref(section->mr); From bc7b0cac7bf41f24ceb84a03b491f93c378529a4 Mon Sep 17 00:00:00 2001 From: Cindy Lu Date: Wed, 10 May 2023 13:46:31 +0800 Subject: [PATCH 27/40] vhost-vdpa: Add support for vIOMMU. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. The vIOMMU support will make vDPA can work in IOMMU mode. This will fix security issues while using the no-IOMMU mode. To support this feature we need to add new functions for IOMMU MR adds and deletes. Also since the SVQ does not support vIOMMU yet, add the check for IOMMU in vhost_vdpa_dev_start, if the SVQ and IOMMU enable at the same time the function will return fail. 2. Skip the iova_max check vhost_vdpa_listener_skipped_section(). While MR is IOMMU, move this check to vhost_vdpa_iommu_map_notify() Verified in vp_vdpa and vdpa_sim_net driver Signed-off-by: Cindy Lu Message-Id: <20230510054631.2951812-5-lulu@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-vdpa.c | 145 +++++++++++++++++++++++++++++++-- include/hw/virtio/vhost-vdpa.h | 11 +++ 2 files changed, 149 insertions(+), 7 deletions(-) diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 0c8c37e786..b3094e8a8b 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -26,6 +26,7 @@ #include "cpu.h" #include "trace.h" #include "qapi/error.h" +#include "hw/virtio/virtio-access.h" /* * Return one past the end of the end of section. Be careful with uint64_t @@ -60,13 +61,21 @@ static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section, iova_min, section->offset_within_address_space); return true; } + /* + * While using vIOMMU, sometimes the section will be larger than iova_max, + * but the memory that actually maps is smaller, so move the check to + * function vhost_vdpa_iommu_map_notify(). That function will use the actual + * size that maps to the kernel + */ - llend = vhost_vdpa_section_end(section); - if (int128_gt(llend, int128_make64(iova_max))) { - error_report("RAM section out of device range (max=0x%" PRIx64 - ", end addr=0x%" PRIx64 ")", - iova_max, int128_get64(llend)); - return true; + if (!memory_region_is_iommu(section->mr)) { + llend = vhost_vdpa_section_end(section); + if (int128_gt(llend, int128_make64(iova_max))) { + error_report("RAM section out of device range (max=0x%" PRIx64 + ", end addr=0x%" PRIx64 ")", + iova_max, int128_get64(llend)); + return true; + } } return false; @@ -185,6 +194,115 @@ static void vhost_vdpa_listener_commit(MemoryListener *listener) v->iotlb_batch_begin_sent = false; } +static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) +{ + struct vdpa_iommu *iommu = container_of(n, struct vdpa_iommu, n); + + hwaddr iova = iotlb->iova + iommu->iommu_offset; + struct vhost_vdpa *v = iommu->dev; + void *vaddr; + int ret; + Int128 llend; + + if (iotlb->target_as != &address_space_memory) { + error_report("Wrong target AS \"%s\", only system memory is allowed", + iotlb->target_as->name ? iotlb->target_as->name : "none"); + return; + } + RCU_READ_LOCK_GUARD(); + /* check if RAM section out of device range */ + llend = int128_add(int128_makes64(iotlb->addr_mask), int128_makes64(iova)); + if (int128_gt(llend, int128_make64(v->iova_range.last))) { + error_report("RAM section out of device range (max=0x%" PRIx64 + ", end addr=0x%" PRIx64 ")", + v->iova_range.last, int128_get64(llend)); + return; + } + + if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) { + bool read_only; + + if (!memory_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, NULL)) { + return; + } + ret = vhost_vdpa_dma_map(v, VHOST_VDPA_GUEST_PA_ASID, iova, + iotlb->addr_mask + 1, vaddr, read_only); + if (ret) { + error_report("vhost_vdpa_dma_map(%p, 0x%" HWADDR_PRIx ", " + "0x%" HWADDR_PRIx ", %p) = %d (%m)", + v, iova, iotlb->addr_mask + 1, vaddr, ret); + } + } else { + ret = vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova, + iotlb->addr_mask + 1); + if (ret) { + error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", " + "0x%" HWADDR_PRIx ") = %d (%m)", + v, iova, iotlb->addr_mask + 1, ret); + } + } +} + +static void vhost_vdpa_iommu_region_add(MemoryListener *listener, + MemoryRegionSection *section) +{ + struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); + + struct vdpa_iommu *iommu; + Int128 end; + int iommu_idx; + IOMMUMemoryRegion *iommu_mr; + int ret; + + iommu_mr = IOMMU_MEMORY_REGION(section->mr); + + iommu = g_malloc0(sizeof(*iommu)); + end = int128_add(int128_make64(section->offset_within_region), + section->size); + end = int128_sub(end, int128_one()); + iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr, + MEMTXATTRS_UNSPECIFIED); + iommu->iommu_mr = iommu_mr; + iommu_notifier_init(&iommu->n, vhost_vdpa_iommu_map_notify, + IOMMU_NOTIFIER_IOTLB_EVENTS, + section->offset_within_region, + int128_get64(end), + iommu_idx); + iommu->iommu_offset = section->offset_within_address_space - + section->offset_within_region; + iommu->dev = v; + + ret = memory_region_register_iommu_notifier(section->mr, &iommu->n, NULL); + if (ret) { + g_free(iommu); + return; + } + + QLIST_INSERT_HEAD(&v->iommu_list, iommu, iommu_next); + memory_region_iommu_replay(iommu->iommu_mr, &iommu->n); + + return; +} + +static void vhost_vdpa_iommu_region_del(MemoryListener *listener, + MemoryRegionSection *section) +{ + struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); + + struct vdpa_iommu *iommu; + + QLIST_FOREACH(iommu, &v->iommu_list, iommu_next) + { + if (MEMORY_REGION(iommu->iommu_mr) == section->mr && + iommu->n.start == section->offset_within_region) { + memory_region_unregister_iommu_notifier(section->mr, &iommu->n); + QLIST_REMOVE(iommu, iommu_next); + g_free(iommu); + break; + } + } +} + static void vhost_vdpa_listener_region_add(MemoryListener *listener, MemoryRegionSection *section) { @@ -199,6 +317,10 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, v->iova_range.last)) { return; } + if (memory_region_is_iommu(section->mr)) { + vhost_vdpa_iommu_region_add(listener, section); + return; + } if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != (section->offset_within_region & ~TARGET_PAGE_MASK))) { @@ -278,6 +400,9 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, v->iova_range.last)) { return; } + if (memory_region_is_iommu(section->mr)) { + vhost_vdpa_iommu_region_del(listener, section); + } if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != (section->offset_within_region & ~TARGET_PAGE_MASK))) { @@ -1182,7 +1307,13 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) } if (started) { - memory_listener_register(&v->listener, &address_space_memory); + if (vhost_dev_has_iommu(dev) && (v->shadow_vqs_enabled)) { + error_report("SVQ can not work while IOMMU enable, please disable" + "IOMMU and try again"); + return -1; + } + memory_listener_register(&v->listener, dev->vdev->dma_as); + return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); } diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h index c278a2a8de..e64bfc7f98 100644 --- a/include/hw/virtio/vhost-vdpa.h +++ b/include/hw/virtio/vhost-vdpa.h @@ -52,6 +52,8 @@ typedef struct vhost_vdpa { struct vhost_dev *dev; Error *migration_blocker; VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX]; + QLIST_HEAD(, vdpa_iommu) iommu_list; + IOMMUNotifier n; } VhostVDPA; int vhost_vdpa_get_iova_range(int fd, struct vhost_vdpa_iova_range *iova_range); @@ -61,4 +63,13 @@ int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, hwaddr size); +typedef struct vdpa_iommu { + struct vhost_vdpa *dev; + IOMMUMemoryRegion *iommu_mr; + hwaddr iommu_offset; + IOMMUNotifier n; + QLIST_ENTRY(vdpa_iommu) iommu_next; +} VDPAIOMMUState; + + #endif From 273d65020b04f8a01e4b5cd543aeef1624b17001 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Mon, 13 Feb 2023 17:19:56 +0100 Subject: [PATCH 28/40] hw/pci-host/i440fx: Inline sysbus_add_io() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sysbus_add_io() just wraps memory_region_add_subregion() while also obscuring where the memory is attached. So use memory_region_add_subregion() directly and attach it to the existing memory region s->bus->address_space_io which is set as an alias to get_system_io() by the pc machine. Signed-off-by: Bernhard Beschow Reviewed-by: Thomas Huth Message-Id: <20230213162004.2797-2-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Philippe Mathieu-Daudé --- hw/pci-host/i440fx.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c index 262f82c303..9c6882d3fc 100644 --- a/hw/pci-host/i440fx.c +++ b/hw/pci-host/i440fx.c @@ -27,6 +27,7 @@ #include "qemu/range.h" #include "hw/i386/pc.h" #include "hw/pci/pci.h" +#include "hw/pci/pci_bus.h" #include "hw/pci/pci_host.h" #include "hw/pci-host/i440fx.h" #include "hw/qdev-properties.h" @@ -217,10 +218,10 @@ static void i440fx_pcihost_realize(DeviceState *dev, Error **errp) PCIHostState *s = PCI_HOST_BRIDGE(dev); SysBusDevice *sbd = SYS_BUS_DEVICE(dev); - sysbus_add_io(sbd, 0xcf8, &s->conf_mem); + memory_region_add_subregion(s->bus->address_space_io, 0xcf8, &s->conf_mem); sysbus_init_ioports(sbd, 0xcf8, 4); - sysbus_add_io(sbd, 0xcfc, &s->data_mem); + memory_region_add_subregion(s->bus->address_space_io, 0xcfc, &s->data_mem); sysbus_init_ioports(sbd, 0xcfc, 4); /* register i440fx 0xcf8 port as coalesced pio */ From 67b4a74a0743c4cdb78bf884cea2407645530af3 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Mon, 13 Feb 2023 17:19:57 +0100 Subject: [PATCH 29/40] hw/pci-host/q35: Inline sysbus_add_io() sysbus_add_io() just wraps memory_region_add_subregion() while also obscuring where the memory is attached. So use memory_region_add_subregion() directly and attach it to the existing memory region s->mch.address_space_io which is set as an alias to get_system_io() by the q35 machine. Signed-off-by: Bernhard Beschow Reviewed-by: Thomas Huth Message-Id: <20230213162004.2797-3-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci-host/q35.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index 26390863d6..fa05844319 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -50,10 +50,12 @@ static void q35_host_realize(DeviceState *dev, Error **errp) Q35PCIHost *s = Q35_HOST_DEVICE(dev); SysBusDevice *sbd = SYS_BUS_DEVICE(dev); - sysbus_add_io(sbd, MCH_HOST_BRIDGE_CONFIG_ADDR, &pci->conf_mem); + memory_region_add_subregion(s->mch.address_space_io, + MCH_HOST_BRIDGE_CONFIG_ADDR, &pci->conf_mem); sysbus_init_ioports(sbd, MCH_HOST_BRIDGE_CONFIG_ADDR, 4); - sysbus_add_io(sbd, MCH_HOST_BRIDGE_CONFIG_DATA, &pci->data_mem); + memory_region_add_subregion(s->mch.address_space_io, + MCH_HOST_BRIDGE_CONFIG_DATA, &pci->data_mem); sysbus_init_ioports(sbd, MCH_HOST_BRIDGE_CONFIG_DATA, 4); /* register q35 0xcf8 port as coalesced pio */ From 1ab7167b09446996a8c967c6fe2eb4dcb9f53c87 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Mon, 13 Feb 2023 17:19:58 +0100 Subject: [PATCH 30/40] hw/i386/pc_q35: Reuse machine parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Bernhard Beschow Reviewed-by: Thomas Huth Message-Id: <20230213162004.2797-4-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Philippe Mathieu-Daudé --- hw/i386/pc_q35.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index f02919d92c..a76c2d4501 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -218,7 +218,7 @@ static void pc_q35_init(MachineState *machine) pc_memory_init(pcms, get_system_memory(), rom_memory, &ram_memory, pci_hole64_size); - object_property_add_child(qdev_get_machine(), "q35", OBJECT(q35_host)); + object_property_add_child(OBJECT(machine), "q35", OBJECT(q35_host)); object_property_set_link(OBJECT(q35_host), MCH_HOST_PROP_RAM_MEM, OBJECT(ram_memory), NULL); object_property_set_link(OBJECT(q35_host), MCH_HOST_PROP_PCI_MEM, From 1e366da0318bd9fb8cea66914e9682ad6e7d0a94 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Mon, 13 Feb 2023 17:19:59 +0100 Subject: [PATCH 31/40] hw/i386/pc_{q35,piix}: Reuse MachineClass::desc as SMB product name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No need to repeat the descriptions. Signed-off-by: Bernhard Beschow Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Thomas Huth Message-Id: <20230213162004.2797-5-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc_piix.c | 2 +- hw/i386/pc_q35.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 66a849d279..a9c40201fb 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -198,7 +198,7 @@ static void pc_init1(MachineState *machine, if (pcmc->smbios_defaults) { MachineClass *mc = MACHINE_GET_CLASS(machine); /* These values are guest ABI, do not change */ - smbios_set_defaults("QEMU", "Standard PC (i440FX + PIIX, 1996)", + smbios_set_defaults("QEMU", mc->desc, mc->name, pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, pcms->smbios_entry_point_type); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index a76c2d4501..89e69737d6 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -199,7 +199,7 @@ static void pc_q35_init(MachineState *machine) if (pcmc->smbios_defaults) { /* These values are guest ABI, do not change */ - smbios_set_defaults("QEMU", "Standard PC (Q35 + ICH9, 2009)", + smbios_set_defaults("QEMU", mc->desc, mc->name, pcmc->smbios_legacy_mode, pcmc->smbios_uuid_encoded, pcms->smbios_entry_point_type); From 8631743c0968d11983a3a04939c580160bb79ac3 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Mon, 13 Feb 2023 17:20:00 +0100 Subject: [PATCH 32/40] hw/i386/pc_{q35,piix}: Minimize usage of get_system_memory() Signed-off-by: Bernhard Beschow Reviewed-by: Thomas Huth Message-Id: <20230213162004.2797-6-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc_piix.c | 2 +- hw/i386/pc_q35.c | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index a9c40201fb..ac7b1a9194 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -242,7 +242,7 @@ static void pc_init1(MachineState *machine, isa_bus = ISA_BUS(qdev_get_child_bus(DEVICE(piix3), "isa.0")); } else { pci_bus = NULL; - isa_bus = isa_bus_new(NULL, get_system_memory(), system_io, + isa_bus = isa_bus_new(NULL, system_memory, system_io, &error_abort); i8257_dma_init(isa_bus, 0); pcms->hpet_enabled = false; diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 89e69737d6..1490f87adb 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -126,6 +126,7 @@ static void pc_q35_init(MachineState *machine) DeviceState *lpc_dev; BusState *idebus[MAX_SATA_PORTS]; ISADevice *rtc_state; + MemoryRegion *system_memory = get_system_memory(); MemoryRegion *system_io = get_system_io(); MemoryRegion *pci_memory; MemoryRegion *rom_memory; @@ -192,7 +193,7 @@ static void pc_q35_init(MachineState *machine) rom_memory = pci_memory; } else { pci_memory = NULL; - rom_memory = get_system_memory(); + rom_memory = system_memory; } pc_guest_info_init(pcms); @@ -215,7 +216,7 @@ static void pc_q35_init(MachineState *machine) } /* allocate ram and load rom/bios */ - pc_memory_init(pcms, get_system_memory(), rom_memory, &ram_memory, + pc_memory_init(pcms, system_memory, rom_memory, &ram_memory, pci_hole64_size); object_property_add_child(OBJECT(machine), "q35", OBJECT(q35_host)); @@ -224,7 +225,7 @@ static void pc_q35_init(MachineState *machine) object_property_set_link(OBJECT(q35_host), MCH_HOST_PROP_PCI_MEM, OBJECT(pci_memory), NULL); object_property_set_link(OBJECT(q35_host), MCH_HOST_PROP_SYSTEM_MEM, - OBJECT(get_system_memory()), NULL); + OBJECT(system_memory), NULL); object_property_set_link(OBJECT(q35_host), MCH_HOST_PROP_IO_MEM, OBJECT(system_io), NULL); object_property_set_int(OBJECT(q35_host), PCI_HOST_BELOW_4G_MEM_SIZE, From f9fddaf7ce26acc48fc899673affe957103862a5 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Mon, 13 Feb 2023 17:20:01 +0100 Subject: [PATCH 33/40] hw/i386/pc: Initialize ram_memory variable directly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Going through pc_memory_init() seems quite complicated for a simple assignment. Signed-off-by: Bernhard Beschow Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20230213162004.2797-7-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc.c | 2 -- hw/i386/pc_piix.c | 4 ++-- hw/i386/pc_q35.c | 6 ++---- include/hw/i386/pc.h | 1 - 4 files changed, 4 insertions(+), 9 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 7802dc21d9..03da571bda 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -952,7 +952,6 @@ static hwaddr pc_max_used_gpa(PCMachineState *pcms, uint64_t pci_hole64_size) void pc_memory_init(PCMachineState *pcms, MemoryRegion *system_memory, MemoryRegion *rom_memory, - MemoryRegion **ram_memory, uint64_t pci_hole64_size) { int linux_boot, i; @@ -1010,7 +1009,6 @@ void pc_memory_init(PCMachineState *pcms, * Split single memory region and use aliases to address portions of it, * done for backwards compatibility with older qemus. */ - *ram_memory = machine->ram; ram_below_4g = g_malloc(sizeof(*ram_below_4g)); memory_region_init_alias(ram_below_4g, NULL, "ram-below-4g", machine->ram, 0, x86ms->below_4g_mem_size); diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index ac7b1a9194..1acf02e711 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -144,6 +144,7 @@ static void pc_init1(MachineState *machine, if (xen_enabled()) { xen_hvm_init_pc(pcms, &ram_memory); } else { + ram_memory = machine->ram; if (!pcms->max_ram_below_4g) { pcms->max_ram_below_4g = 0xe0000000; /* default: 3.5G */ } @@ -206,8 +207,7 @@ static void pc_init1(MachineState *machine, /* allocate ram and load rom/bios */ if (!xen_enabled()) { - pc_memory_init(pcms, system_memory, - rom_memory, &ram_memory, hole64_size); + pc_memory_init(pcms, system_memory, rom_memory, hole64_size); } else { pc_system_flash_cleanup_unused(pcms); if (machine->kernel_filename != NULL) { diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 1490f87adb..c1535af739 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -130,7 +130,6 @@ static void pc_q35_init(MachineState *machine) MemoryRegion *system_io = get_system_io(); MemoryRegion *pci_memory; MemoryRegion *rom_memory; - MemoryRegion *ram_memory; GSIState *gsi_state; ISABus *isa_bus; int i; @@ -216,12 +215,11 @@ static void pc_q35_init(MachineState *machine) } /* allocate ram and load rom/bios */ - pc_memory_init(pcms, system_memory, rom_memory, &ram_memory, - pci_hole64_size); + pc_memory_init(pcms, system_memory, rom_memory, pci_hole64_size); object_property_add_child(OBJECT(machine), "q35", OBJECT(q35_host)); object_property_set_link(OBJECT(q35_host), MCH_HOST_PROP_RAM_MEM, - OBJECT(ram_memory), NULL); + OBJECT(machine->ram), NULL); object_property_set_link(OBJECT(q35_host), MCH_HOST_PROP_PCI_MEM, OBJECT(pci_memory), NULL); object_property_set_link(OBJECT(q35_host), MCH_HOST_PROP_SYSTEM_MEM, diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 84935fc958..4e638564ca 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -162,7 +162,6 @@ void xen_load_linux(PCMachineState *pcms); void pc_memory_init(PCMachineState *pcms, MemoryRegion *system_memory, MemoryRegion *rom_memory, - MemoryRegion **ram_memory, uint64_t pci_hole64_size); uint64_t pc_pci_hole64_start(void); DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus); From 9e57b81861e05b2856ed1c4fbc2d991801c7c777 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Mon, 13 Feb 2023 17:20:02 +0100 Subject: [PATCH 34/40] hw/pci-host/pam: Make init_pam() usage more readable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Unlike pam_update() which takes the subject -- PAMMemoryRegion -- as first argument, init_pam() takes it as fifth (!) argument. This makes it quite hard to figure out what an init_pam() invocation actually initializes. By moving the subject to the front this should become clearer. While at it, lower the DeviceState parameter to Object, also communicating more clearly that this parameter is just the owner rather than some (heavy?) dependency. Signed-off-by: Bernhard Beschow Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20230213162004.2797-8-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci-host/i440fx.c | 10 +++++----- hw/pci-host/pam.c | 12 ++++++------ hw/pci-host/q35.c | 8 ++++---- include/hw/pci-host/pam.h | 5 +++-- 4 files changed, 18 insertions(+), 17 deletions(-) diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c index 9c6882d3fc..61e7b97ff4 100644 --- a/hw/pci-host/i440fx.c +++ b/hw/pci-host/i440fx.c @@ -292,12 +292,12 @@ PCIBus *i440fx_init(const char *pci_type, object_property_add_const_link(qdev_get_machine(), "smram", OBJECT(&f->smram)); - init_pam(dev, f->ram_memory, f->system_memory, f->pci_address_space, - &f->pam_regions[0], PAM_BIOS_BASE, PAM_BIOS_SIZE); + init_pam(&f->pam_regions[0], OBJECT(d), f->ram_memory, f->system_memory, + f->pci_address_space, PAM_BIOS_BASE, PAM_BIOS_SIZE); for (i = 0; i < ARRAY_SIZE(f->pam_regions) - 1; ++i) { - init_pam(dev, f->ram_memory, f->system_memory, f->pci_address_space, - &f->pam_regions[i+1], PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, - PAM_EXPAN_SIZE); + init_pam(&f->pam_regions[i + 1], OBJECT(d), f->ram_memory, + f->system_memory, f->pci_address_space, + PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE); } ram_size = ram_size / 8 / 1024 / 1024; diff --git a/hw/pci-host/pam.c b/hw/pci-host/pam.c index 454dd120db..68e9884d27 100644 --- a/hw/pci-host/pam.c +++ b/hw/pci-host/pam.c @@ -30,24 +30,24 @@ #include "qemu/osdep.h" #include "hw/pci-host/pam.h" -void init_pam(DeviceState *dev, MemoryRegion *ram_memory, +void init_pam(PAMMemoryRegion *mem, Object *owner, MemoryRegion *ram_memory, MemoryRegion *system_memory, MemoryRegion *pci_address_space, - PAMMemoryRegion *mem, uint32_t start, uint32_t size) + uint32_t start, uint32_t size) { int i; /* RAM */ - memory_region_init_alias(&mem->alias[3], OBJECT(dev), "pam-ram", ram_memory, + memory_region_init_alias(&mem->alias[3], owner, "pam-ram", ram_memory, start, size); /* ROM (XXX: not quite correct) */ - memory_region_init_alias(&mem->alias[1], OBJECT(dev), "pam-rom", ram_memory, + memory_region_init_alias(&mem->alias[1], owner, "pam-rom", ram_memory, start, size); memory_region_set_readonly(&mem->alias[1], true); /* XXX: should distinguish read/write cases */ - memory_region_init_alias(&mem->alias[0], OBJECT(dev), "pam-pci", pci_address_space, + memory_region_init_alias(&mem->alias[0], owner, "pam-pci", pci_address_space, start, size); - memory_region_init_alias(&mem->alias[2], OBJECT(dev), "pam-pci", ram_memory, + memory_region_init_alias(&mem->alias[2], owner, "pam-pci", ram_memory, start, size); memory_region_transaction_begin(); diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index fa05844319..fd18920e7f 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -645,12 +645,12 @@ static void mch_realize(PCIDevice *d, Error **errp) object_property_add_const_link(qdev_get_machine(), "smram", OBJECT(&mch->smram)); - init_pam(DEVICE(mch), mch->ram_memory, mch->system_memory, - mch->pci_address_space, &mch->pam_regions[0], + init_pam(&mch->pam_regions[0], OBJECT(mch), mch->ram_memory, + mch->system_memory, mch->pci_address_space, PAM_BIOS_BASE, PAM_BIOS_SIZE); for (i = 0; i < ARRAY_SIZE(mch->pam_regions) - 1; ++i) { - init_pam(DEVICE(mch), mch->ram_memory, mch->system_memory, - mch->pci_address_space, &mch->pam_regions[i+1], + init_pam(&mch->pam_regions[i + 1], OBJECT(mch), mch->ram_memory, + mch->system_memory, mch->pci_address_space, PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE); } } diff --git a/include/hw/pci-host/pam.h b/include/hw/pci-host/pam.h index c1fd06ba2a..005916f826 100644 --- a/include/hw/pci-host/pam.h +++ b/include/hw/pci-host/pam.h @@ -87,8 +87,9 @@ typedef struct PAMMemoryRegion { unsigned current; } PAMMemoryRegion; -void init_pam(DeviceState *dev, MemoryRegion *ram, MemoryRegion *system, - MemoryRegion *pci, PAMMemoryRegion *mem, uint32_t start, uint32_t size); +void init_pam(PAMMemoryRegion *mem, Object *owner, MemoryRegion *ram, + MemoryRegion *system, MemoryRegion *pci, + uint32_t start, uint32_t size); void pam_update(PAMMemoryRegion *mem, int idx, uint8_t val); #endif /* QEMU_PAM_H */ From 206e91d143301414df2deb48a411e402414ba6db Mon Sep 17 00:00:00 2001 From: Viktor Prutyanov Date: Fri, 12 May 2023 16:51:20 +0300 Subject: [PATCH 35/40] virtio-pci: add handling of PCI ATS and Device-TLB enable/disable According to PCIe Address Translation Services specification 5.1.3., ATS Control Register has Enable bit to enable/disable ATS. Guest may enable/disable PCI ATS and, accordingly, Device-TLB for the VirtIO PCI device. So, raise/lower a flag and call a trigger function to pass this event to a device implementation. Signed-off-by: Viktor Prutyanov Message-Id: <20230512135122.70403-2-viktor@daynix.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-pci.c | 36 ++++++++++++++++++++++++++++++++++++ include/hw/virtio/virtio.h | 2 ++ 2 files changed, 38 insertions(+) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 02fb84a8fa..edbc0daa18 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -716,6 +716,38 @@ virtio_address_space_read(VirtIOPCIProxy *proxy, hwaddr addr, } } +static void virtio_pci_ats_ctrl_trigger(PCIDevice *pci_dev, bool enable) +{ + VirtIOPCIProxy *proxy = VIRTIO_PCI(pci_dev); + VirtIODevice *vdev = virtio_bus_get_device(&proxy->bus); + VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev); + + vdev->device_iotlb_enabled = enable; + + if (k->toggle_device_iotlb) { + k->toggle_device_iotlb(vdev); + } +} + +static void pcie_ats_config_write(PCIDevice *dev, uint32_t address, + uint32_t val, int len) +{ + uint32_t off; + uint16_t ats_cap = dev->exp.ats_cap; + + if (!ats_cap || address < ats_cap) { + return; + } + off = address - ats_cap; + if (off >= PCI_EXT_CAP_ATS_SIZEOF) { + return; + } + + if (range_covers_byte(off, len, PCI_ATS_CTRL + 1)) { + virtio_pci_ats_ctrl_trigger(dev, !!(val & PCI_ATS_CTRL_ENABLE)); + } +} + static void virtio_write_config(PCIDevice *pci_dev, uint32_t address, uint32_t val, int len) { @@ -729,6 +761,10 @@ static void virtio_write_config(PCIDevice *pci_dev, uint32_t address, pcie_cap_flr_write_config(pci_dev, address, val, len); } + if (proxy->flags & VIRTIO_PCI_FLAG_ATS) { + pcie_ats_config_write(pci_dev, address, val, len); + } + if (range_covers_byte(address, len, PCI_COMMAND)) { if (!(pci_dev->config[PCI_COMMAND] & PCI_COMMAND_MASTER)) { virtio_set_disabled(vdev, true); diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index f6b38f7e9c..af86ed7249 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -155,6 +155,7 @@ struct VirtIODevice QLIST_HEAD(, VirtQueue) *vector_queues; QTAILQ_ENTRY(VirtIODevice) next; EventNotifier config_notifier; + bool device_iotlb_enabled; }; struct VirtioDeviceClass { @@ -212,6 +213,7 @@ struct VirtioDeviceClass { const VMStateDescription *vmsd; bool (*primary_unplug_pending)(void *opaque); struct vhost_dev *(*get_vhost)(VirtIODevice *vdev); + void (*toggle_device_iotlb)(VirtIODevice *vdev); }; void virtio_instance_init_common(Object *proxy_obj, void *data, From 6a36a4ced803838cbba5f90b1b765d8ef6255115 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 15 May 2023 16:28:46 +0200 Subject: [PATCH 36/40] hw/pci-bridge: make building pcie-to-pci bridge configurable Introduce a CONFIG option to build the pcie-to-pci bridge. No functional change since it's enabled per default for PCIE_PORT=y. Signed-off-by: Sebastian Ott Message-Id: <72b6599d-6b27-00b5-aac5-2ebc16a2e023@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci-bridge/Kconfig | 5 +++++ hw/pci-bridge/meson.build | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/hw/pci-bridge/Kconfig b/hw/pci-bridge/Kconfig index 02614f49aa..67077366cc 100644 --- a/hw/pci-bridge/Kconfig +++ b/hw/pci-bridge/Kconfig @@ -3,6 +3,11 @@ config PCIE_PORT default y if PCI_DEVICES depends on PCI_EXPRESS && MSI_NONBROKEN +config PCIE_PCI_BRIDGE + bool + default y if PCIE_PORT + depends on PCIE_PORT + config PXB bool default y if Q35 || ARM_VIRT diff --git a/hw/pci-bridge/meson.build b/hw/pci-bridge/meson.build index fe92d43de6..0edc6c7cbf 100644 --- a/hw/pci-bridge/meson.build +++ b/hw/pci-bridge/meson.build @@ -2,7 +2,8 @@ pci_ss = ss.source_set() pci_ss.add(files('pci_bridge_dev.c')) pci_ss.add(when: 'CONFIG_I82801B11', if_true: files('i82801b11.c')) pci_ss.add(when: 'CONFIG_IOH3420', if_true: files('ioh3420.c')) -pci_ss.add(when: 'CONFIG_PCIE_PORT', if_true: files('pcie_root_port.c', 'gen_pcie_root_port.c', 'pcie_pci_bridge.c')) +pci_ss.add(when: 'CONFIG_PCIE_PORT', if_true: files('pcie_root_port.c', 'gen_pcie_root_port.c')) +pci_ss.add(when: 'CONFIG_PCIE_PCI_BRIDGE', if_true: files('pcie_pci_bridge.c')) pci_ss.add(when: 'CONFIG_PXB', if_true: files('pci_expander_bridge.c'), if_false: files('pci_expander_bridge_stubs.c')) pci_ss.add(when: 'CONFIG_XIO3130', if_true: files('xio3130_upstream.c', 'xio3130_downstream.c')) From b6aab4597173f5d112a8aa0f3ded502cfb7042a9 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 23 Apr 2023 17:20:08 +0100 Subject: [PATCH 37/40] hw/cxl: rename mailbox return code type from ret_code to CXLRetCode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Given the increasing usage of this mailbox return code type, now is a good time to switch to QEMU style naming. Reviewed-by: Ira Weiny Reviewed-by: Fan Ni Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Jonathan Cameron Message-Id: <20230423162013.4535-2-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-mailbox-utils.c | 64 +++++++++++++++++++------------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index ed663cc04a..7b2aef0d67 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -23,7 +23,7 @@ * FOO = 0x7f, * #define BAR 0 * 2. Implement the handler - * static ret_code cmd_foo_bar(struct cxl_cmd *cmd, + * static CXLRetCode cmd_foo_bar(struct cxl_cmd *cmd, * CXLDeviceState *cxl_dstate, uint16_t *len) * 3. Add the command to the cxl_cmd_set[][] * [FOO][BAR] = { "FOO_BAR", cmd_foo_bar, x, y }, @@ -90,10 +90,10 @@ typedef enum { CXL_MBOX_UNSUPPORTED_MAILBOX = 0x15, CXL_MBOX_INVALID_PAYLOAD_LENGTH = 0x16, CXL_MBOX_MAX = 0x17 -} ret_code; +} CXLRetCode; struct cxl_cmd; -typedef ret_code (*opcode_handler)(struct cxl_cmd *cmd, +typedef CXLRetCode (*opcode_handler)(struct cxl_cmd *cmd, CXLDeviceState *cxl_dstate, uint16_t *len); struct cxl_cmd { const char *name; @@ -105,16 +105,16 @@ struct cxl_cmd { #define DEFINE_MAILBOX_HANDLER_ZEROED(name, size) \ uint16_t __zero##name = size; \ - static ret_code cmd_##name(struct cxl_cmd *cmd, \ - CXLDeviceState *cxl_dstate, uint16_t *len) \ + static CXLRetCode cmd_##name(struct cxl_cmd *cmd, \ + CXLDeviceState *cxl_dstate, uint16_t *len) \ { \ *len = __zero##name; \ memset(cmd->payload, 0, *len); \ return CXL_MBOX_SUCCESS; \ } #define DEFINE_MAILBOX_HANDLER_NOP(name) \ - static ret_code cmd_##name(struct cxl_cmd *cmd, \ - CXLDeviceState *cxl_dstate, uint16_t *len) \ + static CXLRetCode cmd_##name(struct cxl_cmd *cmd, \ + CXLDeviceState *cxl_dstate, uint16_t *len) \ { \ return CXL_MBOX_SUCCESS; \ } @@ -125,9 +125,9 @@ DEFINE_MAILBOX_HANDLER_ZEROED(events_get_interrupt_policy, 4); DEFINE_MAILBOX_HANDLER_NOP(events_set_interrupt_policy); /* 8.2.9.2.1 */ -static ret_code cmd_firmware_update_get_info(struct cxl_cmd *cmd, - CXLDeviceState *cxl_dstate, - uint16_t *len) +static CXLRetCode cmd_firmware_update_get_info(struct cxl_cmd *cmd, + CXLDeviceState *cxl_dstate, + uint16_t *len) { struct { uint8_t slots_supported; @@ -159,9 +159,9 @@ static ret_code cmd_firmware_update_get_info(struct cxl_cmd *cmd, } /* 8.2.9.3.1 */ -static ret_code cmd_timestamp_get(struct cxl_cmd *cmd, - CXLDeviceState *cxl_dstate, - uint16_t *len) +static CXLRetCode cmd_timestamp_get(struct cxl_cmd *cmd, + CXLDeviceState *cxl_dstate, + uint16_t *len) { uint64_t time, delta; uint64_t final_time = 0; @@ -181,7 +181,7 @@ static ret_code cmd_timestamp_get(struct cxl_cmd *cmd, } /* 8.2.9.3.2 */ -static ret_code cmd_timestamp_set(struct cxl_cmd *cmd, +static CXLRetCode cmd_timestamp_set(struct cxl_cmd *cmd, CXLDeviceState *cxl_dstate, uint16_t *len) { @@ -201,9 +201,9 @@ static const QemuUUID cel_uuid = { }; /* 8.2.9.4.1 */ -static ret_code cmd_logs_get_supported(struct cxl_cmd *cmd, - CXLDeviceState *cxl_dstate, - uint16_t *len) +static CXLRetCode cmd_logs_get_supported(struct cxl_cmd *cmd, + CXLDeviceState *cxl_dstate, + uint16_t *len) { struct { uint16_t entries; @@ -224,9 +224,9 @@ static ret_code cmd_logs_get_supported(struct cxl_cmd *cmd, } /* 8.2.9.4.2 */ -static ret_code cmd_logs_get_log(struct cxl_cmd *cmd, - CXLDeviceState *cxl_dstate, - uint16_t *len) +static CXLRetCode cmd_logs_get_log(struct cxl_cmd *cmd, + CXLDeviceState *cxl_dstate, + uint16_t *len) { struct { QemuUUID uuid; @@ -265,9 +265,9 @@ static ret_code cmd_logs_get_log(struct cxl_cmd *cmd, } /* 8.2.9.5.1.1 */ -static ret_code cmd_identify_memory_device(struct cxl_cmd *cmd, - CXLDeviceState *cxl_dstate, - uint16_t *len) +static CXLRetCode cmd_identify_memory_device(struct cxl_cmd *cmd, + CXLDeviceState *cxl_dstate, + uint16_t *len) { struct { char fw_revision[0x10]; @@ -309,9 +309,9 @@ static ret_code cmd_identify_memory_device(struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } -static ret_code cmd_ccls_get_partition_info(struct cxl_cmd *cmd, - CXLDeviceState *cxl_dstate, - uint16_t *len) +static CXLRetCode cmd_ccls_get_partition_info(struct cxl_cmd *cmd, + CXLDeviceState *cxl_dstate, + uint16_t *len) { struct { uint64_t active_vmem; @@ -339,9 +339,9 @@ static ret_code cmd_ccls_get_partition_info(struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } -static ret_code cmd_ccls_get_lsa(struct cxl_cmd *cmd, - CXLDeviceState *cxl_dstate, - uint16_t *len) +static CXLRetCode cmd_ccls_get_lsa(struct cxl_cmd *cmd, + CXLDeviceState *cxl_dstate, + uint16_t *len) { struct { uint32_t offset; @@ -364,9 +364,9 @@ static ret_code cmd_ccls_get_lsa(struct cxl_cmd *cmd, return CXL_MBOX_SUCCESS; } -static ret_code cmd_ccls_set_lsa(struct cxl_cmd *cmd, - CXLDeviceState *cxl_dstate, - uint16_t *len) +static CXLRetCode cmd_ccls_set_lsa(struct cxl_cmd *cmd, + CXLDeviceState *cxl_dstate, + uint16_t *len) { struct set_lsa_pl { uint32_t offset; From 547a652fd1e10c2b6a2b9b91084e4c1cea8665a2 Mon Sep 17 00:00:00 2001 From: Ira Weiny Date: Sun, 23 Apr 2023 17:20:09 +0100 Subject: [PATCH 38/40] hw/cxl: Introduce cxl_device_get_timestamp() utility function There are new users of this functionality coming shortly so factor it out from the GET_TIMESTAMP mailbox command handling. Signed-off-by: Ira Weiny Reviewed-by: Fan Ni Signed-off-by: Jonathan Cameron Message-Id: <20230423162013.4535-3-Jonathan.Cameron@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/cxl/cxl-device-utils.c | 15 +++++++++++++++ hw/cxl/cxl-mailbox-utils.c | 11 +---------- include/hw/cxl/cxl_device.h | 2 ++ 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/hw/cxl/cxl-device-utils.c b/hw/cxl/cxl-device-utils.c index 4c5e88aaf5..86e1cea8ce 100644 --- a/hw/cxl/cxl-device-utils.c +++ b/hw/cxl/cxl-device-utils.c @@ -269,3 +269,18 @@ void cxl_device_register_init_common(CXLDeviceState *cxl_dstate) cxl_initialize_mailbox(cxl_dstate); } + +uint64_t cxl_device_get_timestamp(CXLDeviceState *cxl_dstate) +{ + uint64_t time, delta; + uint64_t final_time = 0; + + if (cxl_dstate->timestamp.set) { + /* Find the delta from the last time the host set the time. */ + time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + delta = time - cxl_dstate->timestamp.last_set; + final_time = cxl_dstate->timestamp.host_set + delta; + } + + return final_time; +} diff --git a/hw/cxl/cxl-mailbox-utils.c b/hw/cxl/cxl-mailbox-utils.c index 7b2aef0d67..702e16ca20 100644 --- a/hw/cxl/cxl-mailbox-utils.c +++ b/hw/cxl/cxl-mailbox-utils.c @@ -163,17 +163,8 @@ static CXLRetCode cmd_timestamp_get(struct cxl_cmd *cmd, CXLDeviceState *cxl_dstate, uint16_t *len) { - uint64_t time, delta; - uint64_t final_time = 0; + uint64_t final_time = cxl_device_get_timestamp(cxl_dstate); - if (cxl_dstate->timestamp.set) { - /* First find the delta from the last time the host set the time. */ - time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); - delta = time - cxl_dstate->timestamp.last_set; - final_time = cxl_dstate->timestamp.host_set + delta; - } - - /* Then adjust the actual time */ stq_le_p(cmd->payload, final_time); *len = 8; diff --git a/include/hw/cxl/cxl_device.h b/include/hw/cxl/cxl_device.h index edb9791bab..02befda0f6 100644 --- a/include/hw/cxl/cxl_device.h +++ b/include/hw/cxl/cxl_device.h @@ -287,4 +287,6 @@ MemTxResult cxl_type3_read(PCIDevice *d, hwaddr host_addr, uint64_t *data, MemTxResult cxl_type3_write(PCIDevice *d, hwaddr host_addr, uint64_t data, unsigned size, MemTxAttrs attrs); +uint64_t cxl_device_get_timestamp(CXLDeviceState *cxlds); + #endif From f0bc6bf725428860b479cb771e99bb33a3f5847d Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Fri, 19 May 2023 10:47:33 +0200 Subject: [PATCH 39/40] hw/i386/pc: Create RTC controllers in south bridges Just like in the real hardware (and in PIIX4), create the RTC controllers in the south bridges. Signed-off-by: Bernhard Beschow Reviewed-by: Michael S. Tsirkin Reviewed-by: Thomas Huth Message-Id: <20230519084734.220480-2-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc.c | 12 +++++++++++- hw/i386/pc_piix.c | 8 ++++++++ hw/i386/pc_q35.c | 2 ++ hw/isa/Kconfig | 2 ++ hw/isa/lpc_ich9.c | 8 ++++++++ hw/isa/piix3.c | 15 +++++++++++++++ include/hw/southbridge/ich9.h | 2 ++ include/hw/southbridge/piix.h | 3 +++ 8 files changed, 51 insertions(+), 1 deletion(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 03da571bda..4a73786e20 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1318,7 +1318,17 @@ void pc_basic_device_init(struct PCMachineState *pcms, pit_alt_irq = qdev_get_gpio_in(hpet, HPET_LEGACY_PIT_INT); rtc_irq = qdev_get_gpio_in(hpet, HPET_LEGACY_RTC_INT); } - *rtc_state = ISA_DEVICE(mc146818_rtc_init(isa_bus, 2000, rtc_irq)); + + if (rtc_irq) { + qdev_connect_gpio_out(DEVICE(*rtc_state), 0, rtc_irq); + } else { + uint32_t irq = object_property_get_uint(OBJECT(*rtc_state), + "irq", + &error_fatal); + isa_connect_gpio_out(*rtc_state, 0, irq); + } + object_property_add_alias(OBJECT(pcms), "rtc-time", OBJECT(*rtc_state), + "date"); #ifdef CONFIG_XEN_EMU if (xen_mode == XEN_EMULATE) { diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 1acf02e711..34e81b79d0 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -32,6 +32,7 @@ #include "hw/i386/pc.h" #include "hw/i386/apic.h" #include "hw/pci-host/i440fx.h" +#include "hw/rtc/mc146818rtc.h" #include "hw/southbridge/piix.h" #include "hw/display/ramfb.h" #include "hw/firmware/smbios.h" @@ -240,10 +241,17 @@ static void pc_init1(MachineState *machine, piix3->pic = x86ms->gsi; piix3_devfn = piix3->dev.devfn; isa_bus = ISA_BUS(qdev_get_child_bus(DEVICE(piix3), "isa.0")); + rtc_state = ISA_DEVICE(object_resolve_path_component(OBJECT(pci_dev), + "rtc")); } else { pci_bus = NULL; isa_bus = isa_bus_new(NULL, system_memory, system_io, &error_abort); + + rtc_state = isa_new(TYPE_MC146818_RTC); + qdev_prop_set_int32(DEVICE(rtc_state), "base_year", 2000); + isa_realize_and_unref(rtc_state, isa_bus, &error_fatal); + i8257_dma_init(isa_bus, 0); pcms->hpet_enabled = false; } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index c1535af739..8faeb6ce05 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -241,6 +241,8 @@ static void pc_q35_init(MachineState *machine) x86_machine_is_smm_enabled(x86ms)); pci_realize_and_unref(lpc, host_bus, &error_fatal); + rtc_state = ISA_DEVICE(object_resolve_path_component(OBJECT(lpc), "rtc")); + object_property_add_link(OBJECT(machine), PC_MACHINE_ACPI_DEVICE_PROP, TYPE_HOTPLUG_HANDLER, (Object **)&x86ms->acpi_dev, diff --git a/hw/isa/Kconfig b/hw/isa/Kconfig index 0156a66889..c10cbc5fc1 100644 --- a/hw/isa/Kconfig +++ b/hw/isa/Kconfig @@ -35,6 +35,7 @@ config PIIX3 bool select I8257 select ISA_BUS + select MC146818RTC config PIIX4 bool @@ -79,3 +80,4 @@ config LPC_ICH9 select I8257 select ISA_BUS select ACPI_ICH9 + select MC146818RTC diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c index 9714b0001e..9c47a2f6c7 100644 --- a/hw/isa/lpc_ich9.c +++ b/hw/isa/lpc_ich9.c @@ -658,6 +658,8 @@ static void ich9_lpc_initfn(Object *obj) static const uint8_t acpi_enable_cmd = ICH9_APM_ACPI_ENABLE; static const uint8_t acpi_disable_cmd = ICH9_APM_ACPI_DISABLE; + object_initialize_child(obj, "rtc", &lpc->rtc, TYPE_MC146818_RTC); + object_property_add_uint8_ptr(obj, ACPI_PM_PROP_SCI_INT, &lpc->sci_gsi, OBJ_PROP_FLAG_READ); object_property_add_uint8_ptr(OBJECT(lpc), ACPI_PM_PROP_ACPI_ENABLE_CMD, @@ -723,6 +725,12 @@ static void ich9_lpc_realize(PCIDevice *d, Error **errp) i8257_dma_init(isa_bus, 0); + /* RTC */ + qdev_prop_set_int32(DEVICE(&lpc->rtc), "base_year", 2000); + if (!qdev_realize(DEVICE(&lpc->rtc), BUS(isa_bus), errp)) { + return; + } + pci_bus_irqs(pci_bus, ich9_lpc_set_irq, d, ICH9_LPC_NB_PIRQS); pci_bus_map_irqs(pci_bus, ich9_lpc_map_irq); pci_bus_set_route_irq_fn(pci_bus, ich9_route_intx_pin_to_irq); diff --git a/hw/isa/piix3.c b/hw/isa/piix3.c index a9cb39bf21..f9103ea45a 100644 --- a/hw/isa/piix3.c +++ b/hw/isa/piix3.c @@ -28,6 +28,7 @@ #include "hw/dma/i8257.h" #include "hw/southbridge/piix.h" #include "hw/irq.h" +#include "hw/qdev-properties.h" #include "hw/isa/isa.h" #include "hw/xen/xen.h" #include "sysemu/runstate.h" @@ -301,6 +302,12 @@ static void pci_piix3_realize(PCIDevice *dev, Error **errp) PIIX_RCR_IOPORT, &d->rcr_mem, 1); i8257_dma_init(isa_bus, 0); + + /* RTC */ + qdev_prop_set_int32(DEVICE(&d->rtc), "base_year", 2000); + if (!qdev_realize(DEVICE(&d->rtc), BUS(isa_bus), errp)) { + return; + } } static void build_pci_isa_aml(AcpiDevAmlIf *adev, Aml *scope) @@ -324,6 +331,13 @@ static void build_pci_isa_aml(AcpiDevAmlIf *adev, Aml *scope) qbus_build_aml(bus, scope); } +static void pci_piix3_init(Object *obj) +{ + PIIX3State *d = PIIX3_PCI_DEVICE(obj); + + object_initialize_child(obj, "rtc", &d->rtc, TYPE_MC146818_RTC); +} + static void pci_piix3_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -350,6 +364,7 @@ static const TypeInfo piix3_pci_type_info = { .name = TYPE_PIIX3_PCI_DEVICE, .parent = TYPE_PCI_DEVICE, .instance_size = sizeof(PIIX3State), + .instance_init = pci_piix3_init, .abstract = true, .class_init = pci_piix3_class_init, .interfaces = (InterfaceInfo[]) { diff --git a/include/hw/southbridge/ich9.h b/include/hw/southbridge/ich9.h index 7004eecbf9..fd01649d04 100644 --- a/include/hw/southbridge/ich9.h +++ b/include/hw/southbridge/ich9.h @@ -6,6 +6,7 @@ #include "hw/intc/ioapic.h" #include "hw/pci/pci.h" #include "hw/pci/pci_device.h" +#include "hw/rtc/mc146818rtc.h" #include "exec/memory.h" #include "qemu/notify.h" #include "qom/object.h" @@ -30,6 +31,7 @@ struct ICH9LPCState { */ uint8_t irr[PCI_SLOT_MAX][PCI_NUM_PINS]; + MC146818RtcState rtc; APMState apm; ICH9LPCPMRegs pm; uint32_t sci_level; /* track sci level */ diff --git a/include/hw/southbridge/piix.h b/include/hw/southbridge/piix.h index 0bf48e936d..a840340308 100644 --- a/include/hw/southbridge/piix.h +++ b/include/hw/southbridge/piix.h @@ -13,6 +13,7 @@ #define HW_SOUTHBRIDGE_PIIX_H #include "hw/pci/pci_device.h" +#include "hw/rtc/mc146818rtc.h" /* PIRQRC[A:D]: PIRQx Route Control Registers */ #define PIIX_PIRQCA 0x60 @@ -51,6 +52,8 @@ struct PIIXState { /* This member isn't used. Just for save/load compatibility */ int32_t pci_irq_levels_vmstate[PIIX_NUM_PIRQS]; + MC146818RtcState rtc; + /* Reset Control Register contents */ uint8_t rcr; From 87af48a49c0a5663b3fff58c3407393772d3c448 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Fri, 19 May 2023 10:47:34 +0200 Subject: [PATCH 40/40] hw/i386/pc: No need for rtc_state to be an out-parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that the RTC is created as part of the southbridges it doesn't need to be an out-parameter any longer. Signed-off-by: Bernhard Beschow Reviewed-by: Peter Maydell Reviewed-by: Michael S. Tsirkin Reviewed-by: Thomas Huth Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20230519084734.220480-3-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc.c | 12 ++++++------ hw/i386/pc_piix.c | 2 +- hw/i386/pc_q35.c | 2 +- include/hw/i386/pc.h | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 4a73786e20..b3d826a83a 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1265,7 +1265,7 @@ static void pc_superio_init(ISABus *isa_bus, bool create_fdctrl, void pc_basic_device_init(struct PCMachineState *pcms, ISABus *isa_bus, qemu_irq *gsi, - ISADevice **rtc_state, + ISADevice *rtc_state, bool create_fdctrl, uint32_t hpet_irqs) { @@ -1320,14 +1320,14 @@ void pc_basic_device_init(struct PCMachineState *pcms, } if (rtc_irq) { - qdev_connect_gpio_out(DEVICE(*rtc_state), 0, rtc_irq); + qdev_connect_gpio_out(DEVICE(rtc_state), 0, rtc_irq); } else { - uint32_t irq = object_property_get_uint(OBJECT(*rtc_state), + uint32_t irq = object_property_get_uint(OBJECT(rtc_state), "irq", &error_fatal); - isa_connect_gpio_out(*rtc_state, 0, irq); + isa_connect_gpio_out(rtc_state, 0, irq); } - object_property_add_alias(OBJECT(pcms), "rtc-time", OBJECT(*rtc_state), + object_property_add_alias(OBJECT(pcms), "rtc-time", OBJECT(rtc_state), "date"); #ifdef CONFIG_XEN_EMU @@ -1341,7 +1341,7 @@ void pc_basic_device_init(struct PCMachineState *pcms, } #endif - qemu_register_boot_set(pc_boot_set, *rtc_state); + qemu_register_boot_set(pc_boot_set, rtc_state); if (!xen_enabled() && (x86ms->pit == ON_OFF_AUTO_AUTO || x86ms->pit == ON_OFF_AUTO_ON)) { diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 34e81b79d0..10070ea9a5 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -277,7 +277,7 @@ static void pc_init1(MachineState *machine, } /* init basic PC hardware */ - pc_basic_device_init(pcms, isa_bus, x86ms->gsi, &rtc_state, true, + pc_basic_device_init(pcms, isa_bus, x86ms->gsi, rtc_state, true, 0x4); pc_nic_init(pcmc, isa_bus, pci_bus); diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 8faeb6ce05..8030d53da6 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -292,7 +292,7 @@ static void pc_q35_init(MachineState *machine) } /* init basic PC hardware */ - pc_basic_device_init(pcms, isa_bus, x86ms->gsi, &rtc_state, !mc->no_floppy, + pc_basic_device_init(pcms, isa_bus, x86ms->gsi, rtc_state, !mc->no_floppy, 0xff0104); if (pcms->sata_enabled) { diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 4e638564ca..79e755879d 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -167,7 +167,7 @@ uint64_t pc_pci_hole64_start(void); DeviceState *pc_vga_init(ISABus *isa_bus, PCIBus *pci_bus); void pc_basic_device_init(struct PCMachineState *pcms, ISABus *isa_bus, qemu_irq *gsi, - ISADevice **rtc_state, + ISADevice *rtc_state, bool create_fdctrl, uint32_t hpet_irqs); void pc_cmos_init(PCMachineState *pcms,