From 1e77a4a32f8b7b6699a2f8b1f98e8fada902ba1f Mon Sep 17 00:00:00 2001 From: Dorinda Bassey Date: Mon, 7 Oct 2024 09:00:11 +0200 Subject: [PATCH 01/49] virtio-gpu: Add definition for resource_uuid feature Add the VIRTIO_GPU_F_RESOURCE_UUID feature to enable the assignment of resources UUIDs for export to other virtio devices. Signed-off-by: Dorinda Bassey Message-Id: <20241007070013.3350752-1-dbassey@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/display/vhost-user-gpu.c | 8 ++++++++ hw/display/virtio-gpu-base.c | 3 +++ include/hw/virtio/virtio-gpu.h | 3 +++ 3 files changed, 14 insertions(+) diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c index 12d5c37ee5..2aed6243f6 100644 --- a/hw/display/vhost-user-gpu.c +++ b/hw/display/vhost-user-gpu.c @@ -631,6 +631,14 @@ vhost_user_gpu_device_realize(DeviceState *qdev, Error **errp) error_report("EDID requested but the backend doesn't support it."); g->parent_obj.conf.flags &= ~(1 << VIRTIO_GPU_FLAG_EDID_ENABLED); } + if (virtio_has_feature(g->vhost->dev.features, + VIRTIO_GPU_F_RESOURCE_UUID)) { + g->parent_obj.conf.flags |= 1 << VIRTIO_GPU_FLAG_RESOURCE_UUID_ENABLED; + } + if (virtio_has_feature(g->vhost->dev.features, + VIRTIO_GPU_F_RESOURCE_UUID)) { + g->parent_obj.conf.flags |= 1 << VIRTIO_GPU_FLAG_RESOURCE_UUID_ENABLED; + } if (!virtio_gpu_base_device_realize(qdev, NULL, NULL, errp)) { return; diff --git a/hw/display/virtio-gpu-base.c b/hw/display/virtio-gpu-base.c index 4fc7ef8896..7827536ac4 100644 --- a/hw/display/virtio-gpu-base.c +++ b/hw/display/virtio-gpu-base.c @@ -235,6 +235,9 @@ virtio_gpu_base_get_features(VirtIODevice *vdev, uint64_t features, if (virtio_gpu_context_init_enabled(g->conf)) { features |= (1 << VIRTIO_GPU_F_CONTEXT_INIT); } + if (virtio_gpu_resource_uuid_enabled(g->conf)) { + features |= (1 << VIRTIO_GPU_F_RESOURCE_UUID); + } return features; } diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h index bd93672185..a42957c4e2 100644 --- a/include/hw/virtio/virtio-gpu.h +++ b/include/hw/virtio/virtio-gpu.h @@ -98,6 +98,7 @@ enum virtio_gpu_base_conf_flags { VIRTIO_GPU_FLAG_CONTEXT_INIT_ENABLED, VIRTIO_GPU_FLAG_RUTABAGA_ENABLED, VIRTIO_GPU_FLAG_VENUS_ENABLED, + VIRTIO_GPU_FLAG_RESOURCE_UUID_ENABLED, }; #define virtio_gpu_virgl_enabled(_cfg) \ @@ -114,6 +115,8 @@ enum virtio_gpu_base_conf_flags { (_cfg.flags & (1 << VIRTIO_GPU_FLAG_CONTEXT_INIT_ENABLED)) #define virtio_gpu_rutabaga_enabled(_cfg) \ (_cfg.flags & (1 << VIRTIO_GPU_FLAG_RUTABAGA_ENABLED)) +#define virtio_gpu_resource_uuid_enabled(_cfg) \ + (_cfg.flags & (1 << VIRTIO_GPU_FLAG_RESOURCE_UUID_ENABLED)) #define virtio_gpu_hostmem_enabled(_cfg) \ (_cfg.hostmem > 0) #define virtio_gpu_venus_enabled(_cfg) \ From 694632fd44987cc4618612a38ad151047524a590 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Tue, 3 Dec 2024 13:19:28 +0100 Subject: [PATCH 02/49] pci: ensure valid link status bits for downstream ports PCI hotplug for downstream endpoints on arm fails because Linux' PCIe hotplug driver doesn't like the QEMU provided LNKSTA: pcieport 0000:08:01.0: pciehp: Slot(2): Card present pcieport 0000:08:01.0: pciehp: Slot(2): Link Up pcieport 0000:08:01.0: pciehp: Slot(2): Cannot train link: status 0x2000 There's 2 cases where LNKSTA isn't setup properly: * the downstream device has no express capability * max link width of the bridge is 0 Move the sanity checks added via 88c869198aa63 ("pci: Sanity test minimum downstream LNKSTA") outside of the branch to make sure downstream ports always have a valid LNKSTA. Signed-off-by: Sebastian Ott Tested-by: Zhenyu Zhang Message-Id: <20241203121928.14861-1-sebott@redhat.com> Reviewed-by: Alex Williamson Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pcie.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 0b455c8654..1b12db6fa2 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -1113,18 +1113,22 @@ void pcie_sync_bridge_lnk(PCIDevice *bridge_dev) if ((lnksta & PCI_EXP_LNKSTA_NLW) > (lnkcap & PCI_EXP_LNKCAP_MLW)) { lnksta &= ~PCI_EXP_LNKSTA_NLW; lnksta |= lnkcap & PCI_EXP_LNKCAP_MLW; - } else if (!(lnksta & PCI_EXP_LNKSTA_NLW)) { - lnksta |= QEMU_PCI_EXP_LNKSTA_NLW(QEMU_PCI_EXP_LNK_X1); } if ((lnksta & PCI_EXP_LNKSTA_CLS) > (lnkcap & PCI_EXP_LNKCAP_SLS)) { lnksta &= ~PCI_EXP_LNKSTA_CLS; lnksta |= lnkcap & PCI_EXP_LNKCAP_SLS; - } else if (!(lnksta & PCI_EXP_LNKSTA_CLS)) { - lnksta |= QEMU_PCI_EXP_LNKSTA_CLS(QEMU_PCI_EXP_LNK_2_5GT); } } + if (!(lnksta & PCI_EXP_LNKSTA_NLW)) { + lnksta |= QEMU_PCI_EXP_LNKSTA_NLW(QEMU_PCI_EXP_LNK_X1); + } + + if (!(lnksta & PCI_EXP_LNKSTA_CLS)) { + lnksta |= QEMU_PCI_EXP_LNKSTA_CLS(QEMU_PCI_EXP_LNK_2_5GT); + } + pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_CLS | PCI_EXP_LNKSTA_NLW); pci_word_test_and_set_mask(exp_cap + PCI_EXP_LNKSTA, lnksta & From e043be2290ffdd666ad2ab35d2341c137b21a3b4 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Tue, 10 Dec 2024 17:39:43 +0100 Subject: [PATCH 03/49] tests: acpi: whitelist expected blobs Signed-off-by: Igor Mammedov Message-Id: <20241210163945.3422623-2-imammedo@redhat.com> Tested-by: Eric Mackay Acked-by: Ani Sinha Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/qtest/bios-tables-test-allowed-diff.h | 42 +++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h index dfb8523c8b..a1047913af 100644 --- a/tests/qtest/bios-tables-test-allowed-diff.h +++ b/tests/qtest/bios-tables-test-allowed-diff.h @@ -1 +1,43 @@ /* List of comma-separated changed AML files to ignore */ +"tests/data/acpi/x86/pc/DSDT", +"tests/data/acpi/x86/pc/DSDT.acpierst", +"tests/data/acpi/x86/pc/DSDT.acpihmat", +"tests/data/acpi/x86/pc/DSDT.bridge", +"tests/data/acpi/x86/pc/DSDT.cphp", +"tests/data/acpi/x86/pc/DSDT.dimmpxm", +"tests/data/acpi/x86/pc/DSDT.hpbridge", +"tests/data/acpi/x86/pc/DSDT.hpbrroot", +"tests/data/acpi/x86/pc/DSDT.ipmikcs", +"tests/data/acpi/x86/pc/DSDT.memhp", +"tests/data/acpi/x86/pc/DSDT.nohpet", +"tests/data/acpi/x86/pc/DSDT.numamem", +"tests/data/acpi/x86/pc/DSDT.roothp", +"tests/data/acpi/x86/q35/DSDT", +"tests/data/acpi/x86/q35/DSDT.acpierst", +"tests/data/acpi/x86/q35/DSDT.acpihmat", +"tests/data/acpi/x86/q35/DSDT.acpihmat-generic-x", +"tests/data/acpi/x86/q35/DSDT.acpihmat-noinitiator", +"tests/data/acpi/x86/q35/DSDT.applesmc", +"tests/data/acpi/x86/q35/DSDT.bridge", +"tests/data/acpi/x86/q35/DSDT.core-count", +"tests/data/acpi/x86/q35/DSDT.core-count2", +"tests/data/acpi/x86/q35/DSDT.cphp", +"tests/data/acpi/x86/q35/DSDT.cxl", +"tests/data/acpi/x86/q35/DSDT.dimmpxm", +"tests/data/acpi/x86/q35/DSDT.ipmibt", +"tests/data/acpi/x86/q35/DSDT.ipmismbus", +"tests/data/acpi/x86/q35/DSDT.ivrs", +"tests/data/acpi/x86/q35/DSDT.memhp", +"tests/data/acpi/x86/q35/DSDT.mmio64", +"tests/data/acpi/x86/q35/DSDT.multi-bridge", +"tests/data/acpi/x86/q35/DSDT.noacpihp", +"tests/data/acpi/x86/q35/DSDT.nohpet", +"tests/data/acpi/x86/q35/DSDT.numamem", +"tests/data/acpi/x86/q35/DSDT.pvpanic-isa", +"tests/data/acpi/x86/q35/DSDT.thread-count", +"tests/data/acpi/x86/q35/DSDT.thread-count2", +"tests/data/acpi/x86/q35/DSDT.tis.tpm12", +"tests/data/acpi/x86/q35/DSDT.tis.tpm2", +"tests/data/acpi/x86/q35/DSDT.type4-count", +"tests/data/acpi/x86/q35/DSDT.viot", +"tests/data/acpi/x86/q35/DSDT.xapic", From 8aa35bebeeaed19ae57afbc3e110b8e7fe8587d0 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Tue, 10 Dec 2024 17:39:44 +0100 Subject: [PATCH 04/49] cpuhp: make sure that remove events are handled within the same SCI CPU_SCAN_METHOD was processing insert events first and only if insert event was not present then it would check remove event. Normally it's not an issue as it doesn't make much sense tho hotplug and immediately unplug it. In this corner case, which can be reproduced with: qemu -smp 1,maxcpus=2 -cpu host -monitor stdio \ -drive if=pflash,format=raw,readonly,file=edk2-x86_64-code.fd * boot till GRUB prompt and pause guest (either via monitor or stop GRUB from automatic boot) * at monitor prompt add CPU: device_add host-x86_64-cpu,socket-id=0,core-id=1,thread-id=0,id=foo * let guest OS boot completely, and unplug CPU from monitor prompt: device_del foo which triggers GPE event that leads to CPU_SCAN_METHOD on guest side as result of above cpu 'foo' will not be hotunplugged, since QEMU sees insert event and ignores remove event (leaving it in pending state) for the GPE event. Any follow up CPU hotplug/unplug action from QEMU side will handle previously ignored event, so as workaround user can repeat device_del. Fix this corner-case by queuing remove events independently from insert events, aka the same way as we do with insert events. And then go over remove queue to send eject notify events to OSPM within the same GPE event. PS: Process remove queue after the cpu add queue has been processed 1st to ensure that OSPM gets hotadd evets after hotremove ones. PS2: Case where it's still borken happens when guest OS is Linux and device_del happens before guest OS initializes ACPI subsystem. Culprit in this case though is the guest kernel, which mangles GPE.sts (by clearing them up) and thus pending SCI turns to NOP leaving insert/remove events in pending state. That is the guest bug and should be fixed there. Signed-off-by: Igor Mammedov Reported-by: Eric Mackay Message-Id: <20241210163945.3422623-3-imammedo@redhat.com> Tested-by: Eric Mackay Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/cpu.c | 43 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/hw/acpi/cpu.c b/hw/acpi/cpu.c index 9d530a24da..f70a2c045e 100644 --- a/hw/acpi/cpu.c +++ b/hw/acpi/cpu.c @@ -327,6 +327,7 @@ const VMStateDescription vmstate_cpu_hotplug = { #define CPU_EJECT_METHOD "CEJ0" #define CPU_OST_METHOD "COST" #define CPU_ADDED_LIST "CNEW" +#define CPU_EJ_LIST "CEJL" #define CPU_ENABLED "CPEN" #define CPU_SELECTOR "CSEL" @@ -488,7 +489,6 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, method = aml_method(CPU_SCAN_METHOD, 0, AML_SERIALIZED); { const uint8_t max_cpus_per_pass = 255; - Aml *else_ctx; Aml *while_ctx, *while_ctx2; Aml *has_event = aml_local(0); Aml *dev_chk = aml_int(1); @@ -499,6 +499,8 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, Aml *uid = aml_local(3); Aml *has_job = aml_local(4); Aml *new_cpus = aml_name(CPU_ADDED_LIST); + Aml *ej_cpus = aml_name(CPU_EJ_LIST); + Aml *num_ej_cpus = aml_local(5); aml_append(method, aml_acquire(ctrl_lock, 0xFFFF)); @@ -513,6 +515,8 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, */ aml_append(method, aml_name_decl(CPU_ADDED_LIST, aml_package(max_cpus_per_pass))); + aml_append(method, aml_name_decl(CPU_EJ_LIST, + aml_package(max_cpus_per_pass))); aml_append(method, aml_store(zero, uid)); aml_append(method, aml_store(one, has_job)); @@ -527,6 +531,7 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, aml_append(while_ctx2, aml_store(one, has_event)); aml_append(while_ctx2, aml_store(zero, num_added_cpus)); + aml_append(while_ctx2, aml_store(zero, num_ej_cpus)); /* * Scan CPUs, till there are CPUs with events or @@ -559,8 +564,10 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, * if CPU_ADDED_LIST is full, exit inner loop and process * collected CPUs */ - ifctx = aml_if( - aml_equal(num_added_cpus, aml_int(max_cpus_per_pass))); + ifctx = aml_if(aml_lor( + aml_equal(num_added_cpus, aml_int(max_cpus_per_pass)), + aml_equal(num_ej_cpus, aml_int(max_cpus_per_pass)) + )); { aml_append(ifctx, aml_store(one, has_job)); aml_append(ifctx, aml_break()); @@ -577,16 +584,16 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, aml_append(ifctx, aml_store(one, has_event)); } aml_append(while_ctx, ifctx); - else_ctx = aml_else(); + ifctx = aml_if(aml_equal(rm_evt, one)); { - aml_append(ifctx, - aml_call2(CPU_NOTIFY_METHOD, uid, eject_req)); - aml_append(ifctx, aml_store(one, rm_evt)); + /* cache to be removed CPUs to Notify later */ + aml_append(ifctx, aml_store(uid, + aml_index(ej_cpus, num_ej_cpus))); + aml_append(ifctx, aml_increment(num_ej_cpus)); aml_append(ifctx, aml_store(one, has_event)); } - aml_append(else_ctx, ifctx); - aml_append(while_ctx, else_ctx); + aml_append(while_ctx, ifctx); aml_append(while_ctx, aml_increment(uid)); } aml_append(while_ctx2, while_ctx); @@ -620,6 +627,24 @@ void build_cpus_aml(Aml *table, MachineState *machine, CPUHotplugFeatures opts, aml_append(while_ctx, aml_increment(cpu_idx)); } aml_append(while_ctx2, while_ctx); + + /* + * Notify OSPM about to be removed CPUs and clear remove flag + */ + aml_append(while_ctx2, aml_store(zero, cpu_idx)); + while_ctx = aml_while(aml_lless(cpu_idx, num_ej_cpus)); + { + aml_append(while_ctx, + aml_store(aml_derefof(aml_index(ej_cpus, cpu_idx)), + uid)); + aml_append(while_ctx, + aml_call2(CPU_NOTIFY_METHOD, uid, eject_req)); + aml_append(while_ctx, aml_store(uid, cpu_selector)); + aml_append(while_ctx, aml_store(one, rm_evt)); + aml_append(while_ctx, aml_increment(cpu_idx)); + } + aml_append(while_ctx2, while_ctx); + /* * If another batch is needed, then it will resume scanning * exactly at -- and not after -- the last CPU that's currently From 9ccb69df554a5204077cda101b7bcf0f19544553 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Tue, 10 Dec 2024 17:39:45 +0100 Subject: [PATCH 05/49] tests: acpi: update expected blobs previous patch has changed cpu hotplug AML, expected diff: @@ -2942,6 +2942,7 @@ DefinitionBlock ("", "DSDT", 1, "BOCHS ", "BXPC ", 0x00000001) { Acquire (\_SB.PCI0.PRES.CPLK, 0xFFFF) Name (CNEW, Package (0xFF) {}) + Name (CEJL, Package (0xFF) {}) Local3 = Zero Local4 = One While ((Local4 == One)) @@ -2949,6 +2950,7 @@ DefinitionBlock ("", "DSDT", 1, "BOCHS ", "BXPC ", 0x00000001) Local4 = Zero Local0 = One Local1 = Zero + Local5 = Zero While (((Local0 == One) && (Local3 < One))) { Local0 = Zero @@ -2959,7 +2961,7 @@ DefinitionBlock ("", "DSDT", 1, "BOCHS ", "BXPC ", 0x00000001) Break } - If ((Local1 == 0xFF)) + If (((Local1 == 0xFF) || (Local5 == 0xFF))) { Local4 = One Break @@ -2972,10 +2974,11 @@ DefinitionBlock ("", "DSDT", 1, "BOCHS ", "BXPC ", 0x00000001) Local1++ Local0 = One } - ElseIf ((\_SB.PCI0.PRES.CRMV == One)) + + If ((\_SB.PCI0.PRES.CRMV == One)) { - CTFY (Local3, 0x03) - \_SB.PCI0.PRES.CRMV = One + CEJL [Local5] = Local3 + Local5++ Local0 = One } @@ -2992,6 +2995,16 @@ DefinitionBlock ("", "DSDT", 1, "BOCHS ", "BXPC ", 0x00000001) \_SB.PCI0.PRES.CINS = One Local2++ } + + Local2 = Zero + While ((Local2 < Local5)) + { + Local3 = DerefOf (CEJL [Local2]) + CTFY (Local3, 0x03) + \_SB.PCI0.PRES.CSEL = Local3 + \_SB.PCI0.PRES.CRMV = One + Local2++ + } } Release (\_SB.PCI0.PRES.CPLK) Signed-off-by: Igor Mammedov Message-Id: <20241210163945.3422623-4-imammedo@redhat.com> Tested-by: Eric Mackay Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/data/acpi/x86/pc/DSDT | Bin 8526 -> 8593 bytes tests/data/acpi/x86/pc/DSDT.acpierst | Bin 8437 -> 8504 bytes tests/data/acpi/x86/pc/DSDT.acpihmat | Bin 9851 -> 9918 bytes tests/data/acpi/x86/pc/DSDT.bridge | Bin 15397 -> 15464 bytes tests/data/acpi/x86/pc/DSDT.cphp | Bin 8990 -> 9057 bytes tests/data/acpi/x86/pc/DSDT.dimmpxm | Bin 10180 -> 10247 bytes tests/data/acpi/x86/pc/DSDT.hpbridge | Bin 8477 -> 8544 bytes tests/data/acpi/x86/pc/DSDT.hpbrroot | Bin 5033 -> 5100 bytes tests/data/acpi/x86/pc/DSDT.ipmikcs | Bin 8598 -> 8665 bytes tests/data/acpi/x86/pc/DSDT.memhp | Bin 9885 -> 9952 bytes tests/data/acpi/x86/pc/DSDT.nohpet | Bin 8384 -> 8451 bytes tests/data/acpi/x86/pc/DSDT.numamem | Bin 8532 -> 8599 bytes tests/data/acpi/x86/pc/DSDT.roothp | Bin 12319 -> 12386 bytes tests/data/acpi/x86/q35/DSDT | Bin 8355 -> 8422 bytes tests/data/acpi/x86/q35/DSDT.acpierst | Bin 8372 -> 8439 bytes tests/data/acpi/x86/q35/DSDT.acpihmat | Bin 9680 -> 9747 bytes .../data/acpi/x86/q35/DSDT.acpihmat-generic-x | Bin 12565 -> 12632 bytes .../acpi/x86/q35/DSDT.acpihmat-noinitiator | Bin 8634 -> 8701 bytes tests/data/acpi/x86/q35/DSDT.applesmc | Bin 8401 -> 8468 bytes tests/data/acpi/x86/q35/DSDT.bridge | Bin 11968 -> 12035 bytes tests/data/acpi/x86/q35/DSDT.core-count | Bin 12913 -> 12980 bytes tests/data/acpi/x86/q35/DSDT.core-count2 | Bin 33770 -> 33837 bytes tests/data/acpi/x86/q35/DSDT.cphp | Bin 8819 -> 8886 bytes tests/data/acpi/x86/q35/DSDT.cxl | Bin 13146 -> 13213 bytes tests/data/acpi/x86/q35/DSDT.dimmpxm | Bin 10009 -> 10076 bytes tests/data/acpi/x86/q35/DSDT.ipmibt | Bin 8430 -> 8497 bytes tests/data/acpi/x86/q35/DSDT.ipmismbus | Bin 8443 -> 8510 bytes tests/data/acpi/x86/q35/DSDT.ivrs | Bin 8372 -> 8439 bytes tests/data/acpi/x86/q35/DSDT.memhp | Bin 9714 -> 9781 bytes tests/data/acpi/x86/q35/DSDT.mmio64 | Bin 9485 -> 9552 bytes tests/data/acpi/x86/q35/DSDT.multi-bridge | Bin 13208 -> 13275 bytes tests/data/acpi/x86/q35/DSDT.noacpihp | Bin 8235 -> 8302 bytes tests/data/acpi/x86/q35/DSDT.nohpet | Bin 8213 -> 8280 bytes tests/data/acpi/x86/q35/DSDT.numamem | Bin 8361 -> 8428 bytes tests/data/acpi/x86/q35/DSDT.pvpanic-isa | Bin 8456 -> 8523 bytes tests/data/acpi/x86/q35/DSDT.thread-count | Bin 12913 -> 12980 bytes tests/data/acpi/x86/q35/DSDT.thread-count2 | Bin 33770 -> 33837 bytes tests/data/acpi/x86/q35/DSDT.tis.tpm12 | Bin 8961 -> 9028 bytes tests/data/acpi/x86/q35/DSDT.tis.tpm2 | Bin 8987 -> 9054 bytes tests/data/acpi/x86/q35/DSDT.type4-count | Bin 18589 -> 18656 bytes tests/data/acpi/x86/q35/DSDT.viot | Bin 14612 -> 14679 bytes tests/data/acpi/x86/q35/DSDT.xapic | Bin 35718 -> 35785 bytes tests/qtest/bios-tables-test-allowed-diff.h | 42 ------------------ 43 files changed, 42 deletions(-) diff --git a/tests/data/acpi/x86/pc/DSDT b/tests/data/acpi/x86/pc/DSDT index 8b8235fe79e2fa08a6f840c8479edb75f5a047b9..60d50b088a362556fd54395cb15364d6c0936be5 100644 GIT binary patch delta 191 zcmX@-G|`#MCDLNex$(#6mIrt_{PUQMO zIdw7aXmPleqb`EiiOy**qJduH&*yh{(#ex8Vk2-Y# delta 140 zcmV;70CWG5L(W19L{mgmP9XpQ0co)cxeNkNK9j`^umVjplXngM2Sye{Q$tP&lN}Bh zAP7TFMOP96|8M|fZ~$vpf!_R uF9csxLSIlrNia}SMN>mkO;!OzR7P223Imhz4;7Pi4^ER_4i>W>5OWbzU?~#- diff --git a/tests/data/acpi/x86/pc/DSDT.acpierst b/tests/data/acpi/x86/pc/DSDT.acpierst index 06829b9c6c6d726d955dc7c99bc9f42448e22aeb..4c434c25c0b1602f22128e352781df498fa69ddf 100644 GIT binary patch delta 191 zcmezBxWkFdCDZ1Zh?DM0|M$vRg6 delta 140 zcmV;70CWGiLiIrkL{mgm^&kKM0kW|QxeNkNK9j`^umVjplXngM2Sye{Q$tP&lN}Bh zAP7TFMOP96|8M|fZ~$vpf!_R uF9csxLSIlrNia}SMN>mkO;!OzR7P223Imhz4;7Pi4^ER_4i>W>5E>Em)hYD= diff --git a/tests/data/acpi/x86/pc/DSDT.acpihmat b/tests/data/acpi/x86/pc/DSDT.acpihmat index 2fe355ebdbb858fa9247d09112e21712e3eddc45..61b7d5caa55c44dbf69d649110c6b14bb4c3fdf5 100644 GIT binary patch delta 175 zcmezEv(J~yCDvQVJLo3K$X#7*ZGcb5GvO=f}Y}adINp|H-M7`S=}_7N|^) z(PxPdc8U*h_B03xat(G4@(p7wNbUfcmda3?I=PwOPo{t&X^|7l)TGpcX0SvOL$Y&- YTVyg9b3yWCM?O_%pr*~|`PYa60N6n`+5i9m delta 117 zcmV-*0E+*Z1Zh?FC75!n>zpi delta 140 zcmV;70CWH7c%^s>L{mgmB|HEC0e7(qxeNkNK9j`^umVjplXngM2Sye{Q$tP&lN}Bh zAP7TFMOP96|8M|fZ~$vpf!_R uF9csxLSIlrNia}SMN>mkO;!OzR7P223Imhz4;7Pi4^ER_4i>W>5K1hi?{ulVG1 zd>qOg&aPfQLQMY)7?KMZQx-XhO-^AfU`Q!oOekPTEMQ1omGh zPPXTFU|ygydGbPjVIIbUkq;Fd oLsUjtV+sRs0bDNxUsFO~P(w*DP*O!xLsCsvld%s@vqcab6Skcv^Z)<= diff --git a/tests/data/acpi/x86/pc/DSDT.dimmpxm b/tests/data/acpi/x86/pc/DSDT.dimmpxm index 205219b99d903555125c4b07fc047c42993eb338..5b6471c8db9003b39bf5e20af34061f3e71cdbd5 100644 GIT binary patch delta 173 zcmX@&-yXo_66_MfuED^-7{8J0EDw`+$mE+m8<;#jH+Sg6NE z^uK^1xqvZck%QRe6vhIElmf>AO`El?~oSew@e{$+%D}D#11uBzc z^jYGAo#F$WJq-eaT!Wp1e8U(Ek~=`Ar81PJPF~ONCsn|Zw8)8NYEo)JGgu;tA=x>^ WEi##ldGbYmb!MQJ&A<6iiUR=B8#YG( delta 141 zcmZni{HU?q4@&S$uasY@xe~<0nVNV0YR?8 e&OyFmjLso$k;z=l1&mlEC$Hi6+w354TpR%WuPVd< diff --git a/tests/data/acpi/x86/pc/DSDT.hpbridge b/tests/data/acpi/x86/pc/DSDT.hpbridge index 8fa8b519ec65bd5099c45f4e1c85b11b47a23845..67fe28699fbb261cfc7a52b2291f9965ab93c6a8 100644 GIT binary patch delta 191 zcmbR1^uUSBCDZ1Zh?CqV$0Q#xb- delta 140 zcmV;70CWG~LY+biL{mgm9U%Y!0iCf5xeNkNK9j`^umVjplXngM2Sye{Q$tP&lN}Bh zAP7TFMOP96|8M|fZ~$vpf!_R uF9csxLSIlrNia}SMN>mkO;!OzR7P223Imhz4;7Pi4^ER_4i>W>5JC}S!YKj( diff --git a/tests/data/acpi/x86/pc/DSDT.hpbrroot b/tests/data/acpi/x86/pc/DSDT.hpbrroot index 01719462a72fd6d40ce433dac601e4b94eae574c..077a4cc988dc417a1bc9317dddd2dbd96ff1ff50 100644 GIT binary patch delta 195 zcmZ3f{zje4CD9Bw8j=gAe^8<-p{CTsEhW%rbF4tDnAn0$dpT${t$ z&ox|#=|6|FtCtUuE?`J5U`$!$FFHAev4A0^fH9$fA+dlVb&((U2JCu{NiWp@*E4tDnAn0$dpT!F*c z&ox|#>3;!3asgw?A{U{_DU1aSDFuuP1q_Lce0U}o@cJ@yPEMSBl-I#@q4@&S$uasY u@xe~<0nVNV0YR?8&OyFmjLso$k;z=l1&mlEC#&=MO%~)8+kBBvniT*|~5 diff --git a/tests/data/acpi/x86/pc/DSDT.ipmikcs b/tests/data/acpi/x86/pc/DSDT.ipmikcs index 0ca664688b16baa3a06b8440181de4f17511c6b0..9b2e81a7bcefb5c0e2dfbd2bbc5b6ea501f86306 100644 GIT binary patch delta 191 zcmbQ{eAAiBCD*>_hQtDf)J1;WlQ;4Ca_~)@oXGWm za_VFzeg~xmDwAXMS>l78;scyL4FZB(gPntX!x#&aJ3ywTGL)uHF6Z}?Dqu)jJ>>T12nassJc_zO)D^N>m(&RWkvCX&ncMAdl7dShT delta 140 zcmV;70CWG@LzY7dL{mgmmLUKD0p+m@xeNkNK9j`^umVjplXngM2Sye{Q$tP&lN}Bh zAP7TFMOP96|8M|fZ~$vpf!_R uF9csxLSIlrNia}SMN>mkO;!OzR7P223Imhz4;7Pi4^ER_4i>W>5W5k-i7GGv diff --git a/tests/data/acpi/x86/pc/DSDT.memhp b/tests/data/acpi/x86/pc/DSDT.memhp index 03ff464ba4e72082fce0921815cfc09ca20b561a..9c66ccf150af1622d1b788a1ae04a6e5136cff9e 100644 GIT binary patch delta 191 zcmbR1`@omWCDLNex$(#6mIrt_{PUQMO zIdw7aXmPleqb`EiiOy**qJduH&*yh{(hr|HPnL985 delta 140 zcmV;70CWG~O`S~&L{mgmohASP0b8*OxeNkDKa<4_umVFglXngM2Sye{Q$tP&lN}Bh zAP7TFMOP96|8M|fZ~$vpf!_R uF9csxLSIlrNia}SMN>mkO;!OzR7P223Imhz4;7Pi4^ER_4i>W>5W*IV{wdA? diff --git a/tests/data/acpi/x86/pc/DSDT.nohpet b/tests/data/acpi/x86/pc/DSDT.nohpet index b081030f0ed171e52b13e28cfdc8770a04c2806e..28dbd8d8949d1421da9312cf0440d7ae3b64916e 100644 GIT binary patch delta 195 zcmX@$*zCmR66_MftjNH?$gz=2n}^BCd9pdr1||oK$uD{SvU^H72Rr+5Oz!6u*XD5c za}5__`p@C)>g5BZ3mB3M7*iJci%w2qEMQ0}U`!}rNGxDTUF63-S%uG+gKy&GM6Umn zQzw`3IVdeqnH;0f5+CdoAK>h15D?@V>>T79##oTt0WvL>p)_^!OFloT0*0hTPApTC qQVW{F5=jin&LM7*$z05n_4vdl@8K0>1!^lzntXv*Y_l(au^<4GLppx| delta 157 zcmZp6I^f9V66_LkK!JgQ(Rd@5HV>1(-DGo~4NShqlV9@uWp@*E4tDnAnB31RuE62! z=Nc}=^uK^1xqvZck&Dpe6vhIElmf*>_hQtDf)J1;WlQ;4Ca_~)@oXGWm za_VFzeg~xmDwAXMS>l78;scyL4FZB(gPntX!x#&aJ3ywTGL)uHF6Z}?Dqu)jJ>>T12nassJc_zO)D^N>m(&RWkvCX&nD+K}fZ963Z delta 140 zcmV;70CWGBL)1bFL{mgmR3QKW0TZzbxeNkDKa<4_umVFglXngM2Sye{Q$tP&lN}Bh zAP7TFMOP96|8M|fZ~$vpf!_R uF9csxLSIlrNia}SMN>mkO;!OzR7P223Imhz4;7Pi4^ER_4i>W>5P1LNex$(#6mIrt_{PUQMO zIdw7aXmPleqb`EiiOy**qJduH&*yh{(E(!n-m^&T- delta 140 zcmV;70CWH1V4q+LL{mgmA20v_0rjy8xeNkNK9j`^umVjplXngM2Sye{Q$tP&lN}Bh zAP7TFMOP96|8M|fZ~$vpf!_R uF9csxLSIlrNia}SMN>mkO;!OzR7P223Imhz4;7Pi4^ER_4i>W>5JVulO)0zp diff --git a/tests/data/acpi/x86/q35/DSDT b/tests/data/acpi/x86/q35/DSDT index fb89ae0ac6d4346e33156e9e4d3718698a0a1a8e..51ad37a351bffae8fbc9ba17f72c25ef61822f59 100644 GIT binary patch delta 180 zcmZ4N_{@>ZCDUq!chF6fh(fFr+T>fHItJ;91{ouLsUjtV+sS4 PKO7a4!W&MrRveH7+b|#D diff --git a/tests/data/acpi/x86/q35/DSDT.acpierst b/tests/data/acpi/x86/q35/DSDT.acpierst index 46fd25400b7c00ee9149ddb64cb5d5bd73f6a82b..dbd4f858354df0f4c050fd0b914581154f340ee8 100644 GIT binary patch delta 180 zcmdnu_}!7qCD*Tg0oSn#2N?oSK9ds}umVjplf@bR0!9{-#2O?RZ~$X) z0c4^?5|d;BZ~$a*0bpfHItJ;91{ouLsUjtV+sS4 PKO7a4!W&MrRve)OE4?7a diff --git a/tests/data/acpi/x86/q35/DSDT.acpihmat b/tests/data/acpi/x86/q35/DSDT.acpihmat index 61c5bd52a42242e85090934e8e45bf01642609d6..952752e30e9dfc9e2085e8fceaa0740dda6db89c 100644 GIT binary patch delta 159 zcmccMJ=urLCD!@#ID>c1b2j&&k4)8<_lUHy@ScU}W-=ntWbLMVZ6d)yqeS z>3;!3asgw?A_uX_DU1aSDFuuP1q_J=45^F!xhHc=`*HA1oSew@e{$;N7-^Ei##lxgdFRzmzI-kZ;&# IS((k80E*WzCIA2c delta 141 zcmbR2bHSU-CD!N}w;Hu=1iibMfJasgw? zB3GfwDU1aSDFuuP1q_Lce0e5|N&7K#PEMShBkf?i(0qaE$Yd_&0!A#7lg~-}ZBCNe$O!;@8!4;+ diff --git a/tests/data/acpi/x86/q35/DSDT.acpihmat-generic-x b/tests/data/acpi/x86/q35/DSDT.acpihmat-generic-x index 497706c9742a9ea5396d6c9c4cc1cc2a4a530339..e95258cbd8681103a642f8973bd1ac9ef229cff7 100644 GIT binary patch delta 173 zcmbQ5bR&t&CDfOmyCjpp-(+FQ4NShyn~zG0Ffw^bO};IqqRip!>g6NE z^uK^1xqvZck%QRe6vhIElmf^zn{W$n0PEO?dKRI=BhO~py0+q=z z`YiFmPVoWGo(2IyuEEYhzF~|7$sHimQW;89C*PI!lPX|HTI9qsH7T{A87z^+kn9}d V7MaY&Jo&%0Ix|qqW=)wlA^>)AH8}tP delta 141 zcmcbSG&PCKCDkajR#XuiO7a*RGpe6Uk|fU~DTK#*&& ebC7QsqjQK`WHJ|X0V5X4$+x8aHs{H_5&-}%Gbwuj diff --git a/tests/data/acpi/x86/q35/DSDT.acpihmat-noinitiator b/tests/data/acpi/x86/q35/DSDT.acpihmat-noinitiator index 3aaa2bbdf54a0d0cade14421e84c6ec5a42f96fa..ba2a7d0004be7cd7220716dc7e8594be87197b98 100644 GIT binary patch delta 200 zcmdnx{MVVwCD=Psgp~j9h4TROpeiKi4S&)4{-K02nccwb`J6lV=PGS0GXD`P?|dVrL><^0YlOv uCzh#6sRhkoi6n+(=McBZWG?2(deUN(cS{Mf0=1PUO+GIrw%J!EmlXghfj&V1 delta 162 zcmezCyvv!(CDHmKYXFu0)A*TNY49NwIDT`c%CZ{kKFr*YPCKNCvF7oA>tS9Zq%sDx6a*MQs=|b}b zrjujzS>l78;scyL4FZB(gPntX!x)`I+#-{?mUq!chF6fh(fFr+T>fHItJ;91{ouLsUjtV+sS4 PKO7a4!W&MrRvf+r*iaw} diff --git a/tests/data/acpi/x86/q35/DSDT.bridge b/tests/data/acpi/x86/q35/DSDT.bridge index d9938dba8fa5d405f7696c0dbdc24f3ae42ec934..1939fda2507cde6fcb6f7a093897f9bd2cb987ef 100644 GIT binary patch delta 180 zcmX>Q+Z@N`66_Mftk1x}sIifYU6RSkd9twN1||oK$wwvsvU^H72Rr+5Og4}apZq|I zLz%3;!3asgw?B7f1zDU1aSDFuuP1q_J=45^F!xF<>ba_~)@oXGWma_Zy& zX$PeRDwAXMS>l78;scyL4FZB(gPntX!x#&aJ3xk|GL)uHJ}T`eRltz6$cbfYQffgn cSR#ob**U~5GMS5c@^fi*W}ueMf->uc0FDnfXaE2J delta 109 zcmV-z0FwWMUcg-nL{mgmz%Bp)0aLLG2N?oSK9ds}umVjplf@bR0!9{-#2O?RZ~$X) z0c4^?5|d;BZ~$a*0bpfHItJ;91{ouLsUjtV+sS4 PKO7a4!W&MrRvfJo@Ma)J diff --git a/tests/data/acpi/x86/q35/DSDT.core-count b/tests/data/acpi/x86/q35/DSDT.core-count index a24b04cbdbf09383b933a42a2a15182545543a87..41c0832ab5041ff5361598813ec28fe7442b191b 100644 GIT binary patch delta 168 zcmeyEvL%(vCD>}qURltz6$cbfYQffgnSR#ob**U~5GMS5c Ra+;hvbC7S?<_U5Sg#j0XG*JKm delta 134 zcmV;10D1qkW$|PRL{mgmaWVh^0k5$N2N?p5TayzRu>y)vv&I>E0|!SILsLUe2$O&u z7n5Ec2pDhxV{idvqD2yuWC3shWN-msZ~$SVO$?KX9ZmxYlVOwJ9Y6%3H=s3>As!VR oLsUjtV+sRs0bDNxUsFO~P(w*DP*O!xLsCsvlR+L%v(+Bo6Qh?Xg#Z8m diff --git a/tests/data/acpi/x86/q35/DSDT.core-count2 b/tests/data/acpi/x86/q35/DSDT.core-count2 index 3a0cb8c581c8cc630a2ec21712b7f8b75fcad1c8..153b45f0f7443d25cecc2a752fb6dbd921160e78 100644 GIT binary patch delta 160 zcmaFW&a}3JiOVI}B}BJ{fr0VbMlNDW6B~&vB@cn1q>+#j0pt{i3JR)iyS6vst0iJO`M#_^?!2eWMuM^nk22F%;D_n*>_hQtDf)J6W>lWnB^IQS+`PUQMOIdyWcv;)%umB|;S zMY#)-J3zWq8A?+pGs*bL6fh($a$=d9lv>aXmPleqb`EiiOy*)PNS=H^N|iatH*B+? Hj4B%dq#Q5J delta 124 zcmV-?0E7RwMe{@oL{mgmb0Po$0h_T32N?oSTayzRumVj`v&9)X0Rl%Bk{cu#Z~$X) z0c4^@5|d;BZ~$a*0bpmkO;(ft8&0#693uw7dL)Se diff --git a/tests/data/acpi/x86/q35/DSDT.cxl b/tests/data/acpi/x86/q35/DSDT.cxl index 3c34d4dcab16783abe65f6fa5e64eb69d40795fb..0f1ccdfcc3ffbf151c172015cc4bf18bc4ead218 100644 GIT binary patch delta 180 zcmcbWHaDHiCDUq!chF6fh(fFr+T>fHItJ;91{ouLsUjtV+sS4 PKO7a4!W&MrRvbqc@Dv~7 diff --git a/tests/data/acpi/x86/q35/DSDT.dimmpxm b/tests/data/acpi/x86/q35/DSDT.dimmpxm index 228374b55bd544116e359f659e546fc66cf8a895..eb5b6e9f52107d9c95e38e94a67a6b5001beafc1 100644 GIT binary patch delta 173 zcmbQ~cgK&*CDg6NE z^uK^1xqvZck%QRe6vhIElmf_0v{W$n0PEO?dKRI=BhqQyz0+q=z z`YiFmPVoWGo(2IyuEEYhzF~|7$sHimQW;89C;yf9lPX|HTI9qsH7T{A87z^+kn9}d V7MaY&JXu~wof)WQv!~1jE&ziMG`Ro( delta 141 zcmccPH`9;HCDNE4v?P;9z+`pF4NPvXn{P@gFfzG|P5v#VB2mDQT)>#J z$W>@^3S$97N&#a+0Yl;;KKX$Z zhcbt=tCx=u)Bgg7vQVJLo3K$X#7*ZGcaZi%=<=~q*Ig#uCMBkrT_*q|}0D dutX9=vU7-AWHJ}?fHItJ;91{ouLsUjtV+sS4 PKO7a4!W&MrRvg;|6`>%) diff --git a/tests/data/acpi/x86/q35/DSDT.ipmismbus b/tests/data/acpi/x86/q35/DSDT.ipmismbus index e5d6811bee1233d74236453c49060390d74d4416..d04d215a1d0fbc77739084d100a35af47a1c1a62 100644 GIT binary patch delta 180 zcmezExX+2pCDUq!chF6fh(fFr+T>fHItJ;91{ouLsUjtV+sS4 PKO7a4!W&MrRvhRAPqrav diff --git a/tests/data/acpi/x86/q35/DSDT.ivrs b/tests/data/acpi/x86/q35/DSDT.ivrs index 46fd25400b7c00ee9149ddb64cb5d5bd73f6a82b..dbd4f858354df0f4c050fd0b914581154f340ee8 100644 GIT binary patch delta 180 zcmdnu_}!7qCD*Tg0oSn#2N?oSK9ds}umVjplf@bR0!9{-#2O?RZ~$X) z0c4^?5|d;BZ~$a*0bpfHItJ;91{ouLsUjtV+sS4 PKO7a4!W&MrRve)OE4?7a diff --git a/tests/data/acpi/x86/q35/DSDT.memhp b/tests/data/acpi/x86/q35/DSDT.memhp index 5ce081187a578ba7145a9ba20d30be36c13b7663..f73ade9bf6e4545f9912ed654a282884a54cec79 100644 GIT binary patch delta 180 zcmez5z14@yCDdw@xewec1b1==gGp78<^ZICLfjj%kC-V9PI4JG1)*yeDVV+ z4rLB!S1%tSrvC*D$pwrli~L0=r!W>Uq!chF6fh(fFr+T> delta 109 zcmV-z0FwW;O!7+#L{mgm@+ANO0V%Nx2N?oIKa&#~umVFglf@bR0!9{-#2O?RZ~$X) z0c4^?5|d;BZ~$a*0bpfHItJ;91{ouLsUjtV+sS4 PKO7a4!W&MrRvh0624*0f diff --git a/tests/data/acpi/x86/q35/DSDT.mmio64 b/tests/data/acpi/x86/q35/DSDT.mmio64 index bdf36c4d575bfc4eb2eac3f00c9b7b4270f88677..f0ddb4c83cdc9afdf4f289a66ed6bf0d630fd623 100644 GIT binary patch delta 180 zcmeD6y5Pm-66_KZpvu6&xNsvEyCjo`^JHPk4NPtplaEUNW%rbF4tDnAm~0>;KKX$Z zhcbt=tCx=u)Bgg7vQVJLo3K$X#7*ZGcaZi%=<=~q*Ig#uCMBkrT_*q|}0D dutX9=vU7-AWHJ}?fHItJ;91{ouLsUjtV+sS4 PKO7a4!W&MrRvh{W)LtM& diff --git a/tests/data/acpi/x86/q35/DSDT.multi-bridge b/tests/data/acpi/x86/q35/DSDT.multi-bridge index 1db43a69e4c2affd8bd678bbef4d3c228380288e..3ad19e3f5e480db1c449b838c83833f7665186cd 100644 GIT binary patch delta 180 zcmbP{emkAZCDUq!chF6fh(fFr+T>fHItJ;91{ouLsUjtV+sS4 PKO7a4!W&MrRvd*F{DB}m diff --git a/tests/data/acpi/x86/q35/DSDT.noacpihp b/tests/data/acpi/x86/q35/DSDT.noacpihp index 8bc16887e1c963c61aaecf71712a09c0554f6d67..9f7261d1b06bbf5d8a3e5a7a46b247a2a21eb544 100644 GIT binary patch delta 206 zcmZ4O@XmqDCDreFm{7owSiq3F$d7ySW=UUuzKN3) zx&BX1<@yg)wzOb!ptSI0St&K81uBzc^jYGAo#F$WJq-eaT!Wp1e8U(Ek~=^Kr!thL zPOgyhlPX|HTI9qsH7T{A87z^+kn9}d7MaY&Jb9LsIxA31Y0~6)NwLj$r2JU{in&3W delta 161 zcmaFou-bvkCDV3(Xgp yPL9!Mi4S&)4{-K02nccwb`J6lV{{I2i%jNXE?~qWIk{BIZ*sh(*k*ZYe^vm=gF-S8<-p{CYwwCW%rbF4tDnAn0!}KT${t$ z&ox|#=|6|FtCtUuE?`J5U`$!$FFHAev4A0^fH9$fA+dlVb&((U$vpf!_R uF9csxLSIlrNia}SMN>mkO;!OzR7P223Img|8x@l@8%~oR8WywO8+rsM6)9){ diff --git a/tests/data/acpi/x86/q35/DSDT.numamem b/tests/data/acpi/x86/q35/DSDT.numamem index ba6669437e65952f24516ded954b33fe54bdedfb..2867f5b44498d788fc0effd0bf616317821be88e 100644 GIT binary patch delta 180 zcmZ4K_{NdTCDfHItJ;91{ouLsUjtV+sS4 PKO7a4!W&MrRveZD8^j=y diff --git a/tests/data/acpi/x86/q35/DSDT.pvpanic-isa b/tests/data/acpi/x86/q35/DSDT.pvpanic-isa index 6ad42873e91c80cef5a42224cb4d31936dad59b4..02cc07f010f880684216ba8925c8f3f55cfd80aa 100644 GIT binary patch delta 180 zcmeBhI_<>e66_M;KKX$Z zhcbt=tCx=u)Bgg7vQVJLo3K$X#7*ZGcaZi%=<=~q*Ig#uCMBkrT_*q|}0D dutX9=vU7-AWHJ}?fHItJ;91{ouLsUjtV+sS4 PKO7a4!W&MrRvh&N%k3a4 diff --git a/tests/data/acpi/x86/q35/DSDT.thread-count b/tests/data/acpi/x86/q35/DSDT.thread-count index a24b04cbdbf09383b933a42a2a15182545543a87..41c0832ab5041ff5361598813ec28fe7442b191b 100644 GIT binary patch delta 168 zcmeyEvL%(vCD>}qURltz6$cbfYQffgnSR#ob**U~5GMS5c Ra+;hvbC7S?<_U5Sg#j0XG*JKm delta 134 zcmV;10D1qkW$|PRL{mgmaWVh^0k5$N2N?p5TayzRu>y)vv&I>E0|!SILsLUe2$O&u z7n5Ec2pDhxV{idvqD2yuWC3shWN-msZ~$SVO$?KX9ZmxYlVOwJ9Y6%3H=s3>As!VR oLsUjtV+sRs0bDNxUsFO~P(w*DP*O!xLsCsvlR+L%v(+Bo6Qh?Xg#Z8m diff --git a/tests/data/acpi/x86/q35/DSDT.thread-count2 b/tests/data/acpi/x86/q35/DSDT.thread-count2 index 3a0cb8c581c8cc630a2ec21712b7f8b75fcad1c8..153b45f0f7443d25cecc2a752fb6dbd921160e78 100644 GIT binary patch delta 160 zcmaFW&a}3JiOVI}B}BJ{fr0VbMlNDW6B~&vB@cn1q>+#j0pt{i3JR)iyS6vst0iJO`M#_^?!2e3;!3asgw?B7f1zDU1aSDFuuP1q_J=45^F!xF<>ba_~)@oXGWma_Zy& zX$PeRDwAXMS>l78;scyL4FZB(gPntX!x#&aJ3xk|GL)uHJ}T`eRltz6$cbfYQffgn cSR#ob**U~5GMS5c@^fi*W}ueMf-)c20H4n`x&QzG delta 109 zcmV-z0FwX2MuA2OL{mgm0V4na0cWua2N?oSK9ds}umVjplf@bR0!9{-#2O?RZ~$X) z0c4^?5|d;BZ~$a*0bpfHItJ;91{ouLsUjtV+sS4 PKO7a4!W&MrRvhjImmMFN diff --git a/tests/data/acpi/x86/q35/DSDT.tis.tpm2 b/tests/data/acpi/x86/q35/DSDT.tis.tpm2 index a09253042ce4a715922027245de8a2ab7449c5b7..b05563deedc65df50f35b2399862d9ee8d4d1e0e 100644 GIT binary patch delta 180 zcmbR3cF&E=CDUq!chF6fh(fFr+T>fHItJ;91{ouLsUjtV+sS4 PKO7a4!W&MrRvZWin{Xbx diff --git a/tests/data/acpi/x86/q35/DSDT.type4-count b/tests/data/acpi/x86/q35/DSDT.type4-count index edc23198cdb47a981bcbc82bc8e392b815abb554..00807e7fd4d758bc2ab9c69ac8869cf6864399f7 100644 GIT binary patch delta 200 zcmbO`k@3MqMlP3Nmyib@3=E8mHgd5`GW8sqEG)T^sdM}0W0I5D*uA8jgPr|2CNnCC zYjZgJxrPfd{pWCY_3{DI1q{grj46v8#3rXO7BHj~FeVf*Bo;8FF7oG|JWaumgKy&G zM6UmnQzt)Ha8O#HGC4+{B|g|GKET=2ARx#!*g42IjIkiO17un%Luu;d7)3v+0*0hT wPApTCQVW{F5=jin&LM7*$z05n7b=KN)>9B<2Wl%#idLVTs35#~zv4YL0HE7HJpcdz delta 163 zcmaDbk#X)sMlP3Nmyo$03=E97H*&E{GIj2oEG)T^scrM-W0I5D*xkjPgPr|2CNnCC zD{wgbxrPfd{V!lhE?`Vq(KT@%~0Gc1b2D=gGp78<-p{CLfjj%kC-V9PI4JG1)*yeDVV+ z4rLB!S1%tSrvC*D$pwrli~L0=r!W>Uq!chF6fh(fFr+T>fHItJ;91{ouLsUjtV+sS4 PKO7a4!W&MrRvZBxw>BRO diff --git a/tests/data/acpi/x86/q35/DSDT.xapic b/tests/data/acpi/x86/q35/DSDT.xapic index d4acd851c62c956436a436f9fa6d08fc5f370fa7..227d421f16ed1824a87e8a91da734828f8b48cbf 100644 GIT binary patch delta 195 zcmZph&UA7*6PHV{OUTJ?1_s7U8@bpenOZ+i7M9$|)bwogG091M?A}t&!OnghlNmL{ zwK$ypT*HN!{&P6Hdien90*2%Q#*{^lVv|!C3m8%g7!wK@5(^ko7dcFxsu94!H*slV4~!C@oN#9HY+?AM6w#;OuD-5ab%{9ON6uSdiQSGAxy$G<9;Urk_*+L((EA rmZ?dp1XQ>Sgf}11JeCLm_4q#( delta 162 zcmX>(ovCd)6PHV{OGsNc0|VpRja=-KOigbm3rlWf>VLTTnB*irb`LS=Ym1>}e1X Date: Thu, 12 Dec 2024 16:37:38 +0800 Subject: [PATCH 06/49] intel_iommu: Use the latest fault reasons defined by spec MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Spec revision 3.0 or above defines more detailed fault reasons for scalable mode. So introduce them into emulation code, see spec section 7.1.2 for details. Note spec revision has no relation with VERSION register, Guest kernel should not use that register to judge what features are supported. Instead cap/ecap bits should be checked. Signed-off-by: Yu Zhang Signed-off-by: Zhenzhong Duan Reviewed-by: Clément Mathieu--Drif Reviewed-by: Yi Liu Acked-by: Jason Wang Message-Id: <20241212083757.605022-2-zhenzhong.duan@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 25 ++++++++++++++++--------- hw/i386/intel_iommu_internal.h | 9 ++++++++- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index a8c275f9ce..0ab1676d5f 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -796,7 +796,7 @@ static int vtd_get_pdire_from_pdir_table(dma_addr_t pasid_dir_base, addr = pasid_dir_base + index * entry_size; if (dma_memory_read(&address_space_memory, addr, pdire, entry_size, MEMTXATTRS_UNSPECIFIED)) { - return -VTD_FR_PASID_TABLE_INV; + return -VTD_FR_PASID_DIR_ACCESS_ERR; } pdire->val = le64_to_cpu(pdire->val); @@ -814,6 +814,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, dma_addr_t addr, VTDPASIDEntry *pe) { + uint8_t pgtt; uint32_t index; dma_addr_t entry_size; X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); @@ -823,7 +824,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, addr = addr + index * entry_size; if (dma_memory_read(&address_space_memory, addr, pe, entry_size, MEMTXATTRS_UNSPECIFIED)) { - return -VTD_FR_PASID_TABLE_INV; + return -VTD_FR_PASID_TABLE_ACCESS_ERR; } for (size_t i = 0; i < ARRAY_SIZE(pe->val); i++) { pe->val[i] = le64_to_cpu(pe->val[i]); @@ -831,11 +832,13 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, /* Do translation type check */ if (!vtd_pe_type_check(x86_iommu, pe)) { - return -VTD_FR_PASID_TABLE_INV; + return -VTD_FR_PASID_TABLE_ENTRY_INV; } - if (!vtd_is_level_supported(s, VTD_PE_GET_LEVEL(pe))) { - return -VTD_FR_PASID_TABLE_INV; + pgtt = VTD_PE_GET_TYPE(pe); + if (pgtt == VTD_SM_PASID_ENTRY_SLT && + !vtd_is_level_supported(s, VTD_PE_GET_LEVEL(pe))) { + return -VTD_FR_PASID_TABLE_ENTRY_INV; } return 0; @@ -876,7 +879,7 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s, } if (!vtd_pdire_present(&pdire)) { - return -VTD_FR_PASID_TABLE_INV; + return -VTD_FR_PASID_DIR_ENTRY_P; } ret = vtd_get_pe_from_pdire(s, pasid, &pdire, pe); @@ -885,7 +888,7 @@ static int vtd_get_pe_from_pasid_table(IntelIOMMUState *s, } if (!vtd_pe_present(pe)) { - return -VTD_FR_PASID_TABLE_INV; + return -VTD_FR_PASID_ENTRY_P; } return 0; @@ -938,7 +941,7 @@ static int vtd_ce_get_pasid_fpd(IntelIOMMUState *s, } if (!vtd_pdire_present(&pdire)) { - return -VTD_FR_PASID_TABLE_INV; + return -VTD_FR_PASID_DIR_ENTRY_P; } /* @@ -1795,7 +1798,11 @@ static const bool vtd_qualified_faults[] = { [VTD_FR_ROOT_ENTRY_RSVD] = false, [VTD_FR_PAGING_ENTRY_RSVD] = true, [VTD_FR_CONTEXT_ENTRY_TT] = true, - [VTD_FR_PASID_TABLE_INV] = false, + [VTD_FR_PASID_DIR_ACCESS_ERR] = false, + [VTD_FR_PASID_DIR_ENTRY_P] = true, + [VTD_FR_PASID_TABLE_ACCESS_ERR] = false, + [VTD_FR_PASID_ENTRY_P] = true, + [VTD_FR_PASID_TABLE_ENTRY_INV] = true, [VTD_FR_SM_INTERRUPT_ADDR] = true, [VTD_FR_MAX] = false, }; diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 4323fc5d6d..a987023692 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -311,7 +311,14 @@ typedef enum VTDFaultReason { * request while disabled */ VTD_FR_IR_SID_ERR = 0x26, /* Invalid Source-ID */ - VTD_FR_PASID_TABLE_INV = 0x58, /*Invalid PASID table entry */ + /* PASID directory entry access failure */ + VTD_FR_PASID_DIR_ACCESS_ERR = 0x50, + /* The Present(P) field of pasid directory entry is 0 */ + VTD_FR_PASID_DIR_ENTRY_P = 0x51, + VTD_FR_PASID_TABLE_ACCESS_ERR = 0x58, /* PASID table entry access failure */ + /* The Present(P) field of pasid table entry is 0 */ + VTD_FR_PASID_ENTRY_P = 0x59, + VTD_FR_PASID_TABLE_ENTRY_INV = 0x5b, /*Invalid PASID table entry */ /* Output address in the interrupt address range for scalable mode */ VTD_FR_SM_INTERRUPT_ADDR = 0x87, From b291dae33d1dab48670b86807a71cb1cbadf39aa Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 12 Dec 2024 16:37:39 +0800 Subject: [PATCH 07/49] intel_iommu: Make pasid entry type check accurate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When guest configures Nested Translation(011b) or First-stage Translation only (001b), type check passed unaccurately. Fails the type check in those cases as their simulation isn't supported yet. Fixes: fb43cf739e1 ("intel_iommu: scalable mode emulation") Suggested-by: Yi Liu Signed-off-by: Zhenzhong Duan Reviewed-by: Clément Mathieu--Drif Reviewed-by: Yi Liu Acked-by: Jason Wang Message-Id: <20241212083757.605022-3-zhenzhong.duan@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 0ab1676d5f..bd639b7ff7 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -759,20 +759,16 @@ static inline bool vtd_pe_type_check(X86IOMMUState *x86_iommu, VTDPASIDEntry *pe) { switch (VTD_PE_GET_TYPE(pe)) { - case VTD_SM_PASID_ENTRY_FLT: case VTD_SM_PASID_ENTRY_SLT: - case VTD_SM_PASID_ENTRY_NESTED: - break; + return true; case VTD_SM_PASID_ENTRY_PT: - if (!x86_iommu->pt_supported) { - return false; - } - break; + return x86_iommu->pt_supported; + case VTD_SM_PASID_ENTRY_FLT: + case VTD_SM_PASID_ENTRY_NESTED: default: /* Unknown type */ return false; } - return true; } static inline bool vtd_pdire_present(VTDPASIDDirEntry *pdire) From 791346f93d2aa3b7eaebf4a12f4b7c558e94ff6b Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 12 Dec 2024 16:37:40 +0800 Subject: [PATCH 08/49] intel_iommu: Add a placeholder variable for scalable mode stage-1 translation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an new element flts in IntelIOMMUState to mark stage-1 translation support in scalable mode, this element will be exposed as an intel_iommu property x-flts finally. For now, it's only a placehholder and used for address width compatibility check and block host device passthrough until nesting is supported. Signed-off-by: Yi Liu Signed-off-by: Zhenzhong Duan Acked-by: Jason Wang Reviewed-by: Clément Mathieu--Drif Reviewed-by: Yi Liu Message-Id: <20241212083757.605022-4-zhenzhong.duan@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 23 ++++++++++++++++++----- include/hw/i386/intel_iommu.h | 1 + 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index bd639b7ff7..d0c1d73974 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -3917,7 +3917,13 @@ static bool vtd_check_hiod(IntelIOMMUState *s, HostIOMMUDevice *hiod, return false; } - return true; + if (!s->flts) { + /* All checks requested by VTD stage-2 translation pass */ + return true; + } + + error_setg(errp, "host device is uncompatible with stage-1 translation"); + return false; } static bool vtd_dev_set_iommu_device(PCIBus *bus, void *opaque, int devfn, @@ -4307,14 +4313,21 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) } } - /* Currently only address widths supported are 39 and 48 bits */ - if ((s->aw_bits != VTD_HOST_AW_39BIT) && - (s->aw_bits != VTD_HOST_AW_48BIT)) { - error_setg(errp, "Supported values for aw-bits are: %d, %d", + if (!s->flts && s->aw_bits != VTD_HOST_AW_39BIT && + s->aw_bits != VTD_HOST_AW_48BIT) { + error_setg(errp, "%s: supported values for aw-bits are: %d, %d", + s->scalable_mode ? "Scalable mode(flts=off)" : "Legacy mode", VTD_HOST_AW_39BIT, VTD_HOST_AW_48BIT); return false; } + if (s->flts && s->aw_bits != VTD_HOST_AW_48BIT) { + error_setg(errp, + "Scalable mode(flts=on): supported value for aw-bits is: %d", + VTD_HOST_AW_48BIT); + return false; + } + if (s->scalable_mode && !s->dma_drain) { error_setg(errp, "Need to set dma_drain for scalable mode"); return false; diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index d372cd396b..b19f3004f0 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -262,6 +262,7 @@ struct IntelIOMMUState { bool caching_mode; /* RO - is cap CM enabled? */ bool scalable_mode; /* RO - is Scalable Mode supported? */ + bool flts; /* RO - is stage-1 translation supported? */ bool snoop_control; /* RO - is SNP filed supported? */ dma_addr_t root; /* Current root table pointer */ From ad0a7f1e1edbe841d4285a6b56f071eb3de9c86c Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 12 Dec 2024 16:37:41 +0800 Subject: [PATCH 09/49] intel_iommu: Flush stage-2 cache in PASID-selective PASID-based iotlb invalidation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per VT-d spec 4.1, 6.5.2.4, "Table 21. PASID-based-IOTLB Invalidation", PADID-selective PASID-based iotlb invalidation will flush stage-2 iotlb entries with matching domain id and pasid. With stage-1 translation introduced, guest could send PASID-selective PASID-based iotlb invalidation to flush either stage-1 or stage-2 entries. By this chance, remove old IOTLB related definitions which were unused. Signed-off-by: Zhenzhong Duan Reviewed-by: Clément Mathieu--Drif Acked-by: Jason Wang Message-Id: <20241212083757.605022-5-zhenzhong.duan@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 85 +++++++++++++++++++++++++++++++++- hw/i386/intel_iommu_internal.h | 14 ++++-- 2 files changed, 93 insertions(+), 6 deletions(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index d0c1d73974..bb1f43c4b3 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -2692,6 +2692,83 @@ static bool vtd_process_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) return true; } +static gboolean vtd_hash_remove_by_pasid(gpointer key, gpointer value, + gpointer user_data) +{ + VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value; + VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data; + + return ((entry->domain_id == info->domain_id) && + (entry->pasid == info->pasid)); +} + +static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s, + uint16_t domain_id, uint32_t pasid) +{ + VTDIOTLBPageInvInfo info; + VTDAddressSpace *vtd_as; + VTDContextEntry ce; + + info.domain_id = domain_id; + info.pasid = pasid; + + vtd_iommu_lock(s); + g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_pasid, + &info); + vtd_iommu_unlock(s); + + QLIST_FOREACH(vtd_as, &s->vtd_as_with_notifiers, next) { + if (!vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus), + vtd_as->devfn, &ce) && + domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) { + uint32_t rid2pasid = VTD_CE_GET_RID2PASID(&ce); + + if ((vtd_as->pasid != PCI_NO_PASID || pasid != rid2pasid) && + vtd_as->pasid != pasid) { + continue; + } + + if (!s->flts) { + vtd_address_space_sync(vtd_as); + } + } + } +} + +static bool vtd_process_piotlb_desc(IntelIOMMUState *s, + VTDInvDesc *inv_desc) +{ + uint16_t domain_id; + uint32_t pasid; + uint64_t mask[4] = {VTD_INV_DESC_PIOTLB_RSVD_VAL0, + VTD_INV_DESC_PIOTLB_RSVD_VAL1, + VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE}; + + if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, true, + __func__, "piotlb inv")) { + return false; + } + + domain_id = VTD_INV_DESC_PIOTLB_DID(inv_desc->val[0]); + pasid = VTD_INV_DESC_PIOTLB_PASID(inv_desc->val[0]); + switch (inv_desc->val[0] & VTD_INV_DESC_PIOTLB_G) { + case VTD_INV_DESC_PIOTLB_ALL_IN_PASID: + vtd_piotlb_pasid_invalidate(s, domain_id, pasid); + break; + + case VTD_INV_DESC_PIOTLB_PSI_IN_PASID: + break; + + default: + error_report_once("%s: invalid piotlb inv desc: hi=0x%"PRIx64 + ", lo=0x%"PRIx64" (type mismatch: 0x%llx)", + __func__, inv_desc->val[1], inv_desc->val[0], + inv_desc->val[0] & VTD_INV_DESC_IOTLB_G); + return false; + } + return true; +} + static bool vtd_process_inv_iec_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) { @@ -2810,6 +2887,13 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s) } break; + case VTD_INV_DESC_PIOTLB: + trace_vtd_inv_desc("p-iotlb", inv_desc.val[1], inv_desc.val[0]); + if (!vtd_process_piotlb_desc(s, &inv_desc)) { + return false; + } + break; + case VTD_INV_DESC_WAIT: trace_vtd_inv_desc("wait", inv_desc.hi, inv_desc.lo); if (!vtd_process_wait_desc(s, &inv_desc)) { @@ -2837,7 +2921,6 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s) * iommu driver) work, just return true is enough so far. */ case VTD_INV_DESC_PC: - case VTD_INV_DESC_PIOTLB: if (s->scalable_mode) { break; } diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index a987023692..48019e2005 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -404,11 +404,6 @@ typedef union VTDInvDesc VTDInvDesc; #define VTD_INV_DESC_IOTLB_AM(val) ((val) & 0x3fULL) #define VTD_INV_DESC_IOTLB_RSVD_LO 0xffffffff0000f100ULL #define VTD_INV_DESC_IOTLB_RSVD_HI 0xf80ULL -#define VTD_INV_DESC_IOTLB_PASID_PASID (2ULL << 4) -#define VTD_INV_DESC_IOTLB_PASID_PAGE (3ULL << 4) -#define VTD_INV_DESC_IOTLB_PASID(val) (((val) >> 32) & VTD_PASID_ID_MASK) -#define VTD_INV_DESC_IOTLB_PASID_RSVD_LO 0xfff00000000001c0ULL -#define VTD_INV_DESC_IOTLB_PASID_RSVD_HI 0xf80ULL /* Mask for Device IOTLB Invalidate Descriptor */ #define VTD_INV_DESC_DEVICE_IOTLB_ADDR(val) ((val) & 0xfffffffffffff000ULL) @@ -443,6 +438,15 @@ typedef union VTDInvDesc VTDInvDesc; (0x3ffff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM | VTD_SL_TM)) : \ (0x3ffff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM)) +/* Masks for PIOTLB Invalidate Descriptor */ +#define VTD_INV_DESC_PIOTLB_G (3ULL << 4) +#define VTD_INV_DESC_PIOTLB_ALL_IN_PASID (2ULL << 4) +#define VTD_INV_DESC_PIOTLB_PSI_IN_PASID (3ULL << 4) +#define VTD_INV_DESC_PIOTLB_DID(val) (((val) >> 16) & VTD_DOMAIN_ID_MASK) +#define VTD_INV_DESC_PIOTLB_PASID(val) (((val) >> 32) & 0xfffffULL) +#define VTD_INV_DESC_PIOTLB_RSVD_VAL0 0xfff000000000f1c0ULL +#define VTD_INV_DESC_PIOTLB_RSVD_VAL1 0xf80ULL + /* Information about page-selective IOTLB invalidate */ struct VTDIOTLBPageInvInfo { uint16_t domain_id; From eda4c9b5b3c46f8d4d6a628cc7a588f187f30050 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Thu, 12 Dec 2024 16:37:42 +0800 Subject: [PATCH 10/49] intel_iommu: Rename slpte to pte MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Because we will support both FST(a.k.a, FLT) and SST(a.k.a, SLT) translation, rename variable and functions from slpte to pte whenever possible. But some are SST only, they are renamed with sl_ prefix. Signed-off-by: Yi Liu Co-developed-by: Clément Mathieu--Drif Signed-off-by: Clément Mathieu--Drif Signed-off-by: Yi Sun Signed-off-by: Zhenzhong Duan Acked-by: Jason Wang Reviewed-by: Yi Liu Message-Id: <20241212083757.605022-6-zhenzhong.duan@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 129 +++++++++++++++++---------------- hw/i386/intel_iommu_internal.h | 24 +++--- include/hw/i386/intel_iommu.h | 2 +- 3 files changed, 78 insertions(+), 77 deletions(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index bb1f43c4b3..dfac5982d6 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -48,7 +48,8 @@ /* pe operations */ #define VTD_PE_GET_TYPE(pe) ((pe)->val[0] & VTD_SM_PASID_ENTRY_PGTT) -#define VTD_PE_GET_LEVEL(pe) (2 + (((pe)->val[0] >> 2) & VTD_SM_PASID_ENTRY_AW)) +#define VTD_PE_GET_SL_LEVEL(pe) \ + (2 + (((pe)->val[0] >> 2) & VTD_SM_PASID_ENTRY_AW)) /* * PCI bus number (or SID) is not reliable since the device is usaully @@ -284,15 +285,15 @@ static gboolean vtd_hash_remove_by_domain(gpointer key, gpointer value, } /* The shift of an addr for a certain level of paging structure */ -static inline uint32_t vtd_slpt_level_shift(uint32_t level) +static inline uint32_t vtd_pt_level_shift(uint32_t level) { assert(level != 0); - return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_SL_LEVEL_BITS; + return VTD_PAGE_SHIFT_4K + (level - 1) * VTD_LEVEL_BITS; } -static inline uint64_t vtd_slpt_level_page_mask(uint32_t level) +static inline uint64_t vtd_pt_level_page_mask(uint32_t level) { - return ~((1ULL << vtd_slpt_level_shift(level)) - 1); + return ~((1ULL << vtd_pt_level_shift(level)) - 1); } static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value, @@ -349,7 +350,7 @@ static void vtd_reset_caches(IntelIOMMUState *s) static uint64_t vtd_get_iotlb_gfn(hwaddr addr, uint32_t level) { - return (addr & vtd_slpt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K; + return (addr & vtd_pt_level_page_mask(level)) >> VTD_PAGE_SHIFT_4K; } /* Must be called with IOMMU lock held */ @@ -360,7 +361,7 @@ static VTDIOTLBEntry *vtd_lookup_iotlb(IntelIOMMUState *s, uint16_t source_id, VTDIOTLBEntry *entry; unsigned level; - for (level = VTD_SL_PT_LEVEL; level < VTD_SL_PML4_LEVEL; level++) { + for (level = VTD_PT_LEVEL; level < VTD_PML4_LEVEL; level++) { key.gfn = vtd_get_iotlb_gfn(addr, level); key.level = level; key.sid = source_id; @@ -377,7 +378,7 @@ out: /* Must be with IOMMU lock held */ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id, - uint16_t domain_id, hwaddr addr, uint64_t slpte, + uint16_t domain_id, hwaddr addr, uint64_t pte, uint8_t access_flags, uint32_t level, uint32_t pasid) { @@ -385,7 +386,7 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id, struct vtd_iotlb_key *key = g_malloc(sizeof(*key)); uint64_t gfn = vtd_get_iotlb_gfn(addr, level); - trace_vtd_iotlb_page_update(source_id, addr, slpte, domain_id); + trace_vtd_iotlb_page_update(source_id, addr, pte, domain_id); if (g_hash_table_size(s->iotlb) >= VTD_IOTLB_MAX_SIZE) { trace_vtd_iotlb_reset("iotlb exceeds size limit"); vtd_reset_iotlb_locked(s); @@ -393,9 +394,9 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id, entry->gfn = gfn; entry->domain_id = domain_id; - entry->slpte = slpte; + entry->pte = pte; entry->access_flags = access_flags; - entry->mask = vtd_slpt_level_page_mask(level); + entry->mask = vtd_pt_level_page_mask(level); entry->pasid = pasid; key->gfn = gfn; @@ -710,32 +711,32 @@ static inline dma_addr_t vtd_ce_get_slpt_base(VTDContextEntry *ce) return ce->lo & VTD_CONTEXT_ENTRY_SLPTPTR; } -static inline uint64_t vtd_get_slpte_addr(uint64_t slpte, uint8_t aw) +static inline uint64_t vtd_get_pte_addr(uint64_t pte, uint8_t aw) { - return slpte & VTD_SL_PT_BASE_ADDR_MASK(aw); + return pte & VTD_PT_BASE_ADDR_MASK(aw); } /* Whether the pte indicates the address of the page frame */ -static inline bool vtd_is_last_slpte(uint64_t slpte, uint32_t level) +static inline bool vtd_is_last_pte(uint64_t pte, uint32_t level) { - return level == VTD_SL_PT_LEVEL || (slpte & VTD_SL_PT_PAGE_SIZE_MASK); + return level == VTD_PT_LEVEL || (pte & VTD_PT_PAGE_SIZE_MASK); } -/* Get the content of a spte located in @base_addr[@index] */ -static uint64_t vtd_get_slpte(dma_addr_t base_addr, uint32_t index) +/* Get the content of a pte located in @base_addr[@index] */ +static uint64_t vtd_get_pte(dma_addr_t base_addr, uint32_t index) { - uint64_t slpte; + uint64_t pte; - assert(index < VTD_SL_PT_ENTRY_NR); + assert(index < VTD_PT_ENTRY_NR); if (dma_memory_read(&address_space_memory, - base_addr + index * sizeof(slpte), - &slpte, sizeof(slpte), MEMTXATTRS_UNSPECIFIED)) { - slpte = (uint64_t)-1; - return slpte; + base_addr + index * sizeof(pte), + &pte, sizeof(pte), MEMTXATTRS_UNSPECIFIED)) { + pte = (uint64_t)-1; + return pte; } - slpte = le64_to_cpu(slpte); - return slpte; + pte = le64_to_cpu(pte); + return pte; } /* Given an iova and the level of paging structure, return the offset @@ -743,12 +744,12 @@ static uint64_t vtd_get_slpte(dma_addr_t base_addr, uint32_t index) */ static inline uint32_t vtd_iova_level_offset(uint64_t iova, uint32_t level) { - return (iova >> vtd_slpt_level_shift(level)) & - ((1ULL << VTD_SL_LEVEL_BITS) - 1); + return (iova >> vtd_pt_level_shift(level)) & + ((1ULL << VTD_LEVEL_BITS) - 1); } /* Check Capability Register to see if the @level of page-table is supported */ -static inline bool vtd_is_level_supported(IntelIOMMUState *s, uint32_t level) +static inline bool vtd_is_sl_level_supported(IntelIOMMUState *s, uint32_t level) { return VTD_CAP_SAGAW_MASK & s->cap & (1ULL << (level - 2 + VTD_CAP_SAGAW_SHIFT)); @@ -833,7 +834,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, pgtt = VTD_PE_GET_TYPE(pe); if (pgtt == VTD_SM_PASID_ENTRY_SLT && - !vtd_is_level_supported(s, VTD_PE_GET_LEVEL(pe))) { + !vtd_is_sl_level_supported(s, VTD_PE_GET_SL_LEVEL(pe))) { return -VTD_FR_PASID_TABLE_ENTRY_INV; } @@ -972,7 +973,7 @@ static uint32_t vtd_get_iova_level(IntelIOMMUState *s, if (s->root_scalable) { vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid); - return VTD_PE_GET_LEVEL(&pe); + return VTD_PE_GET_SL_LEVEL(&pe); } return vtd_ce_get_level(ce); @@ -1040,9 +1041,9 @@ static inline uint64_t vtd_iova_limit(IntelIOMMUState *s, } /* Return true if IOVA passes range check, otherwise false. */ -static inline bool vtd_iova_range_check(IntelIOMMUState *s, - uint64_t iova, VTDContextEntry *ce, - uint8_t aw, uint32_t pasid) +static inline bool vtd_iova_sl_range_check(IntelIOMMUState *s, + uint64_t iova, VTDContextEntry *ce, + uint8_t aw, uint32_t pasid) { /* * Check if @iova is above 2^X-1, where X is the minimum of MGAW @@ -1083,17 +1084,17 @@ static bool vtd_slpte_nonzero_rsvd(uint64_t slpte, uint32_t level) /* * We should have caught a guest-mis-programmed level earlier, - * via vtd_is_level_supported. + * via vtd_is_sl_level_supported. */ assert(level < VTD_SPTE_RSVD_LEN); /* - * Zero level doesn't exist. The smallest level is VTD_SL_PT_LEVEL=1 and - * checked by vtd_is_last_slpte(). + * Zero level doesn't exist. The smallest level is VTD_PT_LEVEL=1 and + * checked by vtd_is_last_pte(). */ assert(level); - if ((level == VTD_SL_PD_LEVEL || level == VTD_SL_PDP_LEVEL) && - (slpte & VTD_SL_PT_PAGE_SIZE_MASK)) { + if ((level == VTD_PD_LEVEL || level == VTD_PDP_LEVEL) && + (slpte & VTD_PT_PAGE_SIZE_MASK)) { /* large page */ rsvd_mask = vtd_spte_rsvd_large[level]; } else { @@ -1119,7 +1120,7 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, uint64_t access_right_check; uint64_t xlat, size; - if (!vtd_iova_range_check(s, iova, ce, aw_bits, pasid)) { + if (!vtd_iova_sl_range_check(s, iova, ce, aw_bits, pasid)) { error_report_once("%s: detected IOVA overflow (iova=0x%" PRIx64 "," "pasid=0x%" PRIx32 ")", __func__, iova, pasid); return -VTD_FR_ADDR_BEYOND_MGAW; @@ -1130,7 +1131,7 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, while (true) { offset = vtd_iova_level_offset(iova, level); - slpte = vtd_get_slpte(addr, offset); + slpte = vtd_get_pte(addr, offset); if (slpte == (uint64_t)-1) { error_report_once("%s: detected read error on DMAR slpte " @@ -1161,17 +1162,17 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, return -VTD_FR_PAGING_ENTRY_RSVD; } - if (vtd_is_last_slpte(slpte, level)) { + if (vtd_is_last_pte(slpte, level)) { *slptep = slpte; *slpte_level = level; break; } - addr = vtd_get_slpte_addr(slpte, aw_bits); + addr = vtd_get_pte_addr(slpte, aw_bits); level--; } - xlat = vtd_get_slpte_addr(*slptep, aw_bits); - size = ~vtd_slpt_level_page_mask(level) + 1; + xlat = vtd_get_pte_addr(*slptep, aw_bits); + size = ~vtd_pt_level_page_mask(level) + 1; /* * From VT-d spec 3.14: Untranslated requests and translation @@ -1322,14 +1323,14 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start, trace_vtd_page_walk_level(addr, level, start, end); - subpage_size = 1ULL << vtd_slpt_level_shift(level); - subpage_mask = vtd_slpt_level_page_mask(level); + subpage_size = 1ULL << vtd_pt_level_shift(level); + subpage_mask = vtd_pt_level_page_mask(level); while (iova < end) { iova_next = (iova & subpage_mask) + subpage_size; offset = vtd_iova_level_offset(iova, level); - slpte = vtd_get_slpte(addr, offset); + slpte = vtd_get_pte(addr, offset); if (slpte == (uint64_t)-1) { trace_vtd_page_walk_skip_read(iova, iova_next); @@ -1352,12 +1353,12 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start, */ entry_valid = read_cur | write_cur; - if (!vtd_is_last_slpte(slpte, level) && entry_valid) { + if (!vtd_is_last_pte(slpte, level) && entry_valid) { /* * This is a valid PDE (or even bigger than PDE). We need * to walk one further level. */ - ret = vtd_page_walk_level(vtd_get_slpte_addr(slpte, info->aw), + ret = vtd_page_walk_level(vtd_get_pte_addr(slpte, info->aw), iova, MIN(iova_next, end), level - 1, read_cur, write_cur, info); } else { @@ -1374,7 +1375,7 @@ static int vtd_page_walk_level(dma_addr_t addr, uint64_t start, event.entry.perm = IOMMU_ACCESS_FLAG(read_cur, write_cur); event.entry.addr_mask = ~subpage_mask; /* NOTE: this is only meaningful if entry_valid == true */ - event.entry.translated_addr = vtd_get_slpte_addr(slpte, info->aw); + event.entry.translated_addr = vtd_get_pte_addr(slpte, info->aw); event.type = event.entry.perm ? IOMMU_NOTIFIER_MAP : IOMMU_NOTIFIER_UNMAP; ret = vtd_page_walk_one(&event, info); @@ -1408,11 +1409,11 @@ static int vtd_page_walk(IntelIOMMUState *s, VTDContextEntry *ce, dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid); uint32_t level = vtd_get_iova_level(s, ce, pasid); - if (!vtd_iova_range_check(s, start, ce, info->aw, pasid)) { + if (!vtd_iova_sl_range_check(s, start, ce, info->aw, pasid)) { return -VTD_FR_ADDR_BEYOND_MGAW; } - if (!vtd_iova_range_check(s, end, ce, info->aw, pasid)) { + if (!vtd_iova_sl_range_check(s, end, ce, info->aw, pasid)) { /* Fix end so that it reaches the maximum */ end = vtd_iova_limit(s, ce, info->aw, pasid); } @@ -1527,7 +1528,7 @@ static int vtd_dev_to_context_entry(IntelIOMMUState *s, uint8_t bus_num, /* Check if the programming of context-entry is valid */ if (!s->root_scalable && - !vtd_is_level_supported(s, vtd_ce_get_level(ce))) { + !vtd_is_sl_level_supported(s, vtd_ce_get_level(ce))) { error_report_once("%s: invalid context entry: hi=%"PRIx64 ", lo=%"PRIx64" (level %d not supported)", __func__, ce->hi, ce->lo, @@ -1897,7 +1898,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, VTDContextEntry ce; uint8_t bus_num = pci_bus_num(bus); VTDContextCacheEntry *cc_entry; - uint64_t slpte, page_mask; + uint64_t pte, page_mask; uint32_t level, pasid = vtd_as->pasid; uint16_t source_id = PCI_BUILD_BDF(bus_num, devfn); int ret_fr; @@ -1918,13 +1919,13 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, cc_entry = &vtd_as->context_cache_entry; - /* Try to fetch slpte form IOTLB, we don't need RID2PASID logic */ + /* Try to fetch pte from IOTLB, we don't need RID2PASID logic */ if (!rid2pasid) { iotlb_entry = vtd_lookup_iotlb(s, source_id, pasid, addr); if (iotlb_entry) { - trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte, + trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->pte, iotlb_entry->domain_id); - slpte = iotlb_entry->slpte; + pte = iotlb_entry->pte; access_flags = iotlb_entry->access_flags; page_mask = iotlb_entry->mask; goto out; @@ -1996,20 +1997,20 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, return true; } - /* Try to fetch slpte form IOTLB for RID2PASID slow path */ + /* Try to fetch pte from IOTLB for RID2PASID slow path */ if (rid2pasid) { iotlb_entry = vtd_lookup_iotlb(s, source_id, pasid, addr); if (iotlb_entry) { - trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->slpte, + trace_vtd_iotlb_page_hit(source_id, addr, iotlb_entry->pte, iotlb_entry->domain_id); - slpte = iotlb_entry->slpte; + pte = iotlb_entry->pte; access_flags = iotlb_entry->access_flags; page_mask = iotlb_entry->mask; goto out; } } - ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &slpte, &level, + ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &pte, &level, &reads, &writes, s->aw_bits, pasid); if (ret_fr) { vtd_report_fault(s, -ret_fr, is_fpd_set, source_id, @@ -2017,14 +2018,14 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, goto error; } - page_mask = vtd_slpt_level_page_mask(level); + page_mask = vtd_pt_level_page_mask(level); access_flags = IOMMU_ACCESS_FLAG(reads, writes); vtd_update_iotlb(s, source_id, vtd_get_domain_id(s, &ce, pasid), - addr, slpte, access_flags, level, pasid); + addr, pte, access_flags, level, pasid); out: vtd_iommu_unlock(s); entry->iova = addr & page_mask; - entry->translated_addr = vtd_get_slpte_addr(slpte, s->aw_bits) & page_mask; + entry->translated_addr = vtd_get_pte_addr(pte, s->aw_bits) & page_mask; entry->addr_mask = ~page_mask; entry->perm = access_flags; return true; diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 48019e2005..e810b0071f 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -533,24 +533,24 @@ typedef struct VTDRootEntry VTDRootEntry; /* Second Level Page Translation Pointer*/ #define VTD_SM_PASID_ENTRY_SLPTPTR (~0xfffULL) -/* Paging Structure common */ -#define VTD_SL_PT_PAGE_SIZE_MASK (1ULL << 7) -/* Bits to decide the offset for each level */ -#define VTD_SL_LEVEL_BITS 9 - /* Second Level Paging Structure */ -#define VTD_SL_PML4_LEVEL 4 -#define VTD_SL_PDP_LEVEL 3 -#define VTD_SL_PD_LEVEL 2 -#define VTD_SL_PT_LEVEL 1 -#define VTD_SL_PT_ENTRY_NR 512 - /* Masks for Second Level Paging Entry */ #define VTD_SL_RW_MASK 3ULL #define VTD_SL_R 1ULL #define VTD_SL_W (1ULL << 1) -#define VTD_SL_PT_BASE_ADDR_MASK(aw) (~(VTD_PAGE_SIZE - 1) & VTD_HAW_MASK(aw)) #define VTD_SL_IGN_COM 0xbff0000000000000ULL #define VTD_SL_TM (1ULL << 62) +/* Common for both First Level and Second Level */ +#define VTD_PML4_LEVEL 4 +#define VTD_PDP_LEVEL 3 +#define VTD_PD_LEVEL 2 +#define VTD_PT_LEVEL 1 +#define VTD_PT_ENTRY_NR 512 +#define VTD_PT_PAGE_SIZE_MASK (1ULL << 7) +#define VTD_PT_BASE_ADDR_MASK(aw) (~(VTD_PAGE_SIZE - 1) & VTD_HAW_MASK(aw)) + +/* Bits to decide the offset for each level */ +#define VTD_LEVEL_BITS 9 + #endif diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index b19f3004f0..f44f3eb63a 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -152,7 +152,7 @@ struct VTDIOTLBEntry { uint64_t gfn; uint16_t domain_id; uint32_t pasid; - uint64_t slpte; + uint64_t pte; uint64_t mask; uint8_t access_flags; }; From eb9da9d2632839c386ecbfc50f78032c9f3a75a4 Mon Sep 17 00:00:00 2001 From: Yi Liu Date: Thu, 12 Dec 2024 16:37:43 +0800 Subject: [PATCH 11/49] intel_iommu: Implement stage-1 translation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds stage-1 page table walking to support stage-1 only translation in scalable mode. Signed-off-by: Yi Liu Co-developed-by: Clément Mathieu--Drif Signed-off-by: Clément Mathieu--Drif Signed-off-by: Yi Sun Signed-off-by: Zhenzhong Duan Acked-by: Jason Wang Message-Id: <20241212083757.605022-7-zhenzhong.duan@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 158 ++++++++++++++++++++++++++++++++- hw/i386/intel_iommu_internal.h | 34 +++++++ 2 files changed, 188 insertions(+), 4 deletions(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index dfac5982d6..bd6de71c02 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -48,6 +48,8 @@ /* pe operations */ #define VTD_PE_GET_TYPE(pe) ((pe)->val[0] & VTD_SM_PASID_ENTRY_PGTT) +#define VTD_PE_GET_FL_LEVEL(pe) \ + (4 + (((pe)->val[2] >> 2) & VTD_SM_PASID_ENTRY_FLPM)) #define VTD_PE_GET_SL_LEVEL(pe) \ (2 + (((pe)->val[0] >> 2) & VTD_SM_PASID_ENTRY_AW)) @@ -755,6 +757,11 @@ static inline bool vtd_is_sl_level_supported(IntelIOMMUState *s, uint32_t level) (1ULL << (level - 2 + VTD_CAP_SAGAW_SHIFT)); } +static inline bool vtd_is_fl_level_supported(IntelIOMMUState *s, uint32_t level) +{ + return level == VTD_PML4_LEVEL; +} + /* Return true if check passed, otherwise false */ static inline bool vtd_pe_type_check(X86IOMMUState *x86_iommu, VTDPASIDEntry *pe) @@ -838,6 +845,11 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, return -VTD_FR_PASID_TABLE_ENTRY_INV; } + if (pgtt == VTD_SM_PASID_ENTRY_FLT && + !vtd_is_fl_level_supported(s, VTD_PE_GET_FL_LEVEL(pe))) { + return -VTD_FR_PASID_TABLE_ENTRY_INV; + } + return 0; } @@ -973,7 +985,11 @@ static uint32_t vtd_get_iova_level(IntelIOMMUState *s, if (s->root_scalable) { vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid); - return VTD_PE_GET_SL_LEVEL(&pe); + if (s->flts) { + return VTD_PE_GET_FL_LEVEL(&pe); + } else { + return VTD_PE_GET_SL_LEVEL(&pe); + } } return vtd_ce_get_level(ce); @@ -1060,7 +1076,11 @@ static dma_addr_t vtd_get_iova_pgtbl_base(IntelIOMMUState *s, if (s->root_scalable) { vtd_ce_get_rid2pasid_entry(s, ce, &pe, pasid); - return pe.val[0] & VTD_SM_PASID_ENTRY_SLPTPTR; + if (s->flts) { + return pe.val[2] & VTD_SM_PASID_ENTRY_FLPTPTR; + } else { + return pe.val[0] & VTD_SM_PASID_ENTRY_SLPTPTR; + } } return vtd_ce_get_slpt_base(ce); @@ -1800,6 +1820,12 @@ static const bool vtd_qualified_faults[] = { [VTD_FR_PASID_TABLE_ACCESS_ERR] = false, [VTD_FR_PASID_ENTRY_P] = true, [VTD_FR_PASID_TABLE_ENTRY_INV] = true, + [VTD_FR_FS_PAGING_ENTRY_INV] = true, + [VTD_FR_FS_PAGING_ENTRY_P] = true, + [VTD_FR_FS_PAGING_ENTRY_RSVD] = true, + [VTD_FR_PASID_ENTRY_FSPTPTR_INV] = true, + [VTD_FR_FS_PAGING_ENTRY_US] = true, + [VTD_FR_SM_WRITE] = true, [VTD_FR_SM_INTERRUPT_ADDR] = true, [VTD_FR_MAX] = false, }; @@ -1862,6 +1888,113 @@ out: trace_vtd_pt_enable_fast_path(source_id, success); } +/* + * Rsvd field masks for fpte: + * vtd_fpte_rsvd 4k pages + * vtd_fpte_rsvd_large large pages + * + * We support only 4-level page tables. + */ +#define VTD_FPTE_RSVD_LEN 5 +static uint64_t vtd_fpte_rsvd[VTD_FPTE_RSVD_LEN]; +static uint64_t vtd_fpte_rsvd_large[VTD_FPTE_RSVD_LEN]; + +static bool vtd_flpte_nonzero_rsvd(uint64_t flpte, uint32_t level) +{ + uint64_t rsvd_mask; + + /* + * We should have caught a guest-mis-programmed level earlier, + * via vtd_is_fl_level_supported. + */ + assert(level < VTD_FPTE_RSVD_LEN); + /* + * Zero level doesn't exist. The smallest level is VTD_PT_LEVEL=1 and + * checked by vtd_is_last_pte(). + */ + assert(level); + + if ((level == VTD_PD_LEVEL || level == VTD_PDP_LEVEL) && + (flpte & VTD_PT_PAGE_SIZE_MASK)) { + /* large page */ + rsvd_mask = vtd_fpte_rsvd_large[level]; + } else { + rsvd_mask = vtd_fpte_rsvd[level]; + } + + return flpte & rsvd_mask; +} + +static inline bool vtd_flpte_present(uint64_t flpte) +{ + return !!(flpte & VTD_FL_P); +} + +/* + * Given the @iova, get relevant @flptep. @flpte_level will be the last level + * of the translation, can be used for deciding the size of large page. + */ +static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce, + uint64_t iova, bool is_write, + uint64_t *flptep, uint32_t *flpte_level, + bool *reads, bool *writes, uint8_t aw_bits, + uint32_t pasid) +{ + dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid); + uint32_t level = vtd_get_iova_level(s, ce, pasid); + uint32_t offset; + uint64_t flpte; + + while (true) { + offset = vtd_iova_level_offset(iova, level); + flpte = vtd_get_pte(addr, offset); + + if (flpte == (uint64_t)-1) { + if (level == vtd_get_iova_level(s, ce, pasid)) { + /* Invalid programming of pasid-entry */ + return -VTD_FR_PASID_ENTRY_FSPTPTR_INV; + } else { + return -VTD_FR_FS_PAGING_ENTRY_INV; + } + } + + if (!vtd_flpte_present(flpte)) { + *reads = false; + *writes = false; + return -VTD_FR_FS_PAGING_ENTRY_P; + } + + /* No emulated device supports supervisor privilege request yet */ + if (!(flpte & VTD_FL_US)) { + *reads = false; + *writes = false; + return -VTD_FR_FS_PAGING_ENTRY_US; + } + + *reads = true; + *writes = (*writes) && (flpte & VTD_FL_RW); + if (is_write && !(flpte & VTD_FL_RW)) { + return -VTD_FR_SM_WRITE; + } + if (vtd_flpte_nonzero_rsvd(flpte, level)) { + error_report_once("%s: detected flpte reserved non-zero " + "iova=0x%" PRIx64 ", level=0x%" PRIx32 + "flpte=0x%" PRIx64 ", pasid=0x%" PRIX32 ")", + __func__, iova, level, flpte, pasid); + return -VTD_FR_FS_PAGING_ENTRY_RSVD; + } + + if (vtd_is_last_pte(flpte, level)) { + *flptep = flpte; + *flpte_level = level; + return 0; + } + + addr = vtd_get_pte_addr(flpte, aw_bits); + level--; + } +} + static void vtd_report_fault(IntelIOMMUState *s, int err, bool is_fpd_set, uint16_t source_id, @@ -2010,8 +2143,13 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, } } - ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &pte, &level, - &reads, &writes, s->aw_bits, pasid); + if (s->flts && s->root_scalable) { + ret_fr = vtd_iova_to_flpte(s, &ce, addr, is_write, &pte, &level, + &reads, &writes, s->aw_bits, pasid); + } else { + ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &pte, &level, + &reads, &writes, s->aw_bits, pasid); + } if (ret_fr) { vtd_report_fault(s, -ret_fr, is_fpd_set, source_id, addr, is_write, pasid != PCI_NO_PASID, pasid); @@ -4286,6 +4424,18 @@ static void vtd_init(IntelIOMMUState *s) vtd_spte_rsvd_large[3] = VTD_SPTE_LPAGE_L3_RSVD_MASK(s->aw_bits, x86_iommu->dt_supported && s->stale_tm); + /* + * Rsvd field masks for fpte + */ + vtd_fpte_rsvd[0] = ~0ULL; + vtd_fpte_rsvd[1] = VTD_FPTE_PAGE_L1_RSVD_MASK(s->aw_bits); + vtd_fpte_rsvd[2] = VTD_FPTE_PAGE_L2_RSVD_MASK(s->aw_bits); + vtd_fpte_rsvd[3] = VTD_FPTE_PAGE_L3_RSVD_MASK(s->aw_bits); + vtd_fpte_rsvd[4] = VTD_FPTE_PAGE_L4_RSVD_MASK(s->aw_bits); + + vtd_fpte_rsvd_large[2] = VTD_FPTE_LPAGE_L2_RSVD_MASK(s->aw_bits); + vtd_fpte_rsvd_large[3] = VTD_FPTE_LPAGE_L3_RSVD_MASK(s->aw_bits); + if (s->scalable_mode || s->snoop_control) { vtd_spte_rsvd[1] &= ~VTD_SPTE_SNP; vtd_spte_rsvd_large[2] &= ~VTD_SPTE_SNP; diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index e810b0071f..86d3354198 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -320,6 +320,15 @@ typedef enum VTDFaultReason { VTD_FR_PASID_ENTRY_P = 0x59, VTD_FR_PASID_TABLE_ENTRY_INV = 0x5b, /*Invalid PASID table entry */ + /* Fail to access a first-level paging entry (not FS_PML4E) */ + VTD_FR_FS_PAGING_ENTRY_INV = 0x70, + VTD_FR_FS_PAGING_ENTRY_P = 0x71, + /* Non-zero reserved field in present first-stage paging entry */ + VTD_FR_FS_PAGING_ENTRY_RSVD = 0x72, + VTD_FR_PASID_ENTRY_FSPTPTR_INV = 0x73, /* Invalid FSPTPTR in PASID entry */ + VTD_FR_FS_PAGING_ENTRY_US = 0x81, /* Privilege violation */ + VTD_FR_SM_WRITE = 0x85, /* No write permission */ + /* Output address in the interrupt address range for scalable mode */ VTD_FR_SM_INTERRUPT_ADDR = 0x87, VTD_FR_MAX, /* Guard */ @@ -438,6 +447,22 @@ typedef union VTDInvDesc VTDInvDesc; (0x3ffff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM | VTD_SL_TM)) : \ (0x3ffff800ULL | ~(VTD_HAW_MASK(aw) | VTD_SL_IGN_COM)) +/* Rsvd field masks for fpte */ +#define VTD_FS_UPPER_IGNORED 0xfff0000000000000ULL +#define VTD_FPTE_PAGE_L1_RSVD_MASK(aw) \ + (~(VTD_HAW_MASK(aw) | VTD_FS_UPPER_IGNORED)) +#define VTD_FPTE_PAGE_L2_RSVD_MASK(aw) \ + (~(VTD_HAW_MASK(aw) | VTD_FS_UPPER_IGNORED)) +#define VTD_FPTE_PAGE_L3_RSVD_MASK(aw) \ + (~(VTD_HAW_MASK(aw) | VTD_FS_UPPER_IGNORED)) +#define VTD_FPTE_PAGE_L4_RSVD_MASK(aw) \ + (0x80ULL | ~(VTD_HAW_MASK(aw) | VTD_FS_UPPER_IGNORED)) + +#define VTD_FPTE_LPAGE_L2_RSVD_MASK(aw) \ + (0x1fe000ULL | ~(VTD_HAW_MASK(aw) | VTD_FS_UPPER_IGNORED)) +#define VTD_FPTE_LPAGE_L3_RSVD_MASK(aw) \ + (0x3fffe000ULL | ~(VTD_HAW_MASK(aw) | VTD_FS_UPPER_IGNORED)) + /* Masks for PIOTLB Invalidate Descriptor */ #define VTD_INV_DESC_PIOTLB_G (3ULL << 4) #define VTD_INV_DESC_PIOTLB_ALL_IN_PASID (2ULL << 4) @@ -530,6 +555,15 @@ typedef struct VTDRootEntry VTDRootEntry; #define VTD_SM_PASID_ENTRY_AW 7ULL /* Adjusted guest-address-width */ #define VTD_SM_PASID_ENTRY_DID(val) ((val) & VTD_DOMAIN_ID_MASK) +#define VTD_SM_PASID_ENTRY_FLPM 3ULL +#define VTD_SM_PASID_ENTRY_FLPTPTR (~0xfffULL) + +/* First Level Paging Structure */ +/* Masks for First Level Paging Entry */ +#define VTD_FL_P 1ULL +#define VTD_FL_RW (1ULL << 1) +#define VTD_FL_US (1ULL << 2) + /* Second Level Page Translation Pointer*/ #define VTD_SM_PASID_ENTRY_SLPTPTR (~0xfffULL) From 305e469b7188e5f1a896c40853d84fa158ee6ba4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Mathieu--Drif?= Date: Thu, 12 Dec 2024 16:37:44 +0800 Subject: [PATCH 12/49] intel_iommu: Check if the input address is canonical MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stage-1 translation must fail if the address to translate is not canonical. Signed-off-by: Clément Mathieu--Drif Signed-off-by: Zhenzhong Duan Acked-by: Jason Wang Reviewed-by: Yi Liu Message-Id: <20241212083757.605022-8-zhenzhong.duan@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 23 +++++++++++++++++++++++ hw/i386/intel_iommu_internal.h | 1 + 2 files changed, 24 insertions(+) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index bd6de71c02..3959fe44c7 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -1824,6 +1824,7 @@ static const bool vtd_qualified_faults[] = { [VTD_FR_FS_PAGING_ENTRY_P] = true, [VTD_FR_FS_PAGING_ENTRY_RSVD] = true, [VTD_FR_PASID_ENTRY_FSPTPTR_INV] = true, + [VTD_FR_FS_NON_CANONICAL] = true, [VTD_FR_FS_PAGING_ENTRY_US] = true, [VTD_FR_SM_WRITE] = true, [VTD_FR_SM_INTERRUPT_ADDR] = true, @@ -1930,6 +1931,22 @@ static inline bool vtd_flpte_present(uint64_t flpte) return !!(flpte & VTD_FL_P); } +/* Return true if IOVA is canonical, otherwise false. */ +static bool vtd_iova_fl_check_canonical(IntelIOMMUState *s, uint64_t iova, + VTDContextEntry *ce, uint32_t pasid) +{ + uint64_t iova_limit = vtd_iova_limit(s, ce, s->aw_bits, pasid); + uint64_t upper_bits_mask = ~(iova_limit - 1); + uint64_t upper_bits = iova & upper_bits_mask; + bool msb = ((iova & (iova_limit >> 1)) != 0); + + if (msb) { + return upper_bits == upper_bits_mask; + } else { + return !upper_bits; + } +} + /* * Given the @iova, get relevant @flptep. @flpte_level will be the last level * of the translation, can be used for deciding the size of large page. @@ -1945,6 +1962,12 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce, uint32_t offset; uint64_t flpte; + if (!vtd_iova_fl_check_canonical(s, iova, ce, pasid)) { + error_report_once("%s: detected non canonical IOVA (iova=0x%" PRIx64 "," + "pasid=0x%" PRIx32 ")", __func__, iova, pasid); + return -VTD_FR_FS_NON_CANONICAL; + } + while (true) { offset = vtd_iova_level_offset(iova, level); flpte = vtd_get_pte(addr, offset); diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 86d3354198..3e7365dfff 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -326,6 +326,7 @@ typedef enum VTDFaultReason { /* Non-zero reserved field in present first-stage paging entry */ VTD_FR_FS_PAGING_ENTRY_RSVD = 0x72, VTD_FR_PASID_ENTRY_FSPTPTR_INV = 0x73, /* Invalid FSPTPTR in PASID entry */ + VTD_FR_FS_NON_CANONICAL = 0x80, /* SNG.1 : Address for FS not canonical.*/ VTD_FR_FS_PAGING_ENTRY_US = 0x81, /* Privilege violation */ VTD_FR_SM_WRITE = 0x85, /* No write permission */ From fed51ee5e02d8779ccbdce555e556c4ada321281 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 12 Dec 2024 16:37:45 +0800 Subject: [PATCH 13/49] intel_iommu: Check stage-1 translation result with interrupt range MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per VT-d spec 4.1 section 3.15, "Untranslated requests and translation requests that result in an address in the interrupt range will be blocked with condition code LGN.4 or SGN.8." This applies to both stage-1 and stage-2 IOMMU page table, move the check from vtd_iova_to_slpte() to vtd_do_iommu_translate() so stage-1 page table could also be checked. By this chance, update the comment with correct section number. Suggested-by: Yi Liu Signed-off-by: Zhenzhong Duan Reviewed-by: Clément Mathieu--Drif Acked-by: Jason Wang Message-Id: <20241212083757.605022-9-zhenzhong.duan@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 48 ++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 3959fe44c7..d53ce01e82 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -1138,7 +1138,6 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, uint32_t offset; uint64_t slpte; uint64_t access_right_check; - uint64_t xlat, size; if (!vtd_iova_sl_range_check(s, iova, ce, aw_bits, pasid)) { error_report_once("%s: detected IOVA overflow (iova=0x%" PRIx64 "," @@ -1191,28 +1190,7 @@ static int vtd_iova_to_slpte(IntelIOMMUState *s, VTDContextEntry *ce, level--; } - xlat = vtd_get_pte_addr(*slptep, aw_bits); - size = ~vtd_pt_level_page_mask(level) + 1; - - /* - * From VT-d spec 3.14: Untranslated requests and translation - * requests that result in an address in the interrupt range will be - * blocked with condition code LGN.4 or SGN.8. - */ - if ((xlat > VTD_INTERRUPT_ADDR_LAST || - xlat + size - 1 < VTD_INTERRUPT_ADDR_FIRST)) { - return 0; - } else { - error_report_once("%s: xlat address is in interrupt range " - "(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", " - "slpte=0x%" PRIx64 ", write=%d, " - "xlat=0x%" PRIx64 ", size=0x%" PRIx64 ", " - "pasid=0x%" PRIx32 ")", - __func__, iova, level, slpte, is_write, - xlat, size, pasid); - return s->scalable_mode ? -VTD_FR_SM_INTERRUPT_ADDR : - -VTD_FR_INTERRUPT_ADDR; - } + return 0; } typedef int (*vtd_page_walk_hook)(const IOMMUTLBEvent *event, void *private); @@ -2064,6 +2042,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, uint8_t access_flags; bool rid2pasid = (pasid == PCI_NO_PASID) && s->root_scalable; VTDIOTLBEntry *iotlb_entry; + uint64_t xlat, size; /* * We have standalone memory region for interrupt addresses, we @@ -2173,6 +2152,29 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &pte, &level, &reads, &writes, s->aw_bits, pasid); } + if (!ret_fr) { + xlat = vtd_get_pte_addr(pte, s->aw_bits); + size = ~vtd_pt_level_page_mask(level) + 1; + + /* + * Per VT-d spec 4.1 section 3.15: Untranslated requests and translation + * requests that result in an address in the interrupt range will be + * blocked with condition code LGN.4 or SGN.8. + */ + if ((xlat <= VTD_INTERRUPT_ADDR_LAST && + xlat + size - 1 >= VTD_INTERRUPT_ADDR_FIRST)) { + error_report_once("%s: xlat address is in interrupt range " + "(iova=0x%" PRIx64 ", level=0x%" PRIx32 ", " + "pte=0x%" PRIx64 ", write=%d, " + "xlat=0x%" PRIx64 ", size=0x%" PRIx64 ", " + "pasid=0x%" PRIx32 ")", + __func__, addr, level, pte, is_write, + xlat, size, pasid); + ret_fr = s->scalable_mode ? -VTD_FR_SM_INTERRUPT_ADDR : + -VTD_FR_INTERRUPT_ADDR; + } + } + if (ret_fr) { vtd_report_fault(s, -ret_fr, is_fpd_set, source_id, addr, is_write, pasid != PCI_NO_PASID, pasid); From 65c4f0999991f6321d6a369fa56c81c57c5b87ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Mathieu--Drif?= Date: Thu, 12 Dec 2024 16:37:46 +0800 Subject: [PATCH 14/49] intel_iommu: Set accessed and dirty bits during stage-1 translation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Clément Mathieu--Drif Signed-off-by: Zhenzhong Duan Reviewed-by: Yi Liu Acked-by: Jason Wang Message-Id: <20241212083757.605022-10-zhenzhong.duan@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 25 ++++++++++++++++++++++++- hw/i386/intel_iommu_internal.h | 3 +++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index d53ce01e82..0aeb0dbde9 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -1806,6 +1806,7 @@ static const bool vtd_qualified_faults[] = { [VTD_FR_FS_PAGING_ENTRY_US] = true, [VTD_FR_SM_WRITE] = true, [VTD_FR_SM_INTERRUPT_ADDR] = true, + [VTD_FR_FS_BIT_UPDATE_FAILED] = true, [VTD_FR_MAX] = false, }; @@ -1925,6 +1926,20 @@ static bool vtd_iova_fl_check_canonical(IntelIOMMUState *s, uint64_t iova, } } +static MemTxResult vtd_set_flag_in_pte(dma_addr_t base_addr, uint32_t index, + uint64_t pte, uint64_t flag) +{ + if (pte & flag) { + return MEMTX_OK; + } + pte |= flag; + pte = cpu_to_le64(pte); + return dma_memory_write(&address_space_memory, + base_addr + index * sizeof(pte), + &pte, sizeof(pte), + MEMTXATTRS_UNSPECIFIED); +} + /* * Given the @iova, get relevant @flptep. @flpte_level will be the last level * of the translation, can be used for deciding the size of large page. @@ -1938,7 +1953,7 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce, dma_addr_t addr = vtd_get_iova_pgtbl_base(s, ce, pasid); uint32_t level = vtd_get_iova_level(s, ce, pasid); uint32_t offset; - uint64_t flpte; + uint64_t flpte, flag_ad = VTD_FL_A; if (!vtd_iova_fl_check_canonical(s, iova, ce, pasid)) { error_report_once("%s: detected non canonical IOVA (iova=0x%" PRIx64 "," @@ -1985,6 +2000,14 @@ static int vtd_iova_to_flpte(IntelIOMMUState *s, VTDContextEntry *ce, return -VTD_FR_FS_PAGING_ENTRY_RSVD; } + if (vtd_is_last_pte(flpte, level) && is_write) { + flag_ad |= VTD_FL_D; + } + + if (vtd_set_flag_in_pte(addr, offset, flpte, flag_ad) != MEMTX_OK) { + return -VTD_FR_FS_BIT_UPDATE_FAILED; + } + if (vtd_is_last_pte(flpte, level)) { *flptep = flpte; *flpte_level = level; diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 3e7365dfff..22dd3faf0c 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -332,6 +332,7 @@ typedef enum VTDFaultReason { /* Output address in the interrupt address range for scalable mode */ VTD_FR_SM_INTERRUPT_ADDR = 0x87, + VTD_FR_FS_BIT_UPDATE_FAILED = 0x91, /* SFS.10 */ VTD_FR_MAX, /* Guard */ } VTDFaultReason; @@ -564,6 +565,8 @@ typedef struct VTDRootEntry VTDRootEntry; #define VTD_FL_P 1ULL #define VTD_FL_RW (1ULL << 1) #define VTD_FL_US (1ULL << 2) +#define VTD_FL_A (1ULL << 5) +#define VTD_FL_D (1ULL << 6) /* Second Level Page Translation Pointer*/ #define VTD_SM_PASID_ENTRY_SLPTPTR (~0xfffULL) From 16d4e418e98feeb53f28d17eeffb198fe0fd6f22 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 12 Dec 2024 16:37:47 +0800 Subject: [PATCH 15/49] intel_iommu: Flush stage-1 cache in iotlb invalidation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to spec, Page-Selective-within-Domain Invalidation (11b): 1. IOTLB entries caching second-stage mappings (PGTT=010b) or pass-through (PGTT=100b) mappings associated with the specified domain-id and the input-address range are invalidated. 2. IOTLB entries caching first-stage (PGTT=001b) or nested (PGTT=011b) mapping associated with specified domain-id are invalidated. So per spec definition the Page-Selective-within-Domain Invalidation needs to flush first stage and nested cached IOTLB entries as well. We don't support nested yet and pass-through mapping is never cached, so what in iotlb cache are only first-stage and second-stage mappings. Add a tag pgtt in VTDIOTLBEntry to mark PGTT type of the mapping and invalidate entries based on PGTT type. Signed-off-by: Zhenzhong Duan Reviewed-by: Clément Mathieu--Drif Acked-by: Jason Wang Reviewed-by: Yi Liu Message-Id: <20241212083757.605022-11-zhenzhong.duan@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 27 +++++++++++++++++++++------ include/hw/i386/intel_iommu.h | 1 + 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 0aeb0dbde9..95f344eb46 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -305,9 +305,21 @@ static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value, VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data; uint64_t gfn = (info->addr >> VTD_PAGE_SHIFT_4K) & info->mask; uint64_t gfn_tlb = (info->addr & entry->mask) >> VTD_PAGE_SHIFT_4K; - return (entry->domain_id == info->domain_id) && - (((entry->gfn & info->mask) == gfn) || - (entry->gfn == gfn_tlb)); + + if (entry->domain_id != info->domain_id) { + return false; + } + + /* + * According to spec, IOTLB entries caching first-stage (PGTT=001b) or + * nested (PGTT=011b) mapping associated with specified domain-id are + * invalidated. Nested isn't supported yet, so only need to check 001b. + */ + if (entry->pgtt == VTD_SM_PASID_ENTRY_FLT) { + return true; + } + + return (entry->gfn & info->mask) == gfn || entry->gfn == gfn_tlb; } /* Reset all the gen of VTDAddressSpace to zero and set the gen of @@ -382,7 +394,7 @@ out: static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id, uint16_t domain_id, hwaddr addr, uint64_t pte, uint8_t access_flags, uint32_t level, - uint32_t pasid) + uint32_t pasid, uint8_t pgtt) { VTDIOTLBEntry *entry = g_malloc(sizeof(*entry)); struct vtd_iotlb_key *key = g_malloc(sizeof(*key)); @@ -400,6 +412,7 @@ static void vtd_update_iotlb(IntelIOMMUState *s, uint16_t source_id, entry->access_flags = access_flags; entry->mask = vtd_pt_level_page_mask(level); entry->pasid = pasid; + entry->pgtt = pgtt; key->gfn = gfn; key->sid = source_id; @@ -2062,7 +2075,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, bool is_fpd_set = false; bool reads = true; bool writes = true; - uint8_t access_flags; + uint8_t access_flags, pgtt; bool rid2pasid = (pasid == PCI_NO_PASID) && s->root_scalable; VTDIOTLBEntry *iotlb_entry; uint64_t xlat, size; @@ -2171,9 +2184,11 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, if (s->flts && s->root_scalable) { ret_fr = vtd_iova_to_flpte(s, &ce, addr, is_write, &pte, &level, &reads, &writes, s->aw_bits, pasid); + pgtt = VTD_SM_PASID_ENTRY_FLT; } else { ret_fr = vtd_iova_to_slpte(s, &ce, addr, is_write, &pte, &level, &reads, &writes, s->aw_bits, pasid); + pgtt = VTD_SM_PASID_ENTRY_SLT; } if (!ret_fr) { xlat = vtd_get_pte_addr(pte, s->aw_bits); @@ -2207,7 +2222,7 @@ static bool vtd_do_iommu_translate(VTDAddressSpace *vtd_as, PCIBus *bus, page_mask = vtd_pt_level_page_mask(level); access_flags = IOMMU_ACCESS_FLAG(reads, writes); vtd_update_iotlb(s, source_id, vtd_get_domain_id(s, &ce, pasid), - addr, pte, access_flags, level, pasid); + addr, pte, access_flags, level, pasid, pgtt); out: vtd_iommu_unlock(s); entry->iova = addr & page_mask; diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index f44f3eb63a..a434c2489c 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -155,6 +155,7 @@ struct VTDIOTLBEntry { uint64_t pte; uint64_t mask; uint8_t access_flags; + uint8_t pgtt; }; /* VT-d Source-ID Qualifier types */ From 6ebe6cf2a0663f46f94a69eca5296f608da12f2e Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 12 Dec 2024 16:37:48 +0800 Subject: [PATCH 16/49] intel_iommu: Process PASID-based iotlb invalidation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PASID-based iotlb (piotlb) is used during walking Intel VT-d stage-1 page table. This emulates the stage-1 page table iotlb invalidation requested by a PASID-based IOTLB Invalidate Descriptor (P_IOTLB). Signed-off-by: Yi Liu Signed-off-by: Zhenzhong Duan Reviewed-by: Clément Mathieu--Drif Acked-by: Jason Wang Reviewed-by: Yi Liu Message-Id: <20241212083757.605022-12-zhenzhong.duan@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 43 ++++++++++++++++++++++++++++++++++ hw/i386/intel_iommu_internal.h | 3 +++ 2 files changed, 46 insertions(+) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 95f344eb46..c45a486bf8 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -322,6 +322,28 @@ static gboolean vtd_hash_remove_by_page(gpointer key, gpointer value, return (entry->gfn & info->mask) == gfn || entry->gfn == gfn_tlb; } +static gboolean vtd_hash_remove_by_page_piotlb(gpointer key, gpointer value, + gpointer user_data) +{ + VTDIOTLBEntry *entry = (VTDIOTLBEntry *)value; + VTDIOTLBPageInvInfo *info = (VTDIOTLBPageInvInfo *)user_data; + uint64_t gfn = (info->addr >> VTD_PAGE_SHIFT_4K) & info->mask; + uint64_t gfn_tlb = (info->addr & entry->mask) >> VTD_PAGE_SHIFT_4K; + + /* + * According to spec, PASID-based-IOTLB Invalidation in page granularity + * doesn't invalidate IOTLB entries caching second-stage (PGTT=010b) + * or pass-through (PGTT=100b) mappings. Nested isn't supported yet, + * so only need to check first-stage (PGTT=001b) mappings. + */ + if (entry->pgtt != VTD_SM_PASID_ENTRY_FLT) { + return false; + } + + return entry->domain_id == info->domain_id && entry->pasid == info->pasid && + ((entry->gfn & info->mask) == gfn || entry->gfn == gfn_tlb); +} + /* Reset all the gen of VTDAddressSpace to zero and set the gen of * IntelIOMMUState to 1. Must be called with IOMMU lock held. */ @@ -2937,11 +2959,29 @@ static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s, } } +static void vtd_piotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id, + uint32_t pasid, hwaddr addr, uint8_t am) +{ + VTDIOTLBPageInvInfo info; + + info.domain_id = domain_id; + info.pasid = pasid; + info.addr = addr; + info.mask = ~((1 << am) - 1); + + vtd_iommu_lock(s); + g_hash_table_foreach_remove(s->iotlb, + vtd_hash_remove_by_page_piotlb, &info); + vtd_iommu_unlock(s); +} + static bool vtd_process_piotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) { uint16_t domain_id; uint32_t pasid; + hwaddr addr; + uint8_t am; uint64_t mask[4] = {VTD_INV_DESC_PIOTLB_RSVD_VAL0, VTD_INV_DESC_PIOTLB_RSVD_VAL1, VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE}; @@ -2959,6 +2999,9 @@ static bool vtd_process_piotlb_desc(IntelIOMMUState *s, break; case VTD_INV_DESC_PIOTLB_PSI_IN_PASID: + am = VTD_INV_DESC_PIOTLB_AM(inv_desc->val[1]); + addr = (hwaddr) VTD_INV_DESC_PIOTLB_ADDR(inv_desc->val[1]); + vtd_piotlb_page_invalidate(s, domain_id, pasid, addr, am); break; default: diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 22dd3faf0c..5e4e563e62 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -471,6 +471,9 @@ typedef union VTDInvDesc VTDInvDesc; #define VTD_INV_DESC_PIOTLB_PSI_IN_PASID (3ULL << 4) #define VTD_INV_DESC_PIOTLB_DID(val) (((val) >> 16) & VTD_DOMAIN_ID_MASK) #define VTD_INV_DESC_PIOTLB_PASID(val) (((val) >> 32) & 0xfffffULL) +#define VTD_INV_DESC_PIOTLB_AM(val) ((val) & 0x3fULL) +#define VTD_INV_DESC_PIOTLB_IH(val) (((val) >> 6) & 0x1) +#define VTD_INV_DESC_PIOTLB_ADDR(val) ((val) & ~0xfffULL) #define VTD_INV_DESC_PIOTLB_RSVD_VAL0 0xfff000000000f1c0ULL #define VTD_INV_DESC_PIOTLB_RSVD_VAL1 0xf80ULL From 1075645d430e291404d19c88fc80d989669fa4c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Mathieu--Drif?= Date: Thu, 12 Dec 2024 16:37:49 +0800 Subject: [PATCH 17/49] intel_iommu: Add an internal API to find an address space with PASID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will be used to implement the device IOTLB invalidation Signed-off-by: Clément Mathieu--Drif Signed-off-by: Zhenzhong Duan Acked-by: Jason Wang Reviewed-by: Yi Liu Message-Id: <20241212083757.605022-13-zhenzhong.duan@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index c45a486bf8..9aa807593e 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -70,6 +70,11 @@ struct vtd_hiod_key { uint8_t devfn; }; +struct vtd_as_raw_key { + uint16_t sid; + uint32_t pasid; +}; + struct vtd_iotlb_key { uint64_t gfn; uint32_t pasid; @@ -1859,29 +1864,32 @@ static inline bool vtd_is_interrupt_addr(hwaddr addr) return VTD_INTERRUPT_ADDR_FIRST <= addr && addr <= VTD_INTERRUPT_ADDR_LAST; } -static gboolean vtd_find_as_by_sid(gpointer key, gpointer value, - gpointer user_data) +static gboolean vtd_find_as_by_sid_and_pasid(gpointer key, gpointer value, + gpointer user_data) { struct vtd_as_key *as_key = (struct vtd_as_key *)key; - uint16_t target_sid = *(uint16_t *)user_data; + struct vtd_as_raw_key *target = (struct vtd_as_raw_key *)user_data; uint16_t sid = PCI_BUILD_BDF(pci_bus_num(as_key->bus), as_key->devfn); - return sid == target_sid; + + return (as_key->pasid == target->pasid) && (sid == target->sid); +} + +static VTDAddressSpace *vtd_get_as_by_sid_and_pasid(IntelIOMMUState *s, + uint16_t sid, + uint32_t pasid) +{ + struct vtd_as_raw_key key = { + .sid = sid, + .pasid = pasid + }; + + return g_hash_table_find(s->vtd_address_spaces, + vtd_find_as_by_sid_and_pasid, &key); } static VTDAddressSpace *vtd_get_as_by_sid(IntelIOMMUState *s, uint16_t sid) { - uint8_t bus_num = PCI_BUS_NUM(sid); - VTDAddressSpace *vtd_as = s->vtd_as_cache[bus_num]; - - if (vtd_as && - (sid == PCI_BUILD_BDF(pci_bus_num(vtd_as->bus), vtd_as->devfn))) { - return vtd_as; - } - - vtd_as = g_hash_table_find(s->vtd_address_spaces, vtd_find_as_by_sid, &sid); - s->vtd_as_cache[bus_num] = vtd_as; - - return vtd_as; + return vtd_get_as_by_sid_and_pasid(s, sid, PCI_NO_PASID); } static void vtd_pt_enable_fast_path(IntelIOMMUState *s, uint16_t source_id) From dd8200503e230a4e32f930f5a57556b04651c16f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Mathieu--Drif?= Date: Thu, 12 Dec 2024 16:37:50 +0800 Subject: [PATCH 18/49] intel_iommu: Add support for PASID-based device IOTLB invalidation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Clément Mathieu--Drif Signed-off-by: Zhenzhong Duan Acked-by: Jason Wang Message-Id: <20241212083757.605022-14-zhenzhong.duan@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 50 ++++++++++++++++++++++++++++++++++ hw/i386/intel_iommu_internal.h | 11 ++++++++ 2 files changed, 61 insertions(+) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 9aa807593e..5634a37a74 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -3075,6 +3075,49 @@ static void do_invalidate_device_tlb(VTDAddressSpace *vtd_dev_as, memory_region_notify_iommu(&vtd_dev_as->iommu, 0, event); } +static bool vtd_process_device_piotlb_desc(IntelIOMMUState *s, + VTDInvDesc *inv_desc) +{ + uint16_t sid; + VTDAddressSpace *vtd_dev_as; + bool size; + bool global; + hwaddr addr; + uint32_t pasid; + uint64_t mask[4] = {VTD_INV_DESC_PASID_DEVICE_IOTLB_RSVD_VAL0, + VTD_INV_DESC_PASID_DEVICE_IOTLB_RSVD_VAL1, + VTD_INV_DESC_ALL_ONE, VTD_INV_DESC_ALL_ONE}; + + if (!vtd_inv_desc_reserved_check(s, inv_desc, mask, true, + __func__, "device piotlb inv")) { + return false; + } + + global = VTD_INV_DESC_PASID_DEVICE_IOTLB_GLOBAL(inv_desc->hi); + size = VTD_INV_DESC_PASID_DEVICE_IOTLB_SIZE(inv_desc->hi); + addr = VTD_INV_DESC_PASID_DEVICE_IOTLB_ADDR(inv_desc->hi); + sid = VTD_INV_DESC_PASID_DEVICE_IOTLB_SID(inv_desc->lo); + if (global) { + QLIST_FOREACH(vtd_dev_as, &s->vtd_as_with_notifiers, next) { + if ((vtd_dev_as->pasid != PCI_NO_PASID) && + (PCI_BUILD_BDF(pci_bus_num(vtd_dev_as->bus), + vtd_dev_as->devfn) == sid)) { + do_invalidate_device_tlb(vtd_dev_as, size, addr); + } + } + } else { + pasid = VTD_INV_DESC_PASID_DEVICE_IOTLB_PASID(inv_desc->lo); + vtd_dev_as = vtd_get_as_by_sid_and_pasid(s, sid, pasid); + if (!vtd_dev_as) { + return true; + } + + do_invalidate_device_tlb(vtd_dev_as, size, addr); + } + + return true; +} + static bool vtd_process_device_iotlb_desc(IntelIOMMUState *s, VTDInvDesc *inv_desc) { @@ -3161,6 +3204,13 @@ static bool vtd_process_inv_desc(IntelIOMMUState *s) } break; + case VTD_INV_DESC_DEV_PIOTLB: + trace_vtd_inv_desc("device-piotlb", inv_desc.hi, inv_desc.lo); + if (!vtd_process_device_piotlb_desc(s, &inv_desc)) { + return false; + } + break; + case VTD_INV_DESC_DEVICE: trace_vtd_inv_desc("device", inv_desc.hi, inv_desc.lo); if (!vtd_process_device_iotlb_desc(s, &inv_desc)) { diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 5e4e563e62..2c977aa7da 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -385,6 +385,7 @@ typedef union VTDInvDesc VTDInvDesc; #define VTD_INV_DESC_WAIT 0x5 /* Invalidation Wait Descriptor */ #define VTD_INV_DESC_PIOTLB 0x6 /* PASID-IOTLB Invalidate Desc */ #define VTD_INV_DESC_PC 0x7 /* PASID-cache Invalidate Desc */ +#define VTD_INV_DESC_DEV_PIOTLB 0x8 /* PASID-based-DIOTLB inv_desc*/ #define VTD_INV_DESC_NONE 0 /* Not an Invalidate Descriptor */ /* Masks for Invalidation Wait Descriptor*/ @@ -426,6 +427,16 @@ typedef union VTDInvDesc VTDInvDesc; /* Masks for Interrupt Entry Invalidate Descriptor */ #define VTD_INV_DESC_IEC_RSVD 0xffff000007fff1e0ULL +/* Masks for PASID based Device IOTLB Invalidate Descriptor */ +#define VTD_INV_DESC_PASID_DEVICE_IOTLB_ADDR(val) ((val) & \ + 0xfffffffffffff000ULL) +#define VTD_INV_DESC_PASID_DEVICE_IOTLB_SIZE(val) ((val >> 11) & 0x1) +#define VTD_INV_DESC_PASID_DEVICE_IOTLB_GLOBAL(val) ((val) & 0x1) +#define VTD_INV_DESC_PASID_DEVICE_IOTLB_SID(val) (((val) >> 16) & 0xffffULL) +#define VTD_INV_DESC_PASID_DEVICE_IOTLB_PASID(val) ((val >> 32) & 0xfffffULL) +#define VTD_INV_DESC_PASID_DEVICE_IOTLB_RSVD_VAL0 0xfff000000000f000ULL +#define VTD_INV_DESC_PASID_DEVICE_IOTLB_RSVD_VAL1 0x7feULL + /* Rsvd field masks for spte */ #define VTD_SPTE_SNP 0x800ULL From 1ab93575bdcb2972c99a174a957b598f7457a899 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 12 Dec 2024 16:37:51 +0800 Subject: [PATCH 19/49] intel_iommu: piotlb invalidation should notify unmap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is used by some emulated devices which caches address translation result. When piotlb invalidation issued in guest, those caches should be refreshed. There is already a similar implementation in iotlb invalidation. So update vtd_iotlb_page_invalidate_notify() to make it work also for piotlb invalidation. For device that does not implement ATS capability or disable it but still caches the translation result, it is better to implement ATS cap or enable it if there is need to cache the translation result. Signed-off-by: Yi Sun Signed-off-by: Zhenzhong Duan Acked-by: Jason Wang Reviewed-by: Clément Mathieu--Drif Message-Id: <20241212083757.605022-15-zhenzhong.duan@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 43 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 5634a37a74..7d4b99523d 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -2450,8 +2450,13 @@ static void vtd_iotlb_domain_invalidate(IntelIOMMUState *s, uint16_t domain_id) } } +/* + * There is no pasid field in iotlb invalidation descriptor, so PCI_NO_PASID + * is passed as parameter. Piotlb invalidation supports pasid, pasid in its + * descriptor is passed which should not be PCI_NO_PASID. + */ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s, - uint16_t domain_id, hwaddr addr, + uint16_t domain_id, hwaddr addr, uint8_t am, uint32_t pasid) { VTDAddressSpace *vtd_as; @@ -2460,19 +2465,37 @@ static void vtd_iotlb_page_invalidate_notify(IntelIOMMUState *s, hwaddr size = (1 << am) * VTD_PAGE_SIZE; QLIST_FOREACH(vtd_as, &(s->vtd_as_with_notifiers), next) { - if (pasid != PCI_NO_PASID && pasid != vtd_as->pasid) { - continue; - } ret = vtd_dev_to_context_entry(s, pci_bus_num(vtd_as->bus), vtd_as->devfn, &ce); if (!ret && domain_id == vtd_get_domain_id(s, &ce, vtd_as->pasid)) { + uint32_t rid2pasid = PCI_NO_PASID; + + if (s->root_scalable) { + rid2pasid = VTD_CE_GET_RID2PASID(&ce); + } + + /* + * In legacy mode, vtd_as->pasid == pasid is always true. + * In scalable mode, for vtd address space backing a PCI + * device without pasid, needs to compare pasid with + * rid2pasid of this device. + */ + if (!(vtd_as->pasid == pasid || + (vtd_as->pasid == PCI_NO_PASID && pasid == rid2pasid))) { + continue; + } + if (vtd_as_has_map_notifier(vtd_as)) { /* - * As long as we have MAP notifications registered in - * any of our IOMMU notifiers, we need to sync the - * shadow page table. + * When stage-1 translation is off, as long as we have MAP + * notifications registered in any of our IOMMU notifiers, + * we need to sync the shadow page table. Otherwise VFIO + * device attaches to nested page table instead of shadow + * page table, so no need to sync. */ - vtd_sync_shadow_page_table_range(vtd_as, &ce, addr, size); + if (!s->flts || !s->root_scalable) { + vtd_sync_shadow_page_table_range(vtd_as, &ce, addr, size); + } } else { /* * For UNMAP-only notifiers, we don't need to walk the @@ -2960,7 +2983,7 @@ static void vtd_piotlb_pasid_invalidate(IntelIOMMUState *s, continue; } - if (!s->flts) { + if (!s->flts || !vtd_as_has_map_notifier(vtd_as)) { vtd_address_space_sync(vtd_as); } } @@ -2981,6 +3004,8 @@ static void vtd_piotlb_page_invalidate(IntelIOMMUState *s, uint16_t domain_id, g_hash_table_foreach_remove(s->iotlb, vtd_hash_remove_by_page_piotlb, &info); vtd_iommu_unlock(s); + + vtd_iotlb_page_invalidate_notify(s, domain_id, addr, am, pasid); } static bool vtd_process_piotlb_desc(IntelIOMMUState *s, From 9609d7101867819086516c7df845337dcee1cf08 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 12 Dec 2024 16:37:52 +0800 Subject: [PATCH 20/49] tests/acpi: q35: allow DMAR acpi table changes Signed-off-by: Zhenzhong Duan Acked-by: Jason Wang Message-Id: <20241212083757.605022-16-zhenzhong.duan@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/qtest/bios-tables-test-allowed-diff.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h index dfb8523c8b..46f80be9ca 100644 --- a/tests/qtest/bios-tables-test-allowed-diff.h +++ b/tests/qtest/bios-tables-test-allowed-diff.h @@ -1 +1,2 @@ /* List of comma-separated changed AML files to ignore */ +"tests/data/acpi/x86/q35/DMAR.dmar", From ddd84fd0c1f8f62e8384591f7203aaabb935199a Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 12 Dec 2024 16:37:53 +0800 Subject: [PATCH 21/49] intel_iommu: Set default aw_bits to 48 starting from QEMU 9.2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to VTD spec, stage-1 page table could support 4-level and 5-level paging. However, 5-level paging translation emulation is unsupported yet. That means the only supported value for aw_bits is 48. So default aw_bits to 48 when stage-1 translation is turned on. For legacy and scalable modes, 48 is the default choice for modern OS when both 48 and 39 are supported. So it makes sense to set default to 48 for these two modes too starting from QEMU 9.2. Use pc_compat_9_1 to handle the compatibility for machines before 9.2. Suggested-by: Jason Wang Signed-off-by: Zhenzhong Duan Reviewed-by: Clément Mathieu--Drif Reviewed-by: Yi Liu Acked-by: Jason Wang Message-Id: <20241212083757.605022-17-zhenzhong.duan@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc.c | 1 + include/hw/i386/intel_iommu.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 9334b033f6..b46975c8a4 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -86,6 +86,7 @@ GlobalProperty pc_compat_9_1[] = { { "ICH9-LPC", "x-smi-swsmi-timer", "off" }, { "ICH9-LPC", "x-smi-periodic-timer", "off" }, { TYPE_INTEL_IOMMU_DEVICE, "stale-tm", "on" }, + { TYPE_INTEL_IOMMU_DEVICE, "aw-bits", "39" }, }; const size_t pc_compat_9_1_len = G_N_ELEMENTS(pc_compat_9_1); diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index a434c2489c..72428fefa4 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -45,7 +45,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(IntelIOMMUState, INTEL_IOMMU_DEVICE) #define DMAR_REG_SIZE 0x230 #define VTD_HOST_AW_39BIT 39 #define VTD_HOST_AW_48BIT 48 -#define VTD_HOST_ADDRESS_WIDTH VTD_HOST_AW_39BIT +#define VTD_HOST_ADDRESS_WIDTH VTD_HOST_AW_48BIT #define VTD_HAW_MASK(aw) ((1ULL << (aw)) - 1) #define DMAR_REPORT_F_INTR (1) From 81ab964f21620db32558277f220eb0d803c14109 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 12 Dec 2024 16:37:54 +0800 Subject: [PATCH 22/49] tests/acpi: q35: Update host address width in DMAR MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Differences: @@ -1,39 +1,39 @@ /* * Intel ACPI Component Architecture * AML/ASL+ Disassembler version 20200925 (64-bit version) * Copyright (c) 2000 - 2020 Intel Corporation * - * Disassembly of tests/data/acpi/x86/q35/DMAR.dmar, Mon Nov 11 15:31:18 2024 + * Disassembly of /tmp/aml-SPJ4W2, Mon Nov 11 15:31:18 2024 * * ACPI Data Table [DMAR] * * Format: [HexOffset DecimalOffset ByteLength] FieldName : FieldValue */ [000h 0000 4] Signature : "DMAR" [DMA Remapping table] [004h 0004 4] Table Length : 00000078 [008h 0008 1] Revision : 01 -[009h 0009 1] Checksum : 15 +[009h 0009 1] Checksum : 0C [00Ah 0010 6] Oem ID : "BOCHS " [010h 0016 8] Oem Table ID : "BXPC " [018h 0024 4] Oem Revision : 00000001 [01Ch 0028 4] Asl Compiler ID : "BXPC" [020h 0032 4] Asl Compiler Revision : 00000001 -[024h 0036 1] Host Address Width : 26 +[024h 0036 1] Host Address Width : 2F [025h 0037 1] Flags : 01 [026h 0038 10] Reserved : 00 00 00 00 00 00 00 00 00 00 [030h 0048 2] Subtable Type : 0000 [Hardware Unit Definition] [032h 0050 2] Length : 0040 [034h 0052 1] Flags : 00 [035h 0053 1] Reserved : 00 [036h 0054 2] PCI Segment Number : 0000 [038h 0056 8] Register Base Address : 00000000FED90000 [040h 0064 1] Device Scope Type : 03 [IOAPIC Device] [041h 0065 1] Entry Length : 08 [042h 0066 2] Reserved : 0000 [044h 0068 1] Enumeration ID : 00 [045h 0069 1] PCI Bus Number : FF Signed-off-by: Zhenzhong Duan Acked-by: Clément Mathieu--Drif Message-Id: <20241212083757.605022-18-zhenzhong.duan@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/data/acpi/x86/q35/DMAR.dmar | Bin 120 -> 120 bytes tests/qtest/bios-tables-test-allowed-diff.h | 1 - 2 files changed, 1 deletion(-) diff --git a/tests/data/acpi/x86/q35/DMAR.dmar b/tests/data/acpi/x86/q35/DMAR.dmar index 0dca6e68ad8a8ca5b981bcfbc745385a63e9f216..0c05976715c6f2f6ec46ef6d37790f86a392b5ea 100644 GIT binary patch delta 21 ccmb=Z;BxVG460yYU|{5#$R)+7KT$Op05(qqk^lez delta 21 ccmb=Z;BxVG460yYU|Sg05*ICk^lez diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h index 46f80be9ca..dfb8523c8b 100644 --- a/tests/qtest/bios-tables-test-allowed-diff.h +++ b/tests/qtest/bios-tables-test-allowed-diff.h @@ -1,2 +1 @@ /* List of comma-separated changed AML files to ignore */ -"tests/data/acpi/x86/q35/DMAR.dmar", From aa68a9fbdb81c47c2a48a3199559df470c3d9eba Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 12 Dec 2024 16:37:55 +0800 Subject: [PATCH 23/49] intel_iommu: Introduce a property x-flts for stage-1 translation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Intel VT-d 3.0 introduces scalable mode, and it has a bunch of capabilities related to scalable mode translation, thus there are multiple combinations. This vIOMMU implementation wants to simplify it with a new property "x-flts". When turned on in scalable mode, stage-1 translation is supported. When turned on in legacy mode, throw out error. With stage-1 translation support exposed to user, also accurate the pasid entry check in vtd_pe_type_check(). Suggested-by: Jason Wang Signed-off-by: Yi Liu Signed-off-by: Yi Sun Signed-off-by: Zhenzhong Duan Reviewed-by: Clément Mathieu--Drif Message-Id: <20241212083757.605022-19-zhenzhong.duan@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 28 +++++++++++++++++++--------- hw/i386/intel_iommu_internal.h | 2 ++ 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 7d4b99523d..0111186f7a 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -803,16 +803,18 @@ static inline bool vtd_is_fl_level_supported(IntelIOMMUState *s, uint32_t level) } /* Return true if check passed, otherwise false */ -static inline bool vtd_pe_type_check(X86IOMMUState *x86_iommu, - VTDPASIDEntry *pe) +static inline bool vtd_pe_type_check(IntelIOMMUState *s, VTDPASIDEntry *pe) { switch (VTD_PE_GET_TYPE(pe)) { - case VTD_SM_PASID_ENTRY_SLT: - return true; - case VTD_SM_PASID_ENTRY_PT: - return x86_iommu->pt_supported; case VTD_SM_PASID_ENTRY_FLT: + return !!(s->ecap & VTD_ECAP_FLTS); + case VTD_SM_PASID_ENTRY_SLT: + return !!(s->ecap & VTD_ECAP_SLTS); case VTD_SM_PASID_ENTRY_NESTED: + /* Not support NESTED page table type yet */ + return false; + case VTD_SM_PASID_ENTRY_PT: + return !!(s->ecap & VTD_ECAP_PT); default: /* Unknown type */ return false; @@ -861,7 +863,6 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, uint8_t pgtt; uint32_t index; dma_addr_t entry_size; - X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); index = VTD_PASID_TABLE_INDEX(pasid); entry_size = VTD_PASID_ENTRY_SIZE; @@ -875,7 +876,7 @@ static int vtd_get_pe_in_pasid_leaf_table(IntelIOMMUState *s, } /* Do translation type check */ - if (!vtd_pe_type_check(x86_iommu, pe)) { + if (!vtd_pe_type_check(s, pe)) { return -VTD_FR_PASID_TABLE_ENTRY_INV; } @@ -3827,6 +3828,7 @@ static const Property vtd_properties[] = { VTD_HOST_ADDRESS_WIDTH), DEFINE_PROP_BOOL("caching-mode", IntelIOMMUState, caching_mode, FALSE), DEFINE_PROP_BOOL("x-scalable-mode", IntelIOMMUState, scalable_mode, FALSE), + DEFINE_PROP_BOOL("x-flts", IntelIOMMUState, flts, FALSE), DEFINE_PROP_BOOL("snoop-control", IntelIOMMUState, snoop_control, false), DEFINE_PROP_BOOL("x-pasid-mode", IntelIOMMUState, pasid, false), DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true), @@ -4557,7 +4559,10 @@ static void vtd_cap_init(IntelIOMMUState *s) } /* TODO: read cap/ecap from host to decide which cap to be exposed. */ - if (s->scalable_mode) { + if (s->flts) { + s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_FLTS; + s->cap |= VTD_CAP_FS1GP; + } else if (s->scalable_mode) { s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS; } @@ -4736,6 +4741,11 @@ static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) } } + if (!s->scalable_mode && s->flts) { + error_setg(errp, "x-flts is only available in scalable mode"); + return false; + } + if (!s->flts && s->aw_bits != VTD_HOST_AW_39BIT && s->aw_bits != VTD_HOST_AW_48BIT) { error_setg(errp, "%s: supported values for aw-bits are: %d, %d", diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index 2c977aa7da..e8b211e8b0 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -195,6 +195,7 @@ #define VTD_ECAP_PASID (1ULL << 40) #define VTD_ECAP_SMTS (1ULL << 43) #define VTD_ECAP_SLTS (1ULL << 46) +#define VTD_ECAP_FLTS (1ULL << 47) /* CAP_REG */ /* (offset >> 4) << 24 */ @@ -211,6 +212,7 @@ #define VTD_CAP_SLLPS ((1ULL << 34) | (1ULL << 35)) #define VTD_CAP_DRAIN_WRITE (1ULL << 54) #define VTD_CAP_DRAIN_READ (1ULL << 55) +#define VTD_CAP_FS1GP (1ULL << 56) #define VTD_CAP_DRAIN (VTD_CAP_DRAIN_READ | VTD_CAP_DRAIN_WRITE) #define VTD_CAP_CM (1ULL << 7) #define VTD_PASID_ID_SHIFT 20 From d9d32478ed4543539322761c19a73edf5d0be059 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 12 Dec 2024 16:37:56 +0800 Subject: [PATCH 24/49] intel_iommu: Introduce a property to control FS1GP cap bit setting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This gives user flexibility to turn off FS1GP for debug purpose. It is also useful for future nesting feature. When host IOMMU doesn't support FS1GP but vIOMMU does, nested page table on host side works after turning FS1GP off in vIOMMU. This property has no effect when vIOMMU is in legacy mode or x-flts=off in scalable modme. Signed-off-by: Zhenzhong Duan Reviewed-by: Clément Mathieu--Drif Reviewed-by: Yi Liu Acked-by: Jason Wang Message-Id: <20241212083757.605022-20-zhenzhong.duan@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/intel_iommu.c | 5 ++++- include/hw/i386/intel_iommu.h | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index 0111186f7a..f366c223d0 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -3834,6 +3834,7 @@ static const Property vtd_properties[] = { DEFINE_PROP_BOOL("dma-drain", IntelIOMMUState, dma_drain, true), DEFINE_PROP_BOOL("dma-translation", IntelIOMMUState, dma_translation, true), DEFINE_PROP_BOOL("stale-tm", IntelIOMMUState, stale_tm, false), + DEFINE_PROP_BOOL("fs1gp", IntelIOMMUState, fs1gp, true), }; /* Read IRTE entry with specific index */ @@ -4561,7 +4562,9 @@ static void vtd_cap_init(IntelIOMMUState *s) /* TODO: read cap/ecap from host to decide which cap to be exposed. */ if (s->flts) { s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_FLTS; - s->cap |= VTD_CAP_FS1GP; + if (s->fs1gp) { + s->cap |= VTD_CAP_FS1GP; + } } else if (s->scalable_mode) { s->ecap |= VTD_ECAP_SMTS | VTD_ECAP_SRS | VTD_ECAP_SLTS; } diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index 72428fefa4..9e92bffd5a 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -307,6 +307,7 @@ struct IntelIOMMUState { bool dma_drain; /* Whether DMA r/w draining enabled */ bool dma_translation; /* Whether DMA translation supported */ bool pasid; /* Whether to support PASID */ + bool fs1gp; /* First Stage 1-GByte Page Support */ /* Transient Mapping, Reserved(0) since VTD spec revision 3.2 */ bool stale_tm; From 2c746dfe1c69896b0e434d37efe014654f0a3a65 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Thu, 12 Dec 2024 16:37:57 +0800 Subject: [PATCH 25/49] tests/qtest: Add intel-iommu test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the framework to test the intel-iommu device. Currently only tested cap/ecap bits correctness when x-flts=on in scalable mode. Also tested cap/ecap bits consistency before and after system reset. Signed-off-by: Zhenzhong Duan Acked-by: Thomas Huth Reviewed-by: Clément Mathieu--Drif Acked-by: Jason Wang Message-Id: <20241212083757.605022-21-zhenzhong.duan@intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- MAINTAINERS | 1 + include/hw/i386/intel_iommu.h | 1 + tests/qtest/intel-iommu-test.c | 64 ++++++++++++++++++++++++++++++++++ tests/qtest/meson.build | 1 + 4 files changed, 67 insertions(+) create mode 100644 tests/qtest/intel-iommu-test.c diff --git a/MAINTAINERS b/MAINTAINERS index 8b9d9a7cac..a928ce3e41 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3711,6 +3711,7 @@ F: hw/i386/intel_iommu.c F: hw/i386/intel_iommu_internal.h F: include/hw/i386/intel_iommu.h F: tests/functional/test_intel_iommu.py +F: tests/qtest/intel-iommu-test.c AMD-Vi Emulation S: Orphan diff --git a/include/hw/i386/intel_iommu.h b/include/hw/i386/intel_iommu.h index 9e92bffd5a..e95477e855 100644 --- a/include/hw/i386/intel_iommu.h +++ b/include/hw/i386/intel_iommu.h @@ -47,6 +47,7 @@ OBJECT_DECLARE_SIMPLE_TYPE(IntelIOMMUState, INTEL_IOMMU_DEVICE) #define VTD_HOST_AW_48BIT 48 #define VTD_HOST_ADDRESS_WIDTH VTD_HOST_AW_48BIT #define VTD_HAW_MASK(aw) ((1ULL << (aw)) - 1) +#define VTD_MGAW_FROM_CAP(cap) ((cap >> 16) & 0x3fULL) #define DMAR_REPORT_F_INTR (1) diff --git a/tests/qtest/intel-iommu-test.c b/tests/qtest/intel-iommu-test.c new file mode 100644 index 0000000000..c521b3796e --- /dev/null +++ b/tests/qtest/intel-iommu-test.c @@ -0,0 +1,64 @@ +/* + * QTest testcase for intel-iommu + * + * Copyright (c) 2024 Intel, Inc. + * + * Author: Zhenzhong Duan + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "libqtest.h" +#include "hw/i386/intel_iommu_internal.h" + +#define CAP_STAGE_1_FIXED1 (VTD_CAP_FRO | VTD_CAP_NFR | VTD_CAP_ND | \ + VTD_CAP_MAMV | VTD_CAP_PSI | VTD_CAP_SLLPS) +#define ECAP_STAGE_1_FIXED1 (VTD_ECAP_QI | VTD_ECAP_IR | VTD_ECAP_IRO | \ + VTD_ECAP_MHMV | VTD_ECAP_SMTS | VTD_ECAP_FLTS) + +static inline uint64_t vtd_reg_readq(QTestState *s, uint64_t offset) +{ + return qtest_readq(s, Q35_HOST_BRIDGE_IOMMU_ADDR + offset); +} + +static void test_intel_iommu_stage_1(void) +{ + uint8_t init_csr[DMAR_REG_SIZE]; /* register values */ + uint8_t post_reset_csr[DMAR_REG_SIZE]; /* register values */ + uint64_t cap, ecap, tmp; + QTestState *s; + + s = qtest_init("-M q35 -device intel-iommu,x-scalable-mode=on,x-flts=on"); + + cap = vtd_reg_readq(s, DMAR_CAP_REG); + g_assert((cap & CAP_STAGE_1_FIXED1) == CAP_STAGE_1_FIXED1); + + tmp = cap & VTD_CAP_SAGAW_MASK; + g_assert(tmp == (VTD_CAP_SAGAW_39bit | VTD_CAP_SAGAW_48bit)); + + tmp = VTD_MGAW_FROM_CAP(cap); + g_assert(tmp == VTD_HOST_AW_48BIT - 1); + + ecap = vtd_reg_readq(s, DMAR_ECAP_REG); + g_assert((ecap & ECAP_STAGE_1_FIXED1) == ECAP_STAGE_1_FIXED1); + + qtest_memread(s, Q35_HOST_BRIDGE_IOMMU_ADDR, init_csr, DMAR_REG_SIZE); + + qobject_unref(qtest_qmp(s, "{ 'execute': 'system_reset' }")); + qtest_qmp_eventwait(s, "RESET"); + + qtest_memread(s, Q35_HOST_BRIDGE_IOMMU_ADDR, post_reset_csr, DMAR_REG_SIZE); + /* Ensure registers are consistent after hard reset */ + g_assert(!memcmp(init_csr, post_reset_csr, DMAR_REG_SIZE)); + + qtest_quit(s); +} + +int main(int argc, char **argv) +{ + g_test_init(&argc, &argv, NULL); + qtest_add_func("/q35/intel-iommu/stage-1", test_intel_iommu_stage_1); + + return g_test_run(); +} diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build index bf4d5c268b..edd53ec995 100644 --- a/tests/qtest/meson.build +++ b/tests/qtest/meson.build @@ -93,6 +93,7 @@ qtests_i386 = \ (config_all_devices.has_key('CONFIG_SB16') ? ['fuzz-sb16-test'] : []) + \ (config_all_devices.has_key('CONFIG_SDHCI_PCI') ? ['fuzz-sdcard-test'] : []) + \ (config_all_devices.has_key('CONFIG_ESP_PCI') ? ['am53c974-test'] : []) + \ + (config_all_devices.has_key('CONFIG_VTD') ? ['intel-iommu-test'] : []) + \ (host_os != 'windows' and \ config_all_devices.has_key('CONFIG_ACPI_ERST') ? ['erst-test'] : []) + \ (config_all_devices.has_key('CONFIG_PCIE_PORT') and \ From 42e2a7a0ab23784e44fcb18369e06067abc89305 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 12 Dec 2024 22:04:02 +1000 Subject: [PATCH 26/49] pci/msix: Fix msix pba read vector poll end calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The end vector calculation has a bug that results in polling fewer than required vectors when reading at a non-zero offset in PBA memory. Fixes: bbef882cc193 ("msi: add API to get notified about pending bit poll") Signed-off-by: Nicholas Piggin Message-Id: <20241212120402.1475053-1-npiggin@gmail.com> Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/msix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/pci/msix.c b/hw/pci/msix.c index d8a55a6474..57ec7084a4 100644 --- a/hw/pci/msix.c +++ b/hw/pci/msix.c @@ -250,7 +250,7 @@ static uint64_t msix_pba_mmio_read(void *opaque, hwaddr addr, PCIDevice *dev = opaque; if (dev->msix_vector_poll_notifier) { unsigned vector_start = addr * 8; - unsigned vector_end = MIN(addr + size * 8, dev->msix_entries_nr); + unsigned vector_end = MIN((addr + size) * 8, dev->msix_entries_nr); dev->msix_vector_poll_notifier(dev, vector_start, vector_end); } From 239c3f7ed44d8b82a682a1c2e9e8c3355c10d321 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 15 Jan 2025 13:50:17 +0100 Subject: [PATCH 27/49] acpi/ghes: get rid of ACPI_HEST_SRC_ID_RESERVED This is just duplicating ACPI_GHES_ERROR_SOURCE_COUNT, which has a better name. So, drop the duplication. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Igor Mammedov Reviewed-by: Jonathan Cameron Message-Id: <9012bf4c9630adf15a22af3c88fda8270916887b.1736945236.git.mchehab+huawei@kernel.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/ghes.c | 7 ++----- include/hw/acpi/ghes.h | 3 ++- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c index e9511d9b8f..dc217694de 100644 --- a/hw/acpi/ghes.c +++ b/hw/acpi/ghes.c @@ -34,9 +34,6 @@ /* The max size in bytes for one error block */ #define ACPI_GHES_MAX_RAW_DATA_LENGTH (1 * KiB) -/* Now only support ARMv8 SEA notification type error source */ -#define ACPI_GHES_ERROR_SOURCE_COUNT 1 - /* Generic Hardware Error Source version 2 */ #define ACPI_GHES_SOURCE_GENERIC_ERROR_V2 10 @@ -396,7 +393,7 @@ int acpi_ghes_record_errors(uint8_t source_id, uint64_t physical_address) AcpiGedState *acpi_ged_state; AcpiGhesState *ags; - assert(source_id < ACPI_HEST_SRC_ID_RESERVED); + assert(source_id < ACPI_GHES_ERROR_SOURCE_COUNT); acpi_ged_state = ACPI_GED(object_resolve_path_type("", TYPE_ACPI_GED, NULL)); @@ -407,7 +404,7 @@ int acpi_ghes_record_errors(uint8_t source_id, uint64_t physical_address) if (physical_address) { - if (source_id < ACPI_HEST_SRC_ID_RESERVED) { + if (source_id < ACPI_GHES_ERROR_SOURCE_COUNT) { start_addr += source_id * sizeof(uint64_t); } diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h index 674f6958e9..59e3b8fb24 100644 --- a/include/hw/acpi/ghes.h +++ b/include/hw/acpi/ghes.h @@ -59,7 +59,8 @@ enum AcpiGhesNotifyType { enum { ACPI_HEST_SRC_ID_SEA = 0, /* future ids go here */ - ACPI_HEST_SRC_ID_RESERVED, + + ACPI_GHES_ERROR_SOURCE_COUNT }; typedef struct AcpiGhesState { From 872b69f21fe34f133982e02d04e33425d761d33b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 15 Jan 2025 13:50:18 +0100 Subject: [PATCH 28/49] acpi/ghes: simplify acpi_ghes_record_errors() code Reduce the ident of the function and prepares it for the next changes. No functional changes. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Igor Mammedov Reviewed-by: Jonathan Cameron Message-Id: <19af4188535217213486d169e0501e592bc78a95.1736945236.git.mchehab+huawei@kernel.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/ghes.c | 56 ++++++++++++++++++++++++++------------------------ 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c index dc217694de..e66f3be150 100644 --- a/hw/acpi/ghes.c +++ b/hw/acpi/ghes.c @@ -402,40 +402,42 @@ int acpi_ghes_record_errors(uint8_t source_id, uint64_t physical_address) start_addr = le64_to_cpu(ags->ghes_addr_le); - if (physical_address) { + if (!physical_address) { + return -1; + } - if (source_id < ACPI_GHES_ERROR_SOURCE_COUNT) { - start_addr += source_id * sizeof(uint64_t); - } + if (source_id < ACPI_GHES_ERROR_SOURCE_COUNT) { + start_addr += source_id * sizeof(uint64_t); + } - cpu_physical_memory_read(start_addr, &error_block_addr, - sizeof(error_block_addr)); + cpu_physical_memory_read(start_addr, &error_block_addr, + sizeof(error_block_addr)); - error_block_addr = le64_to_cpu(error_block_addr); + error_block_addr = le64_to_cpu(error_block_addr); - read_ack_register_addr = start_addr + - ACPI_GHES_ERROR_SOURCE_COUNT * sizeof(uint64_t); + read_ack_register_addr = start_addr + + ACPI_GHES_ERROR_SOURCE_COUNT * sizeof(uint64_t); - cpu_physical_memory_read(read_ack_register_addr, - &read_ack_register, sizeof(read_ack_register)); + cpu_physical_memory_read(read_ack_register_addr, + &read_ack_register, sizeof(read_ack_register)); - /* zero means OSPM does not acknowledge the error */ - if (!read_ack_register) { - error_report("OSPM does not acknowledge previous error," - " so can not record CPER for current error anymore"); - } else if (error_block_addr) { - read_ack_register = cpu_to_le64(0); - /* - * Clear the Read Ack Register, OSPM will write it to 1 when - * it acknowledges this error. - */ - cpu_physical_memory_write(read_ack_register_addr, - &read_ack_register, sizeof(uint64_t)); + /* zero means OSPM does not acknowledge the error */ + if (!read_ack_register) { + error_report("OSPM does not acknowledge previous error," + " so can not record CPER for current error anymore"); + } else if (error_block_addr) { + read_ack_register = cpu_to_le64(0); + /* + * Clear the Read Ack Register, OSPM will write it to 1 when + * it acknowledges this error. + */ + cpu_physical_memory_write(read_ack_register_addr, + &read_ack_register, sizeof(uint64_t)); - ret = acpi_ghes_record_mem_error(error_block_addr, - physical_address); - } else - error_report("can not find Generic Error Status Block"); + ret = acpi_ghes_record_mem_error(error_block_addr, + physical_address); + } else { + error_report("can not find Generic Error Status Block"); } return ret; From 606a42c4c1d46b31a95cab09a7033e7f22f9ed3d Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 15 Jan 2025 13:50:19 +0100 Subject: [PATCH 29/49] acpi/ghes: simplify the per-arch caller to build HEST table The GHES driver requires not only a HEST table, but also a separate firmware file to store Error Structure records. It can't do one without the other. Simplify the caller logic for it to require one function. No functional changes. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Jonathan Cameron Reviewed-by: Igor Mammedov Message-Id: <9584bb8953385e165681d5d185c503f8df8ef42f.1736945236.git.mchehab+huawei@kernel.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/ghes.c | 7 +++++-- hw/arm/virt-acpi-build.c | 5 ++--- include/hw/acpi/ghes.h | 4 ++-- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c index e66f3be150..4a6c45bcb4 100644 --- a/hw/acpi/ghes.c +++ b/hw/acpi/ghes.c @@ -233,7 +233,7 @@ static int acpi_ghes_record_mem_error(uint64_t error_block_address, * Initialize "etc/hardware_errors" and "etc/hardware_errors_addr" fw_cfg blobs. * See docs/specs/acpi_hest_ghes.rst for blobs format. */ -void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker) +static void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker) { int i, error_status_block_offset; @@ -356,12 +356,15 @@ static void build_ghes_v2(GArray *table_data, int source_id, BIOSLinker *linker) } /* Build Hardware Error Source Table */ -void acpi_build_hest(GArray *table_data, BIOSLinker *linker, +void acpi_build_hest(GArray *table_data, GArray *hardware_errors, + BIOSLinker *linker, const char *oem_id, const char *oem_table_id) { AcpiTable table = { .sig = "HEST", .rev = 1, .oem_id = oem_id, .oem_table_id = oem_table_id }; + build_ghes_error_table(hardware_errors, linker); + acpi_table_begin(&table, table_data); /* Error Source Count */ diff --git a/hw/arm/virt-acpi-build.c b/hw/arm/virt-acpi-build.c index c9b13057a3..3ac8f8e178 100644 --- a/hw/arm/virt-acpi-build.c +++ b/hw/arm/virt-acpi-build.c @@ -946,10 +946,9 @@ void virt_acpi_build(VirtMachineState *vms, AcpiBuildTables *tables) build_dbg2(tables_blob, tables->linker, vms); if (vms->ras) { - build_ghes_error_table(tables->hardware_errors, tables->linker); acpi_add_table(table_offsets, tables_blob); - acpi_build_hest(tables_blob, tables->linker, vms->oem_id, - vms->oem_table_id); + acpi_build_hest(tables_blob, tables->hardware_errors, tables->linker, + vms->oem_id, vms->oem_table_id); } if (ms->numa_state->num_nodes > 0) { diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h index 59e3b8fb24..20016c226d 100644 --- a/include/hw/acpi/ghes.h +++ b/include/hw/acpi/ghes.h @@ -68,8 +68,8 @@ typedef struct AcpiGhesState { bool present; /* True if GHES is present at all on this board */ } AcpiGhesState; -void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker); -void acpi_build_hest(GArray *table_data, BIOSLinker *linker, +void acpi_build_hest(GArray *table_data, GArray *hardware_errors, + BIOSLinker *linker, const char *oem_id, const char *oem_table_id); void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s, GArray *hardware_errors); From a85a3b729b3c31d8cc878017308997a8112655b2 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 15 Jan 2025 13:50:20 +0100 Subject: [PATCH 30/49] acpi/ghes: better handle source_id and notification GHES has two fields that are stored on HEST error source blocks associated with notifications: - notification type, which is a number defined at the ACPI spec containing several arch-specific synchronous and assynchronous types; - source id, which is a HW/FW defined number, used to distinguish between different implemented sources. There could be several sources with the same notification type, which is dependent of the way each architecture maps notifications. Right now, build_ghes_v2() hardcodes a 1:1 mapping between such fields. Move it to two independent parameters, allowing the caller function to fill both. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Jonathan Cameron Reviewed-by: Igor Mammedov Message-Id: <133ff72ea1041fed7dbcf97b7a2b0f4dfacde31a.1736945236.git.mchehab+huawei@kernel.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/ghes.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c index 4a6c45bcb4..29cd7e4d81 100644 --- a/hw/acpi/ghes.c +++ b/hw/acpi/ghes.c @@ -284,9 +284,13 @@ static void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker) } /* Build Generic Hardware Error Source version 2 (GHESv2) */ -static void build_ghes_v2(GArray *table_data, int source_id, BIOSLinker *linker) +static void build_ghes_v2(GArray *table_data, + BIOSLinker *linker, + enum AcpiGhesNotifyType notify, + uint16_t source_id) { uint64_t address_offset; + /* * Type: * Generic Hardware Error Source version 2(GHESv2 - Type 10) @@ -316,18 +320,8 @@ static void build_ghes_v2(GArray *table_data, int source_id, BIOSLinker *linker) address_offset + GAS_ADDR_OFFSET, sizeof(uint64_t), ACPI_GHES_ERRORS_FW_CFG_FILE, source_id * sizeof(uint64_t)); - switch (source_id) { - case ACPI_HEST_SRC_ID_SEA: - /* - * Notification Structure - * Now only enable ARMv8 SEA notification type - */ - build_ghes_hw_error_notification(table_data, ACPI_GHES_NOTIFY_SEA); - break; - default: - error_report("Not support this error source"); - abort(); - } + /* Notification Structure */ + build_ghes_hw_error_notification(table_data, notify); /* Error Status Block Length */ build_append_int_noprefix(table_data, ACPI_GHES_MAX_RAW_DATA_LENGTH, 4); @@ -369,7 +363,8 @@ void acpi_build_hest(GArray *table_data, GArray *hardware_errors, /* Error Source Count */ build_append_int_noprefix(table_data, ACPI_GHES_ERROR_SOURCE_COUNT, 4); - build_ghes_v2(table_data, ACPI_HEST_SRC_ID_SEA, linker); + build_ghes_v2(table_data, linker, + ACPI_GHES_NOTIFY_SEA, ACPI_HEST_SRC_ID_SEA); acpi_table_end(linker, &table); } From 5eb07a4ff067053b4d6bf55d2d614b65b00a476b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 15 Jan 2025 13:50:21 +0100 Subject: [PATCH 31/49] acpi/ghes: Fix acpi_ghes_record_errors() argument Align the header file with the actual implementation of this function, as the first argument is source ID and not notification type. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Jonathan Cameron Reviewed-by: Igor Mammedov Message-Id: Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- include/hw/acpi/ghes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h index 20016c226d..50e3a25ea3 100644 --- a/include/hw/acpi/ghes.h +++ b/include/hw/acpi/ghes.h @@ -73,7 +73,7 @@ void acpi_build_hest(GArray *table_data, GArray *hardware_errors, const char *oem_id, const char *oem_table_id); void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s, GArray *hardware_errors); -int acpi_ghes_record_errors(uint8_t notify, uint64_t error_physical_addr); +int acpi_ghes_record_errors(uint8_t source_id, uint64_t error_physical_addr); /** * acpi_ghes_present: Report whether ACPI GHES table is present From 4ffedca347c458db17c464f9329222403fe54f22 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 15 Jan 2025 13:50:22 +0100 Subject: [PATCH 32/49] acpi/ghes: Remove a duplicated out of bounds check acpi_ghes_record_errors() has an assert() at the beginning to ensure that source_id will be lower than ACPI_GHES_ERROR_SOURCE_COUNT. Remove a duplicated check. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Igor Mammedov Reviewed-by: Jonathan Cameron Message-Id: Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/ghes.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c index 29cd7e4d81..5f67322bf0 100644 --- a/hw/acpi/ghes.c +++ b/hw/acpi/ghes.c @@ -404,9 +404,7 @@ int acpi_ghes_record_errors(uint8_t source_id, uint64_t physical_address) return -1; } - if (source_id < ACPI_GHES_ERROR_SOURCE_COUNT) { - start_addr += source_id * sizeof(uint64_t); - } + start_addr += source_id * sizeof(uint64_t); cpu_physical_memory_read(start_addr, &error_block_addr, sizeof(error_block_addr)); From 26e0893e420b34677e6e904a06edf55331bd937c Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 15 Jan 2025 13:50:23 +0100 Subject: [PATCH 33/49] acpi/ghes: Change the type for source_id As described at: ACPI 6.5 spec at: 18.3.2. ACPI Error Source In particular at GHES/GHESv2 table: Table 18.10 Generic Hardware Error Source Structure HEST source ID is actually a 16-bit value. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Jonathan Cameron Reviewed-by: Igor Mammedov Message-Id: <0e83ba548c1aedd1299fe387b94db78986590a34.1736945236.git.mchehab+huawei@kernel.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/ghes-stub.c | 2 +- hw/acpi/ghes.c | 2 +- include/hw/acpi/ghes.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/acpi/ghes-stub.c b/hw/acpi/ghes-stub.c index c315de1802..2b64cbd281 100644 --- a/hw/acpi/ghes-stub.c +++ b/hw/acpi/ghes-stub.c @@ -11,7 +11,7 @@ #include "qemu/osdep.h" #include "hw/acpi/ghes.h" -int acpi_ghes_record_errors(uint8_t source_id, uint64_t physical_address) +int acpi_ghes_record_errors(uint16_t source_id, uint64_t physical_address) { return -1; } diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c index 5f67322bf0..edc74c38bf 100644 --- a/hw/acpi/ghes.c +++ b/hw/acpi/ghes.c @@ -383,7 +383,7 @@ void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s, ags->present = true; } -int acpi_ghes_record_errors(uint8_t source_id, uint64_t physical_address) +int acpi_ghes_record_errors(uint16_t source_id, uint64_t physical_address) { uint64_t error_block_addr, read_ack_register_addr, read_ack_register = 0; uint64_t start_addr; diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h index 50e3a25ea3..9295e46be2 100644 --- a/include/hw/acpi/ghes.h +++ b/include/hw/acpi/ghes.h @@ -73,7 +73,7 @@ void acpi_build_hest(GArray *table_data, GArray *hardware_errors, const char *oem_id, const char *oem_table_id); void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s, GArray *hardware_errors); -int acpi_ghes_record_errors(uint8_t source_id, uint64_t error_physical_addr); +int acpi_ghes_record_errors(uint16_t source_id, uint64_t error_physical_addr); /** * acpi_ghes_present: Report whether ACPI GHES table is present From 2e223c5ec1146b61163d3372ac629d9240d57cb1 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 15 Jan 2025 13:50:24 +0100 Subject: [PATCH 34/49] acpi/ghes: don't check if physical_address is not zero The 'physical_address' value is a faulty page. As such, 0 is as valid as any other value. Suggested-by: Igor Mammedov Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Igor Mammedov Reviewed-by: Jonathan Cameron Message-Id: Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/ghes.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c index edc74c38bf..a3dffd78b0 100644 --- a/hw/acpi/ghes.c +++ b/hw/acpi/ghes.c @@ -400,10 +400,6 @@ int acpi_ghes_record_errors(uint16_t source_id, uint64_t physical_address) start_addr = le64_to_cpu(ags->ghes_addr_le); - if (!physical_address) { - return -1; - } - start_addr += source_id * sizeof(uint64_t); cpu_physical_memory_read(start_addr, &error_block_addr, From 48b0dcdd67d3fafbd07f6298257259dec4f541ce Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 15 Jan 2025 13:50:25 +0100 Subject: [PATCH 35/49] acpi/ghes: make the GHES record generation more generic Split the code into separate functions to allow using the common CPER filling code by different error sources. The generic code was moved to ghes_record_cper_errors(), and ghes_gen_err_data_uncorrectable_recoverable() now contains only a logic to fill the Generic Error Data part of the record, as described at: ACPI 6.2: 18.3.2.7.1 Generic Error Data The remaining code to generate a memory error now belongs to acpi_ghes_record_errors() function. A further patch will give it a better name. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Igor Mammedov Reviewed-by: Jonathan Cameron Message-Id: <68d9f787d8c4fc8d1dbc227d6902fe801e42dea9.1736945236.git.mchehab+huawei@kernel.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/ghes.c | 120 +++++++++++++++++++++++------------------ include/hw/acpi/ghes.h | 3 ++ 2 files changed, 72 insertions(+), 51 deletions(-) diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c index a3dffd78b0..6f40cd35a9 100644 --- a/hw/acpi/ghes.c +++ b/hw/acpi/ghes.c @@ -181,51 +181,24 @@ static void acpi_ghes_build_append_mem_cper(GArray *table, build_append_int_noprefix(table, 0, 7); } -static int acpi_ghes_record_mem_error(uint64_t error_block_address, - uint64_t error_physical_addr) +static void +ghes_gen_err_data_uncorrectable_recoverable(GArray *block, + const uint8_t *section_type, + int data_length) { - GArray *block; - - /* Memory Error Section Type */ - const uint8_t uefi_cper_mem_sec[] = - UUID_LE(0xA5BC1114, 0x6F64, 0x4EDE, 0xB8, 0x63, 0x3E, 0x83, \ - 0xED, 0x7C, 0x83, 0xB1); - /* invalid fru id: ACPI 4.0: 17.3.2.6.1 Generic Error Data, * Table 17-13 Generic Error Data Entry */ QemuUUID fru_id = {}; - uint32_t data_length; - - block = g_array_new(false, true /* clear */, 1); - - /* This is the length if adding a new generic error data entry*/ - data_length = ACPI_GHES_DATA_LENGTH + ACPI_GHES_MEM_CPER_LENGTH; - /* - * It should not run out of the preallocated memory if adding a new generic - * error data entry - */ - assert((data_length + ACPI_GHES_GESB_SIZE) <= - ACPI_GHES_MAX_RAW_DATA_LENGTH); /* Build the new generic error status block header */ acpi_ghes_generic_error_status(block, ACPI_GEBS_UNCORRECTABLE, 0, 0, data_length, ACPI_CPER_SEV_RECOVERABLE); /* Build this new generic error data entry header */ - acpi_ghes_generic_error_data(block, uefi_cper_mem_sec, + acpi_ghes_generic_error_data(block, section_type, ACPI_CPER_SEV_RECOVERABLE, 0, 0, ACPI_GHES_MEM_CPER_LENGTH, fru_id, 0); - - /* Build the memory section CPER for above new generic error data entry */ - acpi_ghes_build_append_mem_cper(block, error_physical_addr); - - /* Write the generic error data entry into guest memory */ - cpu_physical_memory_write(error_block_address, block->data, block->len); - - g_array_free(block, true); - - return 0; } /* @@ -383,15 +356,18 @@ void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s, ags->present = true; } -int acpi_ghes_record_errors(uint16_t source_id, uint64_t physical_address) +void ghes_record_cper_errors(const void *cper, size_t len, + uint16_t source_id, Error **errp) { uint64_t error_block_addr, read_ack_register_addr, read_ack_register = 0; uint64_t start_addr; - bool ret = -1; AcpiGedState *acpi_ged_state; AcpiGhesState *ags; - assert(source_id < ACPI_GHES_ERROR_SOURCE_COUNT); + if (len > ACPI_GHES_MAX_RAW_DATA_LENGTH) { + error_setg(errp, "GHES CPER record is too big: %zd", len); + return; + } acpi_ged_state = ACPI_GED(object_resolve_path_type("", TYPE_ACPI_GED, NULL)); @@ -406,6 +382,10 @@ int acpi_ghes_record_errors(uint16_t source_id, uint64_t physical_address) sizeof(error_block_addr)); error_block_addr = le64_to_cpu(error_block_addr); + if (!error_block_addr) { + error_setg(errp, "can not find Generic Error Status Block"); + return; + } read_ack_register_addr = start_addr + ACPI_GHES_ERROR_SOURCE_COUNT * sizeof(uint64_t); @@ -415,24 +395,62 @@ int acpi_ghes_record_errors(uint16_t source_id, uint64_t physical_address) /* zero means OSPM does not acknowledge the error */ if (!read_ack_register) { - error_report("OSPM does not acknowledge previous error," - " so can not record CPER for current error anymore"); - } else if (error_block_addr) { - read_ack_register = cpu_to_le64(0); - /* - * Clear the Read Ack Register, OSPM will write it to 1 when - * it acknowledges this error. - */ - cpu_physical_memory_write(read_ack_register_addr, - &read_ack_register, sizeof(uint64_t)); - - ret = acpi_ghes_record_mem_error(error_block_addr, - physical_address); - } else { - error_report("can not find Generic Error Status Block"); + error_setg(errp, + "OSPM does not acknowledge previous error," + " so can not record CPER for current error anymore"); + return; } - return ret; + read_ack_register = cpu_to_le64(0); + /* + * Clear the Read Ack Register, OSPM will write 1 to this register when + * it acknowledges the error. + */ + cpu_physical_memory_write(read_ack_register_addr, + &read_ack_register, sizeof(uint64_t)); + + /* Write the generic error data entry into guest memory */ + cpu_physical_memory_write(error_block_addr, cper, len); + + return; +} + +int acpi_ghes_record_errors(uint16_t source_id, uint64_t physical_address) +{ + /* Memory Error Section Type */ + const uint8_t guid[] = + UUID_LE(0xA5BC1114, 0x6F64, 0x4EDE, 0xB8, 0x63, 0x3E, 0x83, \ + 0xED, 0x7C, 0x83, 0xB1); + Error *errp = NULL; + int data_length; + GArray *block; + + block = g_array_new(false, true /* clear */, 1); + + data_length = ACPI_GHES_DATA_LENGTH + ACPI_GHES_MEM_CPER_LENGTH; + /* + * It should not run out of the preallocated memory if adding a new generic + * error data entry + */ + assert((data_length + ACPI_GHES_GESB_SIZE) <= + ACPI_GHES_MAX_RAW_DATA_LENGTH); + + ghes_gen_err_data_uncorrectable_recoverable(block, guid, data_length); + + /* Build the memory section CPER for above new generic error data entry */ + acpi_ghes_build_append_mem_cper(block, physical_address); + + /* Report the error */ + ghes_record_cper_errors(block->data, block->len, source_id, &errp); + + g_array_free(block, true); + + if (errp) { + error_report_err(errp); + return -1; + } + + return 0; } bool acpi_ghes_present(void) diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h index 9295e46be2..8859346af5 100644 --- a/include/hw/acpi/ghes.h +++ b/include/hw/acpi/ghes.h @@ -23,6 +23,7 @@ #define ACPI_GHES_H #include "hw/acpi/bios-linker-loader.h" +#include "qapi/error.h" /* * Values for Hardware Error Notification Type field @@ -73,6 +74,8 @@ void acpi_build_hest(GArray *table_data, GArray *hardware_errors, const char *oem_id, const char *oem_table_id); void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s, GArray *hardware_errors); +void ghes_record_cper_errors(const void *cper, size_t len, + uint16_t source_id, Error **errp); int acpi_ghes_record_errors(uint16_t source_id, uint64_t error_physical_addr); /** From d32028a54000db671eb2d0b6b28bbf15acc2e5f9 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 15 Jan 2025 13:50:26 +0100 Subject: [PATCH 36/49] acpi/ghes: better name GHES memory error function The current function used to generate GHES data is specific for memory errors. Give a better name for it, as we now have a generic function as well. Reviewed-by: Igor Mammedov Reviewed-by: Jonathan Cameron Signed-off-by: Mauro Carvalho Chehab Message-Id: <35b59121129d5e99cb5062cc3d775594bbb0905b.1736945236.git.mchehab+huawei@kernel.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/ghes-stub.c | 2 +- hw/acpi/ghes.c | 2 +- include/hw/acpi/ghes.h | 4 ++-- target/arm/kvm.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/hw/acpi/ghes-stub.c b/hw/acpi/ghes-stub.c index 2b64cbd281..7cec1812da 100644 --- a/hw/acpi/ghes-stub.c +++ b/hw/acpi/ghes-stub.c @@ -11,7 +11,7 @@ #include "qemu/osdep.h" #include "hw/acpi/ghes.h" -int acpi_ghes_record_errors(uint16_t source_id, uint64_t physical_address) +int acpi_ghes_memory_errors(uint16_t source_id, uint64_t physical_address) { return -1; } diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c index 6f40cd35a9..66bd98337a 100644 --- a/hw/acpi/ghes.c +++ b/hw/acpi/ghes.c @@ -415,7 +415,7 @@ void ghes_record_cper_errors(const void *cper, size_t len, return; } -int acpi_ghes_record_errors(uint16_t source_id, uint64_t physical_address) +int acpi_ghes_memory_errors(uint16_t source_id, uint64_t physical_address) { /* Memory Error Section Type */ const uint8_t guid[] = diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h index 8859346af5..21666a4bcc 100644 --- a/include/hw/acpi/ghes.h +++ b/include/hw/acpi/ghes.h @@ -74,15 +74,15 @@ void acpi_build_hest(GArray *table_data, GArray *hardware_errors, const char *oem_id, const char *oem_table_id); void acpi_ghes_add_fw_cfg(AcpiGhesState *vms, FWCfgState *s, GArray *hardware_errors); +int acpi_ghes_memory_errors(uint16_t source_id, uint64_t error_physical_addr); void ghes_record_cper_errors(const void *cper, size_t len, uint16_t source_id, Error **errp); -int acpi_ghes_record_errors(uint16_t source_id, uint64_t error_physical_addr); /** * acpi_ghes_present: Report whether ACPI GHES table is present * * Returns: true if the system has an ACPI GHES table and it is - * safe to call acpi_ghes_record_errors() to record a memory error. + * safe to call acpi_ghes_memory_errors() to record a memory error. */ bool acpi_ghes_present(void); #endif diff --git a/target/arm/kvm.c b/target/arm/kvm.c index a9444a2c7a..da30bdbb23 100644 --- a/target/arm/kvm.c +++ b/target/arm/kvm.c @@ -2387,7 +2387,7 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) */ if (code == BUS_MCEERR_AR) { kvm_cpu_synchronize_state(c); - if (!acpi_ghes_record_errors(ACPI_HEST_SRC_ID_SEA, paddr)) { + if (!acpi_ghes_memory_errors(ACPI_HEST_SRC_ID_SEA, paddr)) { kvm_inject_arm_sea(c); } else { error_report("failed to record the error"); From 1acc8d4e647e3d9fc45cc43c216e784e47a74809 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 15 Jan 2025 13:50:27 +0100 Subject: [PATCH 37/49] acpi/ghes: don't crash QEMU if ghes GED is not found Make error handling within ghes_record_cper_errors() consistent, i.e. instead abort just print a error in case ghes GED is not found. Reviewed-by: Jonathan Cameron Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Igor Mammedov Message-Id: Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/ghes.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c index 66bd98337a..6843ddf64b 100644 --- a/hw/acpi/ghes.c +++ b/hw/acpi/ghes.c @@ -371,7 +371,10 @@ void ghes_record_cper_errors(const void *cper, size_t len, acpi_ged_state = ACPI_GED(object_resolve_path_type("", TYPE_ACPI_GED, NULL)); - g_assert(acpi_ged_state); + if (!acpi_ged_state) { + error_setg(errp, "Can't find ACPI_GED object"); + return; + } ags = &acpi_ged_state->ghes_state; start_addr = le64_to_cpu(ags->ghes_addr_le); From 4651745dfc8d384bd260969c23d8423741462eac Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 15 Jan 2025 13:50:28 +0100 Subject: [PATCH 38/49] acpi/ghes: rename etc/hardware_error file macros Now that we have also have a file to store HEST data location, which is part of GHES, better name the file where CPER records are stored. No functional changes. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Igor Mammedov Reviewed-by: Jonathan Cameron Message-Id: Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/ghes.c | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c index 6843ddf64b..3f94a5542b 100644 --- a/hw/acpi/ghes.c +++ b/hw/acpi/ghes.c @@ -28,8 +28,8 @@ #include "hw/nvram/fw_cfg.h" #include "qemu/uuid.h" -#define ACPI_GHES_ERRORS_FW_CFG_FILE "etc/hardware_errors" -#define ACPI_GHES_DATA_ADDR_FW_CFG_FILE "etc/hardware_errors_addr" +#define ACPI_HW_ERROR_FW_CFG_FILE "etc/hardware_errors" +#define ACPI_HW_ERROR_ADDR_FW_CFG_FILE "etc/hardware_errors_addr" /* The max size in bytes for one error block */ #define ACPI_GHES_MAX_RAW_DATA_LENGTH (1 * KiB) @@ -234,7 +234,7 @@ static void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker) ACPI_GHES_MAX_RAW_DATA_LENGTH * ACPI_GHES_ERROR_SOURCE_COUNT); /* Tell guest firmware to place hardware_errors blob into RAM */ - bios_linker_loader_alloc(linker, ACPI_GHES_ERRORS_FW_CFG_FILE, + bios_linker_loader_alloc(linker, ACPI_HW_ERROR_FW_CFG_FILE, hardware_errors, sizeof(uint64_t), false); for (i = 0; i < ACPI_GHES_ERROR_SOURCE_COUNT; i++) { @@ -243,17 +243,21 @@ static void build_ghes_error_table(GArray *hardware_errors, BIOSLinker *linker) * corresponding "Generic Error Status Block" */ bios_linker_loader_add_pointer(linker, - ACPI_GHES_ERRORS_FW_CFG_FILE, sizeof(uint64_t) * i, - sizeof(uint64_t), ACPI_GHES_ERRORS_FW_CFG_FILE, - error_status_block_offset + i * ACPI_GHES_MAX_RAW_DATA_LENGTH); + ACPI_HW_ERROR_FW_CFG_FILE, + sizeof(uint64_t) * i, + sizeof(uint64_t), + ACPI_HW_ERROR_FW_CFG_FILE, + error_status_block_offset + + i * ACPI_GHES_MAX_RAW_DATA_LENGTH); } /* * tell firmware to write hardware_errors GPA into * hardware_errors_addr fw_cfg, once the former has been initialized. */ - bios_linker_loader_write_pointer(linker, ACPI_GHES_DATA_ADDR_FW_CFG_FILE, - 0, sizeof(uint64_t), ACPI_GHES_ERRORS_FW_CFG_FILE, 0); + bios_linker_loader_write_pointer(linker, ACPI_HW_ERROR_ADDR_FW_CFG_FILE, 0, + sizeof(uint64_t), + ACPI_HW_ERROR_FW_CFG_FILE, 0); } /* Build Generic Hardware Error Source version 2 (GHESv2) */ @@ -290,8 +294,10 @@ static void build_ghes_v2(GArray *table_data, build_append_gas(table_data, AML_AS_SYSTEM_MEMORY, 0x40, 0, 4 /* QWord access */, 0); bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE, - address_offset + GAS_ADDR_OFFSET, sizeof(uint64_t), - ACPI_GHES_ERRORS_FW_CFG_FILE, source_id * sizeof(uint64_t)); + address_offset + GAS_ADDR_OFFSET, + sizeof(uint64_t), + ACPI_HW_ERROR_FW_CFG_FILE, + source_id * sizeof(uint64_t)); /* Notification Structure */ build_ghes_hw_error_notification(table_data, notify); @@ -308,9 +314,11 @@ static void build_ghes_v2(GArray *table_data, build_append_gas(table_data, AML_AS_SYSTEM_MEMORY, 0x40, 0, 4 /* QWord access */, 0); bios_linker_loader_add_pointer(linker, ACPI_BUILD_TABLE_FILE, - address_offset + GAS_ADDR_OFFSET, - sizeof(uint64_t), ACPI_GHES_ERRORS_FW_CFG_FILE, - (ACPI_GHES_ERROR_SOURCE_COUNT + source_id) * sizeof(uint64_t)); + address_offset + GAS_ADDR_OFFSET, + sizeof(uint64_t), + ACPI_HW_ERROR_FW_CFG_FILE, + (ACPI_GHES_ERROR_SOURCE_COUNT + source_id) + * sizeof(uint64_t)); /* * Read Ack Preserve field @@ -346,11 +354,11 @@ void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s, GArray *hardware_error) { /* Create a read-only fw_cfg file for GHES */ - fw_cfg_add_file(s, ACPI_GHES_ERRORS_FW_CFG_FILE, hardware_error->data, + fw_cfg_add_file(s, ACPI_HW_ERROR_FW_CFG_FILE, hardware_error->data, hardware_error->len); /* Create a read-write fw_cfg file for Address */ - fw_cfg_add_file_callback(s, ACPI_GHES_DATA_ADDR_FW_CFG_FILE, NULL, NULL, + fw_cfg_add_file_callback(s, ACPI_HW_ERROR_ADDR_FW_CFG_FILE, NULL, NULL, NULL, &(ags->ghes_addr_le), sizeof(ags->ghes_addr_le), false); ags->present = true; From 652f6d86cbb60e193edc2510c365b75229734ccf Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 15 Jan 2025 13:50:29 +0100 Subject: [PATCH 39/49] acpi/ghes: better name the offset of the hardware error firmware The hardware error firmware is where HEST error structures are stored. Those can be GHESv2, but they can also be other types. Better name the location of the hardware error. No functional changes. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Jonathan Cameron Reviewed-by: Igor Mammedov Message-Id: Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/generic_event_device.c | 4 ++-- hw/acpi/ghes.c | 4 ++-- include/hw/acpi/ghes.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/hw/acpi/generic_event_device.c b/hw/acpi/generic_event_device.c index 58540c0aaf..c85d97ca37 100644 --- a/hw/acpi/generic_event_device.c +++ b/hw/acpi/generic_event_device.c @@ -363,7 +363,7 @@ static const VMStateDescription vmstate_ghes = { .version_id = 1, .minimum_version_id = 1, .fields = (const VMStateField[]) { - VMSTATE_UINT64(ghes_addr_le, AcpiGhesState), + VMSTATE_UINT64(hw_error_le, AcpiGhesState), VMSTATE_END_OF_LIST() }, }; @@ -371,7 +371,7 @@ static const VMStateDescription vmstate_ghes = { static bool ghes_needed(void *opaque) { AcpiGedState *s = opaque; - return s->ghes_state.ghes_addr_le; + return s->ghes_state.hw_error_le; } static const VMStateDescription vmstate_ghes_state = { diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c index 3f94a5542b..983e28505a 100644 --- a/hw/acpi/ghes.c +++ b/hw/acpi/ghes.c @@ -359,7 +359,7 @@ void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s, /* Create a read-write fw_cfg file for Address */ fw_cfg_add_file_callback(s, ACPI_HW_ERROR_ADDR_FW_CFG_FILE, NULL, NULL, - NULL, &(ags->ghes_addr_le), sizeof(ags->ghes_addr_le), false); + NULL, &(ags->hw_error_le), sizeof(ags->hw_error_le), false); ags->present = true; } @@ -385,7 +385,7 @@ void ghes_record_cper_errors(const void *cper, size_t len, } ags = &acpi_ged_state->ghes_state; - start_addr = le64_to_cpu(ags->ghes_addr_le); + start_addr = le64_to_cpu(ags->hw_error_le); start_addr += source_id * sizeof(uint64_t); diff --git a/include/hw/acpi/ghes.h b/include/hw/acpi/ghes.h index 21666a4bcc..39619a2457 100644 --- a/include/hw/acpi/ghes.h +++ b/include/hw/acpi/ghes.h @@ -65,7 +65,7 @@ enum { }; typedef struct AcpiGhesState { - uint64_t ghes_addr_le; + uint64_t hw_error_le; bool present; /* True if GHES is present at all on this board */ } AcpiGhesState; From 1cd59b8981ce234c1d790111afce4a32218a88dd Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 15 Jan 2025 13:50:30 +0100 Subject: [PATCH 40/49] acpi/ghes: move offset calculus to a separate function Currently, CPER address location is calculated as an offset of the hardware_errors table. It is also badly named, as the offset actually used is the address where the CPER data starts, and not the beginning of the error source. Move the logic which calculates such offset to a separate function, in preparation for a patch that will be changing the logic to calculate it from the HEST table. While here, properly name the variable which stores the cper address. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Jonathan Cameron Message-Id: <60fdd1bf379ba1db3099710868802aa49a27febb.1736945236.git.mchehab+huawei@kernel.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/ghes.c | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c index 983e28505a..ddb576b940 100644 --- a/hw/acpi/ghes.c +++ b/hw/acpi/ghes.c @@ -364,10 +364,37 @@ void acpi_ghes_add_fw_cfg(AcpiGhesState *ags, FWCfgState *s, ags->present = true; } +static void get_hw_error_offsets(uint64_t ghes_addr, + uint64_t *cper_addr, + uint64_t *read_ack_register_addr) +{ + if (!ghes_addr) { + return; + } + + /* + * non-HEST version supports only one source, so no need to change + * the start offset based on the source ID. Also, we can't validate + * the source ID, as it is stored inside the HEST table. + */ + + cpu_physical_memory_read(ghes_addr, cper_addr, + sizeof(*cper_addr)); + + *cper_addr = le64_to_cpu(*cper_addr); + + /* + * As the current version supports only one source, the ack offset is + * just sizeof(uint64_t). + */ + *read_ack_register_addr = ghes_addr + + ACPI_GHES_ERROR_SOURCE_COUNT * sizeof(uint64_t); +} + void ghes_record_cper_errors(const void *cper, size_t len, uint16_t source_id, Error **errp) { - uint64_t error_block_addr, read_ack_register_addr, read_ack_register = 0; + uint64_t cper_addr = 0, read_ack_register_addr = 0, read_ack_register; uint64_t start_addr; AcpiGedState *acpi_ged_state; AcpiGhesState *ags; @@ -389,18 +416,13 @@ void ghes_record_cper_errors(const void *cper, size_t len, start_addr += source_id * sizeof(uint64_t); - cpu_physical_memory_read(start_addr, &error_block_addr, - sizeof(error_block_addr)); + get_hw_error_offsets(start_addr, &cper_addr, &read_ack_register_addr); - error_block_addr = le64_to_cpu(error_block_addr); - if (!error_block_addr) { + if (!cper_addr) { error_setg(errp, "can not find Generic Error Status Block"); return; } - read_ack_register_addr = start_addr + - ACPI_GHES_ERROR_SOURCE_COUNT * sizeof(uint64_t); - cpu_physical_memory_read(read_ack_register_addr, &read_ack_register, sizeof(read_ack_register)); @@ -421,7 +443,7 @@ void ghes_record_cper_errors(const void *cper, size_t len, &read_ack_register, sizeof(uint64_t)); /* Write the generic error data entry into guest memory */ - cpu_physical_memory_write(error_block_addr, cper, len); + cpu_physical_memory_write(cper_addr, cper, len); return; } From 47935fc1e56f02c892d186ef89be7c923f73c89b Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 15 Jan 2025 13:50:31 +0100 Subject: [PATCH 41/49] acpi/ghes: Change ghes fill logic to work with only one source Extending to multiple sources require a BIOS pointer to the beginning of the HEST table, which in turn requires a backward-compatible code. So, the current code supports only one source. Ensure that and simplify the code. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Jonathan Cameron Reviewed-by: Igor Mammedov Message-Id: <66bddd42a64c8515ad98b9975d953b4a70ffcc6d.1736945236.git.mchehab+huawei@kernel.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/ghes.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/hw/acpi/ghes.c b/hw/acpi/ghes.c index ddb576b940..b709c177cd 100644 --- a/hw/acpi/ghes.c +++ b/hw/acpi/ghes.c @@ -387,15 +387,13 @@ static void get_hw_error_offsets(uint64_t ghes_addr, * As the current version supports only one source, the ack offset is * just sizeof(uint64_t). */ - *read_ack_register_addr = ghes_addr + - ACPI_GHES_ERROR_SOURCE_COUNT * sizeof(uint64_t); + *read_ack_register_addr = ghes_addr + sizeof(uint64_t); } void ghes_record_cper_errors(const void *cper, size_t len, uint16_t source_id, Error **errp) { uint64_t cper_addr = 0, read_ack_register_addr = 0, read_ack_register; - uint64_t start_addr; AcpiGedState *acpi_ged_state; AcpiGhesState *ags; @@ -412,11 +410,9 @@ void ghes_record_cper_errors(const void *cper, size_t len, } ags = &acpi_ged_state->ghes_state; - start_addr = le64_to_cpu(ags->hw_error_le); - - start_addr += source_id * sizeof(uint64_t); - - get_hw_error_offsets(start_addr, &cper_addr, &read_ack_register_addr); + assert(ACPI_GHES_ERROR_SOURCE_COUNT == 1); + get_hw_error_offsets(le64_to_cpu(ags->hw_error_le), + &cper_addr, &read_ack_register_addr); if (!cper_addr) { error_setg(errp, "can not find Generic Error Status Block"); From 84c146758d79b3689b6c9c7815b6bfbb70ba06b0 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 15 Jan 2025 13:50:32 +0100 Subject: [PATCH 42/49] docs: acpi_hest_ghes: fix documentation for CPER size While the spec defines a CPER size of 4KiB for each record, currently it is set to 1KiB. Fix the documentation and add a pointer to the macro name there, as this may help to keep it updated. Signed-off-by: Mauro Carvalho Chehab Reviewed-by: Jonathan Cameron Reviewed-by: Igor Mammedov Message-Id: Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- docs/specs/acpi_hest_ghes.rst | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/specs/acpi_hest_ghes.rst b/docs/specs/acpi_hest_ghes.rst index 68f1fbe0a4..c3e9f8d9a7 100644 --- a/docs/specs/acpi_hest_ghes.rst +++ b/docs/specs/acpi_hest_ghes.rst @@ -67,8 +67,10 @@ Design Details (3) The address registers table contains N Error Block Address entries and N Read Ack Register entries. The size for each entry is 8-byte. The Error Status Data Block table contains N Error Status Data Block - entries. The size for each entry is 4096(0x1000) bytes. The total size - for the "etc/hardware_errors" fw_cfg blob is (N * 8 * 2 + N * 4096) bytes. + entries. The size for each entry is defined at the source code as + ACPI_GHES_MAX_RAW_DATA_LENGTH (currently 1024 bytes). The total size + for the "etc/hardware_errors" fw_cfg blob is + (N * 8 * 2 + N * ACPI_GHES_MAX_RAW_DATA_LENGTH) bytes. N is the number of the kinds of hardware error sources. (4) QEMU generates the ACPI linker/loader script for the firmware. The From 1ad32644fe4c9fb25086be15a66dde1d55d3410f Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 15 Jan 2025 13:53:40 +0100 Subject: [PATCH 43/49] tests: acpi: whitelist expected blobs Signed-off-by: Igor Mammedov Message-Id: <20250115125342.3883374-2-imammedo@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/qtest/bios-tables-test-allowed-diff.h | 40 +++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h index dfb8523c8b..085dfa9ff4 100644 --- a/tests/qtest/bios-tables-test-allowed-diff.h +++ b/tests/qtest/bios-tables-test-allowed-diff.h @@ -1 +1,41 @@ /* List of comma-separated changed AML files to ignore */ +"tests/data/acpi/x86/pc/DSDT", +"tests/data/acpi/x86/pc/DSDT.acpierst", +"tests/data/acpi/x86/pc/DSDT.acpihmat", +"tests/data/acpi/x86/pc/DSDT.bridge", +"tests/data/acpi/x86/pc/DSDT.cphp", +"tests/data/acpi/x86/pc/DSDT.dimmpxm", +"tests/data/acpi/x86/pc/DSDT.hpbridge", +"tests/data/acpi/x86/pc/DSDT.ipmikcs", +"tests/data/acpi/x86/pc/DSDT.memhp", +"tests/data/acpi/x86/pc/DSDT.nohpet", +"tests/data/acpi/x86/pc/DSDT.numamem", +"tests/data/acpi/x86/pc/DSDT.roothp", +"tests/data/acpi/x86/q35/DSDT", +"tests/data/acpi/x86/q35/DSDT.acpierst", +"tests/data/acpi/x86/q35/DSDT.acpihmat", +"tests/data/acpi/x86/q35/DSDT.acpihmat-generic-x", +"tests/data/acpi/x86/q35/DSDT.acpihmat-noinitiator", +"tests/data/acpi/x86/q35/DSDT.applesmc", +"tests/data/acpi/x86/q35/DSDT.bridge", +"tests/data/acpi/x86/q35/DSDT.core-count", +"tests/data/acpi/x86/q35/DSDT.core-count2", +"tests/data/acpi/x86/q35/DSDT.cphp", +"tests/data/acpi/x86/q35/DSDT.cxl", +"tests/data/acpi/x86/q35/DSDT.dimmpxm", +"tests/data/acpi/x86/q35/DSDT.ipmibt", +"tests/data/acpi/x86/q35/DSDT.ipmismbus", +"tests/data/acpi/x86/q35/DSDT.ivrs", +"tests/data/acpi/x86/q35/DSDT.memhp", +"tests/data/acpi/x86/q35/DSDT.mmio64", +"tests/data/acpi/x86/q35/DSDT.multi-bridge", +"tests/data/acpi/x86/q35/DSDT.nohpet", +"tests/data/acpi/x86/q35/DSDT.numamem", +"tests/data/acpi/x86/q35/DSDT.pvpanic-isa", +"tests/data/acpi/x86/q35/DSDT.thread-count", +"tests/data/acpi/x86/q35/DSDT.thread-count2", +"tests/data/acpi/x86/q35/DSDT.tis.tpm12", +"tests/data/acpi/x86/q35/DSDT.tis.tpm2", +"tests/data/acpi/x86/q35/DSDT.type4-count", +"tests/data/acpi/x86/q35/DSDT.viot", +"tests/data/acpi/x86/q35/DSDT.xapic", From 0b053391985abcc40b16ac8fc4a7f6588d1d95c1 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 15 Jan 2025 13:53:41 +0100 Subject: [PATCH 44/49] pci: acpi: Windows 'PCI Label Id' bug workaround Current versions of Windows call _DSM(func=7) regardless of whether it is supported or not. It leads to NICs having bogus 'PCI Label Id = 0', where none should be set at all. Also presence of 'PCI Label Id' triggers another Windows bug on localized versions that leads to hangs. The later bug is fixed in latest updates for 'Windows Server' but not in consumer versions of Windows (and there is no plans to fix it as far as I'm aware). Given it's easy, implement Microsoft suggested workaround (return invalid Package) so that affected Windows versions could boot on QEMU. This would effectvely remove bogus 'PCI Label Id's on NICs, but MS teem confirmed that flipping 'PCI Label Id' should not change 'Network Connection' ennumeration, so it should be safe for QEMU to change _DSM without any compat code. Smoke tested with WinXP and WS2022 Resolves: https://gitlab.com/qemu-project/qemu/-/issues/774 Signed-off-by: Igor Mammedov Message-Id: <20250115125342.3883374-3-imammedo@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 733b8f0851..1311a0d4f3 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -654,6 +654,7 @@ static Aml *aml_pci_pdsm(void) Aml *acpi_index = aml_local(2); Aml *zero = aml_int(0); Aml *one = aml_int(1); + Aml *not_supp = aml_int(0xFFFFFFFF); Aml *func = aml_arg(2); Aml *params = aml_arg(4); Aml *bnum = aml_derefof(aml_index(params, aml_int(0))); @@ -678,7 +679,7 @@ static Aml *aml_pci_pdsm(void) */ ifctx1 = aml_if(aml_lnot( aml_or(aml_equal(acpi_index, zero), - aml_equal(acpi_index, aml_int(0xFFFFFFFF)), NULL) + aml_equal(acpi_index, not_supp), NULL) )); { /* have supported functions */ @@ -704,18 +705,30 @@ static Aml *aml_pci_pdsm(void) { Aml *pkg = aml_package(2); - aml_append(pkg, zero); - /* - * optional, if not impl. should return null string - */ - aml_append(pkg, aml_string("%s", "")); - aml_append(ifctx, aml_store(pkg, ret)); - aml_append(ifctx, aml_store(aml_call2("AIDX", bnum, sunum), acpi_index)); + aml_append(ifctx, aml_store(pkg, ret)); /* - * update acpi-index to actual value + * Windows calls func=7 without checking if it's available, + * as workaround Microsoft has suggested to return invalid for func7 + * Package, so return 2 elements package but only initialize elements + * when acpi_index is supported and leave them uninitialized, which + * leads elements to being Uninitialized ObjectType and should trip + * Windows into discarding result as an unexpected and prevent setting + * bogus 'PCI Label' on the device. */ - aml_append(ifctx, aml_store(acpi_index, aml_index(ret, zero))); + ifctx1 = aml_if(aml_lnot(aml_lor( + aml_equal(acpi_index, zero), aml_equal(acpi_index, not_supp) + ))); + { + aml_append(ifctx1, aml_store(acpi_index, aml_index(ret, zero))); + /* + * optional, if not impl. should return null string + */ + aml_append(ifctx1, aml_store(aml_string("%s", ""), + aml_index(ret, one))); + } + aml_append(ifctx, ifctx1); + aml_append(ifctx, aml_return(ret)); } From 9fb1c9a1bb26e111ee5fa5538070cd684de14c08 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Wed, 15 Jan 2025 13:53:42 +0100 Subject: [PATCH 45/49] tests: acpi: update expected blobs _DSM function 7 AML should have followig change: If ((Arg2 == 0x07)) { - Local0 = Package (0x02) - { - Zero, - "" - } Local2 = AIDX (DerefOf (Arg4 [Zero]), DerefOf (Arg4 [One] )) - Local0 [Zero] = Local2 + Local0 = Package (0x02) {} + If (!((Local2 == Zero) || (Local2 == 0xFFFFFFFF))) + { + Local0 [Zero] = Local2 + Local0 [One] = "" + } + Return (Local0) } } Signed-off-by: Igor Mammedov Message-Id: <20250115125342.3883374-4-imammedo@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/data/acpi/x86/pc/DSDT | Bin 8593 -> 8611 bytes tests/data/acpi/x86/pc/DSDT.acpierst | Bin 8504 -> 8522 bytes tests/data/acpi/x86/pc/DSDT.acpihmat | Bin 9918 -> 9936 bytes tests/data/acpi/x86/pc/DSDT.bridge | Bin 15464 -> 15482 bytes tests/data/acpi/x86/pc/DSDT.cphp | Bin 9057 -> 9075 bytes tests/data/acpi/x86/pc/DSDT.dimmpxm | Bin 10247 -> 10265 bytes tests/data/acpi/x86/pc/DSDT.hpbridge | Bin 8544 -> 8562 bytes tests/data/acpi/x86/pc/DSDT.ipmikcs | Bin 8665 -> 8683 bytes tests/data/acpi/x86/pc/DSDT.memhp | Bin 9952 -> 9970 bytes tests/data/acpi/x86/pc/DSDT.nohpet | Bin 8451 -> 8469 bytes tests/data/acpi/x86/pc/DSDT.numamem | Bin 8599 -> 8617 bytes tests/data/acpi/x86/pc/DSDT.roothp | Bin 12386 -> 12404 bytes tests/data/acpi/x86/q35/DSDT | Bin 8422 -> 8440 bytes tests/data/acpi/x86/q35/DSDT.acpierst | Bin 8439 -> 8457 bytes tests/data/acpi/x86/q35/DSDT.acpihmat | Bin 9747 -> 9765 bytes .../data/acpi/x86/q35/DSDT.acpihmat-generic-x | Bin 12632 -> 12650 bytes .../acpi/x86/q35/DSDT.acpihmat-noinitiator | Bin 8701 -> 8719 bytes tests/data/acpi/x86/q35/DSDT.applesmc | Bin 8468 -> 8486 bytes tests/data/acpi/x86/q35/DSDT.bridge | Bin 12035 -> 12053 bytes tests/data/acpi/x86/q35/DSDT.core-count | Bin 12980 -> 12998 bytes tests/data/acpi/x86/q35/DSDT.core-count2 | Bin 33837 -> 33855 bytes tests/data/acpi/x86/q35/DSDT.cphp | Bin 8886 -> 8904 bytes tests/data/acpi/x86/q35/DSDT.cxl | Bin 13213 -> 13231 bytes tests/data/acpi/x86/q35/DSDT.dimmpxm | Bin 10076 -> 10094 bytes tests/data/acpi/x86/q35/DSDT.ipmibt | Bin 8497 -> 8515 bytes tests/data/acpi/x86/q35/DSDT.ipmismbus | Bin 8510 -> 8528 bytes tests/data/acpi/x86/q35/DSDT.ivrs | Bin 8439 -> 8457 bytes tests/data/acpi/x86/q35/DSDT.memhp | Bin 9781 -> 9799 bytes tests/data/acpi/x86/q35/DSDT.mmio64 | Bin 9552 -> 9570 bytes tests/data/acpi/x86/q35/DSDT.multi-bridge | Bin 13275 -> 13293 bytes tests/data/acpi/x86/q35/DSDT.nohpet | Bin 8280 -> 8298 bytes tests/data/acpi/x86/q35/DSDT.numamem | Bin 8428 -> 8446 bytes tests/data/acpi/x86/q35/DSDT.pvpanic-isa | Bin 8523 -> 8541 bytes tests/data/acpi/x86/q35/DSDT.thread-count | Bin 12980 -> 12998 bytes tests/data/acpi/x86/q35/DSDT.thread-count2 | Bin 33837 -> 33855 bytes tests/data/acpi/x86/q35/DSDT.tis.tpm12 | Bin 9028 -> 9046 bytes tests/data/acpi/x86/q35/DSDT.tis.tpm2 | Bin 9054 -> 9072 bytes tests/data/acpi/x86/q35/DSDT.type4-count | Bin 18656 -> 18674 bytes tests/data/acpi/x86/q35/DSDT.viot | Bin 14679 -> 14697 bytes tests/data/acpi/x86/q35/DSDT.xapic | Bin 35785 -> 35803 bytes tests/qtest/bios-tables-test-allowed-diff.h | 40 ------------------ 41 files changed, 40 deletions(-) diff --git a/tests/data/acpi/x86/pc/DSDT b/tests/data/acpi/x86/pc/DSDT index 60d50b088a362556fd54395cb15364d6c0936be5..4beb5194b84a711fcb52e3e52cc2096497d18442 100644 GIT binary patch delta 89 zcmbQ}yx5t`CD3-R delta 71 zcmZ4NJkgoUCDk-)%Ez{}8)z_{6%iB%o|3(g%c delta 71 zcmX@*w8M$ZCDJ%>k diff --git a/tests/data/acpi/x86/pc/DSDT.acpihmat b/tests/data/acpi/x86/pc/DSDT.acpihmat index 61b7d5caa55c44dbf69d649110c6b14bb4c3fdf5..d081db26d7ba504b3344fad130d5812419291ac0 100644 GIT binary patch delta 89 zcmdnzd%>5>CDf|gg_5vYRCI()HgaSuT amx$($90mp;!N`zQkkpaDu-TQVTLl0-@)g_w diff --git a/tests/data/acpi/x86/pc/DSDT.bridge b/tests/data/acpi/x86/pc/DSDT.bridge index d43e148bed19160f39d88ccf3364544150a3f87f..e16897dc5f0fbb3f7b4de8db913884046246cc3b 100644 GIT binary patch delta 89 zcmaD+@vDN%CDk-)%Ez{}8)z_{6%Nly^~MG+nf delta 71 zcmezD_Rx*XCDIz366_KpslmX&C@_)h0h6ET#5X&bT(~ALW6U-&pPa?TUf}5I64Bg|!@vL} r7#WfZgqWBT7D!E+I5~-7auUyfASg)cNMK+n;AQAYVBBoXbVU^a+*loK delta 71 zcmbOk&>q0$66_MfuED^-7(bEg0h5oX&boH!>hW6V}kot(wRULeHE#K6mtP~hn4 a64Bg|!@vL}7#WfZk~$I?HoG!iQ3U|fGZbt9 diff --git a/tests/data/acpi/x86/pc/DSDT.hpbridge b/tests/data/acpi/x86/pc/DSDT.hpbridge index 67fe28699fbb261cfc7a52b2291f9965ab93c6a8..0eafe5fbf3d73719c9c3e6e26371863bfb44ed2f 100644 GIT binary patch delta 89 zcmaFh^vQ|KCDk-)%Ez{}8)z_{6%Nmm{KEf|gg_5vYRCI()HgaSuT amx$($90mp;!N`zQkkpaDu-TPqnLGeJV-@KD diff --git a/tests/data/acpi/x86/pc/DSDT.memhp b/tests/data/acpi/x86/pc/DSDT.memhp index 9c66ccf150af1622d1b788a1ae04a6e5136cff9e..e3b49757cb7abd7536ee89a6824967d2cb2485cf 100644 GIT binary patch delta 89 zcmaFh`^lHfCDnk1QX1 delta 71 zcmez5`@omWCDr~=xN{=c113MwiEnl=xo}Nh#+YqlJ~@kvy};4aC8D__hk*e| rFft?+2r)4wERdQsadHyFk-)%Ez{}8)z_{6%>4F>p{vRFN delta 71 zcmbR0)a=CN66_MftjNH?$T5-Y0h5oX&boH!>hW6V}kot(wRULeHE#K6mtP~hn4 a64Bg|!@vL}7#WfZk~$I?HoG!ikOKg+W)s^0 diff --git a/tests/data/acpi/x86/pc/DSDT.numamem b/tests/data/acpi/x86/pc/DSDT.numamem index e256bbce790152f045247db631d9f1da81f90499..9bfbfc28213713c208dfc38a85abb46fb190871d 100644 GIT binary patch delta 89 zcmbR4ywaJ=CDf|gg_5vYRCI()HgaSuT amx$($90mp;!N`zQkkpaDu-TOk-)%Ez{}8)z_{6%NnZ~DVZCD-t)x0Ri;KNLh?R+fmm#6R c(bFZOxg&>x0Z1@1Bo!oeBrt4dWBw!y01)vM(EtDd diff --git a/tests/data/acpi/x86/q35/DSDT.acpierst b/tests/data/acpi/x86/q35/DSDT.acpierst index dbd4f858354df0f4c050fd0b914581154f340ee8..072a3fe2cd17dfe06658dfd82588f69787810114 100644 GIT binary patch delta 91 zcmezF*y+UO66_MfsmQ><7`u^cAtRHY=;XDGJD6O!CZ{uHo0w0|;$kmw^mK`6?#N+a t01}K0Nd-boObH95CQY22#4tID=RXh>By}V(Fck1IbR;lt{=&p82LS2(9X0>} delta 73 zcmeBl`tHc(66_N4U4emtaqUK~g^Wx-B9qrL?qG7_oSe>-t)x0Ri;KNLh?R+fmm#6R c(bFZOxg&>x0Z1@1Bo!oeBrt4dV`i2E03D1JHvj+t diff --git a/tests/data/acpi/x86/q35/DSDT.acpihmat b/tests/data/acpi/x86/q35/DSDT.acpihmat index 952752e30e9dfc9e2085e8fceaa0740dda6db89c..2a4f2fc1d5c5649673353186e67ff5b5e59e8d53 100644 GIT binary patch delta 91 zcmbR2v($&nCDFW=;;#C+>yh; t03;Y0k_v>Fm=YF9O`144iD7aQ&wn5&Na{#nU?|{a=tyAP{Dnzg835}29by0g delta 73 zcmZ4LGuemBCD!@#IFXg^Wx-B9qrL?qG7_oSe>-t)x0Ri;KNLh?R+fmm#6R c(bFZOxg&>x0Z1@1Bo!oeBrt4dW0qG20N%P2V*mgE diff --git a/tests/data/acpi/x86/q35/DSDT.acpihmat-generic-x b/tests/data/acpi/x86/q35/DSDT.acpihmat-generic-x index e95258cbd8681103a642f8973bd1ac9ef229cff7..7911c058bba5005d318b8db8d6da5c1ee381b0f1 100644 GIT binary patch delta 91 zcmcbS^eTzVCDFW=;;#C+>yh; t03;Y0k_v>Fm=YF9O`144iD7aQ&wn5&Na{#nU?|{a=tyAP{Dmn+4*)5r9$f$c delta 73 zcmaErbR&t&CDfQ+LPjPZk;!WrcQ83|PEKdaR#Kgu#l>DA#LC3L%aBmu c=;;#C+>yh;03;Y0k_wVK5*RkKF{kJO01;jkUH||9 diff --git a/tests/data/acpi/x86/q35/DSDT.acpihmat-noinitiator b/tests/data/acpi/x86/q35/DSDT.acpihmat-noinitiator index ba2a7d0004be7cd7220716dc7e8594be87197b98..580b4a456a20fc0cc0a832eaf74193b46d8ae8b1 100644 GIT binary patch delta 91 zcmezC-0#BW66_Mfuf)K>XtI%OAtRHY=;XDGJD6O!CZ{uHo0w0|;$kmw^mK`6?#N+a t01}K0Nd-boObH95CQY22#4tID=RXh>By}V(Fck1IbR;lt{=&p54*=zj9V-9; delta 73 zcmeBo`RmN(66_N4SCN5%v1KFILPjPZk;!WrcQ83|PEKdaR#Kgu#l>DA#LC3L%aBmu c=;;#C+>yh;03;Y0k_wVK5*RkKF>}fT02*NwEC2ui diff --git a/tests/data/acpi/x86/q35/DSDT.applesmc b/tests/data/acpi/x86/q35/DSDT.applesmc index b6cb840953ea539092f601e08b7122fc999b3e1b..5e8220e38d6f88b103f6eb3eb7c78dfa466882dc 100644 GIT binary patch delta 91 zcmbQ@w9JXiCDx t0Z1@1BozoTF(oXJnly2862s&qp8r5lkkpaDz)--;(2>Bn`3sYR902mp9eV%( delta 73 zcmZ4HG{uR_CD-t)x0Ri;KNLh?R+fmm#6R c(bFZOxg&>x0Z1@1Bo!oeBrt4dV^)v@0OWxad;kCd diff --git a/tests/data/acpi/x86/q35/DSDT.bridge b/tests/data/acpi/x86/q35/DSDT.bridge index 1939fda2507cde6fcb6f7a093897f9bd2cb987ef..ee039453af1071e00a81ee7b37cf8f417f524257 100644 GIT binary patch delta 91 zcmZpUn;OUE66_Kps?Wf{cylAyLPjP((aCEWcQCndO-^UZHZh-^#l>FW=;;#C+>yh; t03;Y0k_v>Fm=YF9O`144iD7aQ&wn5&Na{#nU?|{a=tyAP{Dp~M2LJ|z9k&1g delta 73 zcmbOl*Br;?66_Mftk1x}sIif2AtRHI$mF$*JD8j}C#N%IE2&P-;$kllVr63BWk@J+ b^mK`6?#N+a01}K0Nd-wA2@IRrnE7=8xVsX# diff --git a/tests/data/acpi/x86/q35/DSDT.core-count b/tests/data/acpi/x86/q35/DSDT.core-count index 41c0832ab5041ff5361598813ec28fe7442b191b..7ebfceeb66460d0ad98471924ce224b7153e87ef 100644 GIT binary patch delta 91 zcmdmzdMuU8CDx t0Z1@1BozoTF(oXJnly2862s&qp8r5lkkpaDz)--;(2>Bn`3utleE>Jw9_#=B delta 73 zcmX?>x+Rs%CD-t)x0Ri;KNLh?R+fmm#6R c(bFZOxg&>x0Z1@1Bo!oeBrt4dV?Lk{02~bz?EnA( diff --git a/tests/data/acpi/x86/q35/DSDT.core-count2 b/tests/data/acpi/x86/q35/DSDT.core-count2 index 153b45f0f7443d25cecc2a752fb6dbd921160e78..d0394558a1faa0b4ba43abab66d474d96b477ff3 100644 GIT binary patch delta 93 zcmZ46!L+}FiOVI}CB(jkfq}7oBiBMkCO^^1YZ-Sixo}NRXUaA)pPa?TUf}5I64Bg| v!@vL}7#WfZgqWBT7D!E+I5~-7auUyfASg)cNMK+n;AQAYVBGwL$+8guRx t0Z1@1BozoTF(oXJnly2862s&qp8r5lkkpaDz)--;(2>Bn`3ut_c>o;=9*+P3 delta 73 zcmX@%y3Li#CD-t)x0Ri;KNLh?R+fmm#6R c(bFZOxg&>x0Z1@1Bo!oeBrt4dV?HDg00znwkN^Mx diff --git a/tests/data/acpi/x86/q35/DSDT.cxl b/tests/data/acpi/x86/q35/DSDT.cxl index 0f1ccdfcc3ffbf151c172015cc4bf18bc4ead218..20843549f54af1cb0e6017c4cfff7463318d9eb7 100644 GIT binary patch delta 91 zcmbQ6zCNAHCDW3mKXGL?^Fh+`;6+H94Ir+r)fw78iSgqo+$mb4LyX t1CU^3NGcFwVoF#bHEH7HB!-t)x0Ri;KNLh?R+fmm#6R c(bFZOxg&>x0Z1@1Bo!oeBrt4dV_sx t0Z1@1BozoTF(oXJnly2862s&qp8r5lkkpaDz)--;(2>Bn`3qBq3IJ9f9`FDF delta 73 zcmaFocgK&*CDDA#LC3L%aBmu c=;;#C+>yh;03;Y0k_wVK5*RkKF=wa%0QdwG@c;k- diff --git a/tests/data/acpi/x86/q35/DSDT.ipmibt b/tests/data/acpi/x86/q35/DSDT.ipmibt index 524fc9f4ee09fd7a5bec62818fd87b6ec300dee8..4066a76d26aa380dfbecc58aa3f83ab5db2baadb 100644 GIT binary patch delta 91 zcmdn!bl8c@CDx t0Z1@1BozoTF(oXJnly2862s&qp8r5lkkpaDz)--;(2>Bn`3sY+8~`Ob9y0&{ delta 73 zcmX@?w9$#nCDDA#LC3L%aBmu c=;;#C+>yh;03;Y0k_wVK5*RkKG26-k0MPdnGynhq diff --git a/tests/data/acpi/x86/q35/DSDT.ipmismbus b/tests/data/acpi/x86/q35/DSDT.ipmismbus index d04d215a1d0fbc77739084d100a35af47a1c1a62..6d0b6b95c2a9fd01befc37b26650781ee1562e2a 100644 GIT binary patch delta 91 zcmdnzbis+sCD$LPjP((aCEWcQCndO-^UZHZh-^#l>FW=;;#C+>yh; t03;Y0k_v>Fm=YF9O`144iD7aQ&wn5&Na{#nU?|{a=tyAP{DsL&4gd$99oPT> delta 73 zcmccMw9kplCD-t)x0Ri;KNLh?R+fmm#6R c(bFZOxg&>x0Z1@1Bo!oeBrt4dWA>5*0QIaB*#H0l diff --git a/tests/data/acpi/x86/q35/DSDT.ivrs b/tests/data/acpi/x86/q35/DSDT.ivrs index dbd4f858354df0f4c050fd0b914581154f340ee8..072a3fe2cd17dfe06658dfd82588f69787810114 100644 GIT binary patch delta 91 zcmezF*y+UO66_MfsmQ><7`u^cAtRHY=;XDGJD6O!CZ{uHo0w0|;$kmw^mK`6?#N+a t01}K0Nd-boObH95CQY22#4tID=RXh>By}V(Fck1IbR;lt{=&p82LS2(9X0>} delta 73 zcmeBl`tHc(66_N4U4emtaqUK~g^Wx-B9qrL?qG7_oSe>-t)x0Ri;KNLh?R+fmm#6R c(bFZOxg&>x0Z1@1Bo!oeBrt4dV`i2E03D1JHvj+t diff --git a/tests/data/acpi/x86/q35/DSDT.memhp b/tests/data/acpi/x86/q35/DSDT.memhp index f73ade9bf6e4545f9912ed654a282884a54cec79..4f2f9bcfceff076490cc49b8286380295a340004 100644 GIT binary patch delta 91 zcmdn$bKHl^CDx t0Z1@1BozoTF(oXJnly2862s&qp8r5lkkpaDz)--;(2>Bn`3sYyG5{3^9svLV delta 73 zcmX@^v(<;oCDdw@xex}g^Wx-B9qrL?qG7_oSe>-t)x0Ri;KNLh?R+fmm#6R c(bFZOxg&>x0Z1@1Bo!oeBrt4dV|G*q0RQV00ssI2 diff --git a/tests/data/acpi/x86/q35/DSDT.mmio64 b/tests/data/acpi/x86/q35/DSDT.mmio64 index f0ddb4c83cdc9afdf4f289a66ed6bf0d630fd623..0fb6aab16f1bd79f3c0790cc9f644f7e52ac37b1 100644 GIT binary patch delta 91 zcmccM^~j6MCDFW=;;#C+>yh; t03;Y0k_v>Fm=YF9O`144iD7aQ&wn5&Na{#nU?|{a=tyAP{Dmn-2>={69w-0+ delta 73 zcmaFlb-|0vCD-t)x0Ri;KNLh?R+fmm#6R c(bFZOxg&>x0Z1@1Bo!oeBrt4dV~$Y*00uu4DF6Tf diff --git a/tests/data/acpi/x86/q35/DSDT.multi-bridge b/tests/data/acpi/x86/q35/DSDT.multi-bridge index 3ad19e3f5e480db1c449b838c83833f7665186cd..f6afa6d96d2525d512cc46f17439f7a49962b730 100644 GIT binary patch delta 91 zcmcbe{x+SN93mKXGL?^Fh+`;6+H94Ir+r)fw78iSgqo+$mb4LyX t1CU^3NGcFwVoF#bHEH7HB! delta 73 zcmaExemkAZCD-t)x0Ri;KNLh?R+fmm#6R c(bFZOxg&>x0Z1@1Bo!oeBrt4dV}5P`04A0dEC2ui diff --git a/tests/data/acpi/x86/q35/DSDT.nohpet b/tests/data/acpi/x86/q35/DSDT.nohpet index c089b5877a0f4d808abd4d8d9396ee7d2a9a78e5..99ad629c9171ff6ab346d6b4c519e77ca23e5b1c 100644 GIT binary patch delta 91 zcmccN@XCS9CDFW=;;#C+>yh; t03;Y0k_v>Fm=YF9O`144iD7aQ&wn5&Na{#nU?|{a=tyAP{Dmn+762>R9$x?e delta 73 zcmaFmaKnMiCD-t)x0Ri;KNLh?R+fmm#6R c(bFZOxg&>x0Z1@1Bo!oeBrt4dV@{C;01`SBU;qFB diff --git a/tests/data/acpi/x86/q35/DSDT.numamem b/tests/data/acpi/x86/q35/DSDT.numamem index 2867f5b44498d788fc0effd0bf616317821be88e..fd1d8a79d3d9b071c8796e5e99b76698a9a8d29c 100644 GIT binary patch delta 91 zcmaFk_|K8cCDx t0Z1@1BozoTF(oXJnly2862s&qp8r5lkkpaDz)--;(2>Bn`3utzSpZnlA9Mf! delta 73 zcmez8_{NdTCDFW=;;#C+>yh; t03;Y0k_v>Fm=YF9O`144iD7aQ&wn5&Na{#nU?|{a=tyAP{Dmo84geWE9vT1u delta 73 zcmccXblQo_CD-t)x0Ri;KNLh?R+fmm#6R c(bFZOxg&>x0Z1@1Bo!oeBrt4dV-A-C00W;C8vpx t0Z1@1BozoTF(oXJnly2862s&qp8r5lkkpaDz)--;(2>Bn`3utleE>Jw9_#=B delta 73 zcmX?>x+Rs%CD-t)x0Ri;KNLh?R+fmm#6R c(bFZOxg&>x0Z1@1Bo!oeBrt4dV?Lk{02~bz?EnA( diff --git a/tests/data/acpi/x86/q35/DSDT.thread-count2 b/tests/data/acpi/x86/q35/DSDT.thread-count2 index 153b45f0f7443d25cecc2a752fb6dbd921160e78..d0394558a1faa0b4ba43abab66d474d96b477ff3 100644 GIT binary patch delta 93 zcmZ46!L+}FiOVI}CB(jkfq}7oBiBMkCO^^1YZ-Sixo}NRXUaA)pPa?TUf}5I64Bg| v!@vL}7#WfZgqWBT7D!E+I5~-7auUyfASg)cNMK+n;AQAYVBGwL$+8guRx t0Z1@1BozoTF(oXJnly2862s&qp8r5lkkpaDz)--;(2>Bn`3qBk0suQ!9*6({ delta 73 zcmccScEpX#CDDA#LC3L%aBmu c=;;#C+>yh;03;Y0k_wVK5*RkKF$X9B0OFJri2wiq diff --git a/tests/data/acpi/x86/q35/DSDT.tis.tpm2 b/tests/data/acpi/x86/q35/DSDT.tis.tpm2 index b05563deedc65df50f35b2399862d9ee8d4d1e0e..5c975d2162d0bfee5a3a089e79b5ba038f82b7ef 100644 GIT binary patch delta 91 zcmccT_Q8$ICDx t0Z1@1BozoTF(oXJnly2862s&qp8r5lkkpaDz)--;(2>Bn`3qB)0sui69;yHU delta 73 zcmez1cF&E=CD-t)x0Ri;KNLh?R+fmm#6R c(bFZOxg&>x0Z1@1Bo!oeBrt4dW6n|l03y{Cs{jB1 diff --git a/tests/data/acpi/x86/q35/DSDT.type4-count b/tests/data/acpi/x86/q35/DSDT.type4-count index 00807e7fd4d758bc2ab9c69ac8869cf6864399f7..3194a82b8b4f66aff1ecf7d2d60b4890181fc600 100644 GIT binary patch delta 93 zcmaDbk@3?+MlP3Nmyk~$3=E9H8@U!TGWm&4Udy;*!sObomX e2?dUxE)mTgISdRyf{`JqAgLpPVKW=^8&?2n#1=pR diff --git a/tests/data/acpi/x86/q35/DSDT.viot b/tests/data/acpi/x86/q35/DSDT.viot index c3d83e67660ee3fd59f6fae6242270bed4a567f1..129d43e1e561be3fd7cd71406829ab81d0a8aba0 100644 GIT binary patch delta 91 zcmca!^sx t0Z1@1BozoTF(oXJnly2862s&qp8r5lkkpaDz)--;(2>Bn`3qCB830&J9{K

(KT@%~1xg^Wx-B9qrL?qG7_oSe>-t)x0Ri;KNLh?R+fmm#6R c(bFZOxg&>x0Z1@1Bo!oeBrt4dV@@^$05x|M`v3p{ diff --git a/tests/data/acpi/x86/q35/DSDT.xapic b/tests/data/acpi/x86/q35/DSDT.xapic index 227d421f16ed1824a87e8a91da734828f8b48cbf..b37ab591110d1c8201575ad6bba83449d7b90b21 100644 GIT binary patch delta 93 zcmX>(o$2;;CN7s?myp}t3=E9T8@U!TGWm&4Udy;*!sObomX e2?dUxE)mTgISdRyf{`JqAgLpPVKW=^)eZo5W)`Rb diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h index 085dfa9ff4..dfb8523c8b 100644 --- a/tests/qtest/bios-tables-test-allowed-diff.h +++ b/tests/qtest/bios-tables-test-allowed-diff.h @@ -1,41 +1 @@ /* List of comma-separated changed AML files to ignore */ -"tests/data/acpi/x86/pc/DSDT", -"tests/data/acpi/x86/pc/DSDT.acpierst", -"tests/data/acpi/x86/pc/DSDT.acpihmat", -"tests/data/acpi/x86/pc/DSDT.bridge", -"tests/data/acpi/x86/pc/DSDT.cphp", -"tests/data/acpi/x86/pc/DSDT.dimmpxm", -"tests/data/acpi/x86/pc/DSDT.hpbridge", -"tests/data/acpi/x86/pc/DSDT.ipmikcs", -"tests/data/acpi/x86/pc/DSDT.memhp", -"tests/data/acpi/x86/pc/DSDT.nohpet", -"tests/data/acpi/x86/pc/DSDT.numamem", -"tests/data/acpi/x86/pc/DSDT.roothp", -"tests/data/acpi/x86/q35/DSDT", -"tests/data/acpi/x86/q35/DSDT.acpierst", -"tests/data/acpi/x86/q35/DSDT.acpihmat", -"tests/data/acpi/x86/q35/DSDT.acpihmat-generic-x", -"tests/data/acpi/x86/q35/DSDT.acpihmat-noinitiator", -"tests/data/acpi/x86/q35/DSDT.applesmc", -"tests/data/acpi/x86/q35/DSDT.bridge", -"tests/data/acpi/x86/q35/DSDT.core-count", -"tests/data/acpi/x86/q35/DSDT.core-count2", -"tests/data/acpi/x86/q35/DSDT.cphp", -"tests/data/acpi/x86/q35/DSDT.cxl", -"tests/data/acpi/x86/q35/DSDT.dimmpxm", -"tests/data/acpi/x86/q35/DSDT.ipmibt", -"tests/data/acpi/x86/q35/DSDT.ipmismbus", -"tests/data/acpi/x86/q35/DSDT.ivrs", -"tests/data/acpi/x86/q35/DSDT.memhp", -"tests/data/acpi/x86/q35/DSDT.mmio64", -"tests/data/acpi/x86/q35/DSDT.multi-bridge", -"tests/data/acpi/x86/q35/DSDT.nohpet", -"tests/data/acpi/x86/q35/DSDT.numamem", -"tests/data/acpi/x86/q35/DSDT.pvpanic-isa", -"tests/data/acpi/x86/q35/DSDT.thread-count", -"tests/data/acpi/x86/q35/DSDT.thread-count2", -"tests/data/acpi/x86/q35/DSDT.tis.tpm12", -"tests/data/acpi/x86/q35/DSDT.tis.tpm2", -"tests/data/acpi/x86/q35/DSDT.type4-count", -"tests/data/acpi/x86/q35/DSDT.viot", -"tests/data/acpi/x86/q35/DSDT.xapic", From 1ce979e7269a34d19ea1a65808df014d8b2acbf6 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Wed, 15 Jan 2025 15:58:34 +0800 Subject: [PATCH 46/49] hw/cxl: Fix msix_notify: Assertion `vector < dev->msix_entries_nr` This assertion always happens when we sanitize the CXL memory device. $ echo 1 > /sys/bus/cxl/devices/mem0/security/sanitize It is incorrect to register an MSIX number beyond the device's capability. Increase the device's MSIX number to cover the mailbox msix number(9). Fixes: 43efb0bfad2b ("hw/cxl/mbox: Wire up interrupts for background completion") Signed-off-by: Li Zhijian Message-Id: <20250115075834.167504-1-lizhijian@fujitsu.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/mem/cxl_type3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/mem/cxl_type3.c b/hw/mem/cxl_type3.c index bd7652740f..0ae1704a34 100644 --- a/hw/mem/cxl_type3.c +++ b/hw/mem/cxl_type3.c @@ -843,7 +843,7 @@ static void ct3_realize(PCIDevice *pci_dev, Error **errp) ComponentRegisters *regs = &cxl_cstate->crb; MemoryRegion *mr = ®s->component_registers; uint8_t *pci_conf = pci_dev->config; - unsigned short msix_num = 6; + unsigned short msix_num = 10; int i, rc; uint16_t count; From 3f65357313e0f928e0bd3ff868b705855d0405bc Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Wed, 15 Jan 2025 14:50:43 +0100 Subject: [PATCH 47/49] vhost: Add stubs for the migration state transfer interface Migration state transfer interface is only used by vhost-user-fs, so the interface needs to be defined only when vhost is built. But I need to use this interface with virtio-net and vhost is not always enabled, and to avoid undefined reference error during build, define stub functions for vhost_supports_device_state(), vhost_save_backend_state() and vhost_load_backend_state(). Cc: Hanna Czenczek Signed-off-by: Laurent Vivier Message-Id: <20250115135044.799698-2-lvivier@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- include/hw/virtio/vhost.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index 461c168c37..a9469d50bc 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -365,7 +365,14 @@ static inline int vhost_reset_device(struct vhost_dev *hdev) * Returns true if the device supports these commands, and false if it * does not. */ +#ifdef CONFIG_VHOST bool vhost_supports_device_state(struct vhost_dev *dev); +#else +static inline bool vhost_supports_device_state(struct vhost_dev *dev) +{ + return false; +} +#endif /** * vhost_set_device_state_fd(): Begin transfer of internal state from/to @@ -448,7 +455,15 @@ int vhost_check_device_state(struct vhost_dev *dev, Error **errp); * * Returns 0 on success, and -errno otherwise. */ +#ifdef CONFIG_VHOST int vhost_save_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp); +#else +static inline int vhost_save_backend_state(struct vhost_dev *dev, QEMUFile *f, + Error **errp) +{ + return -ENOSYS; +} +#endif /** * vhost_load_backend_state(): High-level function to load a vhost @@ -465,6 +480,14 @@ int vhost_save_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp); * * Returns 0 on success, and -errno otherwise. */ +#ifdef CONFIG_VHOST int vhost_load_backend_state(struct vhost_dev *dev, QEMUFile *f, Error **errp); +#else +static inline int vhost_load_backend_state(struct vhost_dev *dev, QEMUFile *f, + Error **errp) +{ + return -ENOSYS; +} +#endif #endif From 60f543ad917fad731e39ff8ce2ca83b9a9cc9d90 Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Wed, 15 Jan 2025 14:50:44 +0100 Subject: [PATCH 48/49] virtio-net: vhost-user: Implement internal migration Add support of VHOST_USER_PROTOCOL_F_DEVICE_STATE in virtio-net with vhost-user backend. Cc: Hanna Czenczek Signed-off-by: Laurent Vivier Message-Id: <20250115135044.799698-3-lvivier@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/net/virtio-net.c | 135 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 112 insertions(+), 23 deletions(-) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 06f096abf6..85e14b788c 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -3337,6 +3337,117 @@ static const VMStateDescription vmstate_virtio_net_rss = { }, }; +static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev) +{ + VirtIONet *n = VIRTIO_NET(vdev); + NetClientState *nc; + struct vhost_net *net; + + if (!n->nic) { + return NULL; + } + + nc = qemu_get_queue(n->nic); + if (!nc) { + return NULL; + } + + net = get_vhost_net(nc->peer); + if (!net) { + return NULL; + } + + return &net->dev; +} + +static int vhost_user_net_save_state(QEMUFile *f, void *pv, size_t size, + const VMStateField *field, + JSONWriter *vmdesc) +{ + VirtIONet *n = pv; + VirtIODevice *vdev = VIRTIO_DEVICE(n); + struct vhost_dev *vhdev; + Error *local_error = NULL; + int ret; + + vhdev = virtio_net_get_vhost(vdev); + if (vhdev == NULL) { + error_reportf_err(local_error, + "Error getting vhost back-end of %s device %s: ", + vdev->name, vdev->parent_obj.canonical_path); + return -1; + } + + ret = vhost_save_backend_state(vhdev, f, &local_error); + if (ret < 0) { + error_reportf_err(local_error, + "Error saving back-end state of %s device %s: ", + vdev->name, vdev->parent_obj.canonical_path); + return ret; + } + + return 0; +} + +static int vhost_user_net_load_state(QEMUFile *f, void *pv, size_t size, + const VMStateField *field) +{ + VirtIONet *n = pv; + VirtIODevice *vdev = VIRTIO_DEVICE(n); + struct vhost_dev *vhdev; + Error *local_error = NULL; + int ret; + + vhdev = virtio_net_get_vhost(vdev); + if (vhdev == NULL) { + error_reportf_err(local_error, + "Error getting vhost back-end of %s device %s: ", + vdev->name, vdev->parent_obj.canonical_path); + return -1; + } + + ret = vhost_load_backend_state(vhdev, f, &local_error); + if (ret < 0) { + error_reportf_err(local_error, + "Error loading back-end state of %s device %s: ", + vdev->name, vdev->parent_obj.canonical_path); + return ret; + } + + return 0; +} + +static bool vhost_user_net_is_internal_migration(void *opaque) +{ + VirtIONet *n = opaque; + VirtIODevice *vdev = VIRTIO_DEVICE(n); + struct vhost_dev *vhdev; + + vhdev = virtio_net_get_vhost(vdev); + if (vhdev == NULL) { + return false; + } + + return vhost_supports_device_state(vhdev); +} + +static const VMStateDescription vhost_user_net_backend_state = { + .name = "virtio-net-device/backend", + .version_id = 0, + .needed = vhost_user_net_is_internal_migration, + .fields = (const VMStateField[]) { + { + .name = "backend", + .info = &(const VMStateInfo) { + .name = "virtio-net vhost-user backend state", + .get = vhost_user_net_load_state, + .put = vhost_user_net_save_state, + }, + }, + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_virtio_net_device = { .name = "virtio-net-device", .version_id = VIRTIO_NET_VM_VERSION, @@ -3389,6 +3500,7 @@ static const VMStateDescription vmstate_virtio_net_device = { }, .subsections = (const VMStateDescription * const []) { &vmstate_virtio_net_rss, + &vhost_user_net_backend_state, NULL } }; @@ -3950,29 +4062,6 @@ static bool dev_unplug_pending(void *opaque) return vdc->primary_unplug_pending(dev); } -static struct vhost_dev *virtio_net_get_vhost(VirtIODevice *vdev) -{ - VirtIONet *n = VIRTIO_NET(vdev); - NetClientState *nc; - struct vhost_net *net; - - if (!n->nic) { - return NULL; - } - - nc = qemu_get_queue(n->nic); - if (!nc) { - return NULL; - } - - net = get_vhost_net(nc->peer); - if (!net) { - return NULL; - } - - return &net->dev; -} - static const VMStateDescription vmstate_virtio_net = { .name = "virtio-net", .minimum_version_id = VIRTIO_NET_VM_VERSION, From 3634039b93cc51816263e0cb5ba32e1b61142d5d Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 7 Jan 2025 16:28:16 +0000 Subject: [PATCH 49/49] hw/acpi: Add vmclock device MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The vmclock device addresses the problem of live migration with precision clocks. The tolerances of a hardware counter (e.g. TSC) are typically around ±50PPM. A guest will use NTP/PTP/PPS to discipline that counter against an external source of 'real' time, and track the precise frequency of the counter as it changes with environmental conditions. When a guest is live migrated, anything it knows about the frequency of the underlying counter becomes invalid. It may move from a host where the counter running at -50PPM of its nominal frequency, to a host where it runs at +50PPM. There will also be a step change in the value of the counter, as the correctness of its absolute value at migration is limited by the accuracy of the source and destination host's time synchronization. The device exposes a shared memory region to guests, which can be mapped all the way to userspace. In the first phase, this merely advertises a 'disruption_marker', which indicates that the guest should throw away any NTP synchronization it thinks it has, and start again. Because the region can be exposed all the way to userspace, applications can still use time from a fast vDSO 'system call', and check the disruption marker to be sure that their timestamp is indeed truthful. The structure also allows for the precise time, as known by the host, to be exposed directly to guests so that they don't have to wait for NTP to resync from scratch. The values and fields are based on the nascent virtio-rtc specification, and the intent is that a version (hopefully precisely this version) of this structure will be included as an optional part of that spec. In the meantime, a simple ACPI device along the lines of VMGENID is perfectly sufficient and is compatible with what's being shipped in certain commercial hypervisors. Linux guest support was merged into the 6.13-rc1 kernel: https://git.kernel.org/torvalds/c/205032724226 Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant Message-Id: <07fd5e2f529098ad4d7cab1423fe9f4a03a9cc14.camel@infradead.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/Kconfig | 5 + hw/acpi/meson.build | 1 + hw/acpi/vmclock.c | 179 ++++++++++++++++++ hw/i386/Kconfig | 1 + hw/i386/acpi-build.c | 10 +- include/hw/acpi/vmclock.h | 34 ++++ include/standard-headers/linux/vmclock-abi.h | 182 +++++++++++++++++++ scripts/update-linux-headers.sh | 1 + 8 files changed, 412 insertions(+), 1 deletion(-) create mode 100644 hw/acpi/vmclock.c create mode 100644 include/hw/acpi/vmclock.h create mode 100644 include/standard-headers/linux/vmclock-abi.h diff --git a/hw/acpi/Kconfig b/hw/acpi/Kconfig index e07d3204eb..1d4e9f0845 100644 --- a/hw/acpi/Kconfig +++ b/hw/acpi/Kconfig @@ -60,6 +60,11 @@ config ACPI_VMGENID default y depends on PC +config ACPI_VMCLOCK + bool + default y + depends on PC + config ACPI_VIOT bool depends on ACPI diff --git a/hw/acpi/meson.build b/hw/acpi/meson.build index c8854f4d48..73f02b9691 100644 --- a/hw/acpi/meson.build +++ b/hw/acpi/meson.build @@ -15,6 +15,7 @@ acpi_ss.add(when: 'CONFIG_ACPI_NVDIMM', if_false: files('acpi-nvdimm-stub.c')) acpi_ss.add(when: 'CONFIG_ACPI_PCI', if_true: files('pci.c')) acpi_ss.add(when: 'CONFIG_ACPI_CXL', if_true: files('cxl.c'), if_false: files('cxl-stub.c')) acpi_ss.add(when: 'CONFIG_ACPI_VMGENID', if_true: files('vmgenid.c')) +acpi_ss.add(when: 'CONFIG_ACPI_VMCLOCK', if_true: files('vmclock.c')) acpi_ss.add(when: 'CONFIG_ACPI_HW_REDUCED', if_true: files('generic_event_device.c')) acpi_ss.add(when: 'CONFIG_ACPI_HMAT', if_true: files('hmat.c')) acpi_ss.add(when: 'CONFIG_ACPI_APEI', if_true: files('ghes.c'), if_false: files('ghes-stub.c')) diff --git a/hw/acpi/vmclock.c b/hw/acpi/vmclock.c new file mode 100644 index 0000000000..7387e5c9ca --- /dev/null +++ b/hw/acpi/vmclock.c @@ -0,0 +1,179 @@ +/* + * Virtual Machine Clock Device + * + * Copyright © 2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Authors: David Woodhouse + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/module.h" +#include "hw/i386/e820_memory_layout.h" +#include "hw/acpi/acpi.h" +#include "hw/acpi/aml-build.h" +#include "hw/acpi/vmclock.h" +#include "hw/nvram/fw_cfg.h" +#include "hw/qdev-properties.h" +#include "hw/qdev-properties-system.h" +#include "migration/vmstate.h" +#include "system/reset.h" + +#include "standard-headers/linux/vmclock-abi.h" + +void vmclock_build_acpi(VmclockState *vms, GArray *table_data, + BIOSLinker *linker, const char *oem_id) +{ + Aml *ssdt, *dev, *scope, *crs; + AcpiTable table = { .sig = "SSDT", .rev = 1, + .oem_id = oem_id, .oem_table_id = "VMCLOCK" }; + + /* Put VMCLOCK into a separate SSDT table */ + acpi_table_begin(&table, table_data); + ssdt = init_aml_allocator(); + + scope = aml_scope("\\_SB"); + dev = aml_device("VCLK"); + aml_append(dev, aml_name_decl("_HID", aml_string("AMZNC10C"))); + aml_append(dev, aml_name_decl("_CID", aml_string("VMCLOCK"))); + aml_append(dev, aml_name_decl("_DDN", aml_string("VMCLOCK"))); + + /* Simple status method */ + aml_append(dev, aml_name_decl("_STA", aml_int(0xf))); + + crs = aml_resource_template(); + aml_append(crs, aml_qword_memory(AML_POS_DECODE, + AML_MIN_FIXED, AML_MAX_FIXED, + AML_CACHEABLE, AML_READ_ONLY, + 0xffffffffffffffffULL, + vms->physaddr, + vms->physaddr + VMCLOCK_SIZE - 1, + 0, VMCLOCK_SIZE)); + aml_append(dev, aml_name_decl("_CRS", crs)); + aml_append(scope, dev); + aml_append(ssdt, scope); + + g_array_append_vals(table_data, ssdt->buf->data, ssdt->buf->len); + acpi_table_end(linker, &table); + free_aml_allocator(); +} + +static void vmclock_update_guest(VmclockState *vms) +{ + uint64_t disruption_marker; + uint32_t seq_count; + + if (!vms->clk) { + return; + } + + seq_count = le32_to_cpu(vms->clk->seq_count) | 1; + vms->clk->seq_count = cpu_to_le32(seq_count); + /* These barriers pair with read barriers in the guest */ + smp_wmb(); + + disruption_marker = le64_to_cpu(vms->clk->disruption_marker); + disruption_marker++; + vms->clk->disruption_marker = cpu_to_le64(disruption_marker); + + /* These barriers pair with read barriers in the guest */ + smp_wmb(); + vms->clk->seq_count = cpu_to_le32(seq_count + 1); +} + +/* + * After restoring an image, we need to update the guest memory to notify + * it of clock disruption. + */ +static int vmclock_post_load(void *opaque, int version_id) +{ + VmclockState *vms = opaque; + + vmclock_update_guest(vms); + return 0; +} + +static const VMStateDescription vmstate_vmclock = { + .name = "vmclock", + .version_id = 1, + .minimum_version_id = 1, + .post_load = vmclock_post_load, + .fields = (const VMStateField[]) { + VMSTATE_UINT64(physaddr, VmclockState), + VMSTATE_END_OF_LIST() + }, +}; + +static void vmclock_handle_reset(void *opaque) +{ + VmclockState *vms = VMCLOCK(opaque); + + if (!memory_region_is_mapped(&vms->clk_page)) { + memory_region_add_subregion_overlap(get_system_memory(), + vms->physaddr, + &vms->clk_page, 0); + } +} + +static void vmclock_realize(DeviceState *dev, Error **errp) +{ + VmclockState *vms = VMCLOCK(dev); + + /* + * Given that this function is executing, there is at least one VMCLOCK + * device. Check if there are several. + */ + if (!find_vmclock_dev()) { + error_setg(errp, "at most one %s device is permitted", TYPE_VMCLOCK); + return; + } + + vms->physaddr = VMCLOCK_ADDR; + + e820_add_entry(vms->physaddr, VMCLOCK_SIZE, E820_RESERVED); + + memory_region_init_ram(&vms->clk_page, OBJECT(dev), "vmclock_page", + VMCLOCK_SIZE, &error_abort); + memory_region_set_enabled(&vms->clk_page, true); + vms->clk = memory_region_get_ram_ptr(&vms->clk_page); + memset(vms->clk, 0, VMCLOCK_SIZE); + + vms->clk->magic = cpu_to_le32(VMCLOCK_MAGIC); + vms->clk->size = cpu_to_le16(VMCLOCK_SIZE); + vms->clk->version = cpu_to_le16(1); + + /* These are all zero and thus default, but be explicit */ + vms->clk->clock_status = VMCLOCK_STATUS_UNKNOWN; + vms->clk->counter_id = VMCLOCK_COUNTER_INVALID; + + qemu_register_reset(vmclock_handle_reset, vms); + + vmclock_update_guest(vms); +} + +static void vmclock_device_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + + dc->vmsd = &vmstate_vmclock; + dc->realize = vmclock_realize; + dc->hotpluggable = false; + set_bit(DEVICE_CATEGORY_MISC, dc->categories); +} + +static const TypeInfo vmclock_device_info = { + .name = TYPE_VMCLOCK, + .parent = TYPE_DEVICE, + .instance_size = sizeof(VmclockState), + .class_init = vmclock_device_class_init, +}; + +static void vmclock_register_types(void) +{ + type_register_static(&vmclock_device_info); +} + +type_init(vmclock_register_types) diff --git a/hw/i386/Kconfig b/hw/i386/Kconfig index 32818480d2..d34ce07b21 100644 --- a/hw/i386/Kconfig +++ b/hw/i386/Kconfig @@ -43,6 +43,7 @@ config PC select SERIAL_ISA select ACPI_PCI select ACPI_VMGENID + select ACPI_VMCLOCK select VIRTIO_PMEM_SUPPORTED select VIRTIO_MEM_SUPPORTED select HV_BALLOON_SUPPORTED diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 1311a0d4f3..53b7306b43 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -43,6 +43,7 @@ #include "system/tpm.h" #include "hw/acpi/tpm.h" #include "hw/acpi/vmgenid.h" +#include "hw/acpi/vmclock.h" #include "hw/acpi/erst.h" #include "hw/acpi/piix4.h" #include "system/tpm_backend.h" @@ -2445,7 +2446,7 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) uint8_t *u; GArray *tables_blob = tables->table_data; AcpiSlicOem slic_oem = { .id = NULL, .table_id = NULL }; - Object *vmgenid_dev; + Object *vmgenid_dev, *vmclock_dev; char *oem_id; char *oem_table_id; @@ -2518,6 +2519,13 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) tables->vmgenid, tables->linker, x86ms->oem_id); } + vmclock_dev = find_vmclock_dev(); + if (vmclock_dev) { + acpi_add_table(table_offsets, tables_blob); + vmclock_build_acpi(VMCLOCK(vmclock_dev), tables_blob, tables->linker, + x86ms->oem_id); + } + if (misc.has_hpet) { acpi_add_table(table_offsets, tables_blob); build_hpet(tables_blob, tables->linker, x86ms->oem_id, diff --git a/include/hw/acpi/vmclock.h b/include/hw/acpi/vmclock.h new file mode 100644 index 0000000000..5605605812 --- /dev/null +++ b/include/hw/acpi/vmclock.h @@ -0,0 +1,34 @@ +#ifndef ACPI_VMCLOCK_H +#define ACPI_VMCLOCK_H + +#include "hw/acpi/bios-linker-loader.h" +#include "hw/qdev-core.h" +#include "qemu/uuid.h" +#include "qom/object.h" + +#define TYPE_VMCLOCK "vmclock" + +#define VMCLOCK_ADDR 0xfeffb000 +#define VMCLOCK_SIZE 0x1000 + +OBJECT_DECLARE_SIMPLE_TYPE(VmclockState, VMCLOCK) + +struct vmclock_abi; + +struct VmclockState { + DeviceState parent_obj; + MemoryRegion clk_page; + uint64_t physaddr; + struct vmclock_abi *clk; +}; + +/* returns NULL unless there is exactly one device */ +static inline Object *find_vmclock_dev(void) +{ + return object_resolve_path_type("", TYPE_VMCLOCK, NULL); +} + +void vmclock_build_acpi(VmclockState *vms, GArray *table_data, + BIOSLinker *linker, const char *oem_id); + +#endif diff --git a/include/standard-headers/linux/vmclock-abi.h b/include/standard-headers/linux/vmclock-abi.h new file mode 100644 index 0000000000..15b0316cb4 --- /dev/null +++ b/include/standard-headers/linux/vmclock-abi.h @@ -0,0 +1,182 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ + +/* + * This structure provides a vDSO-style clock to VM guests, exposing the + * relationship (or lack thereof) between the CPU clock (TSC, timebase, arch + * counter, etc.) and real time. It is designed to address the problem of + * live migration, which other clock enlightenments do not. + * + * When a guest is live migrated, this affects the clock in two ways. + * + * First, even between identical hosts the actual frequency of the underlying + * counter will change within the tolerances of its specification (typically + * ±50PPM, or 4 seconds a day). This frequency also varies over time on the + * same host, but can be tracked by NTP as it generally varies slowly. With + * live migration there is a step change in the frequency, with no warning. + * + * Second, there may be a step change in the value of the counter itself, as + * its accuracy is limited by the precision of the NTP synchronization on the + * source and destination hosts. + * + * So any calibration (NTP, PTP, etc.) which the guest has done on the source + * host before migration is invalid, and needs to be redone on the new host. + * + * In its most basic mode, this structure provides only an indication to the + * guest that live migration has occurred. This allows the guest to know that + * its clock is invalid and take remedial action. For applications that need + * reliable accurate timestamps (e.g. distributed databases), the structure + * can be mapped all the way to userspace. This allows the application to see + * directly for itself that the clock is disrupted and take appropriate + * action, even when using a vDSO-style method to get the time instead of a + * system call. + * + * In its more advanced mode. this structure can also be used to expose the + * precise relationship of the CPU counter to real time, as calibrated by the + * host. This means that userspace applications can have accurate time + * immediately after live migration, rather than having to pause operations + * and wait for NTP to recover. This mode does, of course, rely on the + * counter being reliable and consistent across CPUs. + * + * Note that this must be true UTC, never with smeared leap seconds. If a + * guest wishes to construct a smeared clock, it can do so. Presenting a + * smeared clock through this interface would be problematic because it + * actually messes with the apparent counter *period*. A linear smearing + * of 1 ms per second would effectively tweak the counter period by 1000PPM + * at the start/end of the smearing period, while a sinusoidal smear would + * basically be impossible to represent. + * + * This structure is offered with the intent that it be adopted into the + * nascent virtio-rtc standard, as a virtio-rtc that does not address the live + * migration problem seems a little less than fit for purpose. For that + * reason, certain fields use precisely the same numeric definitions as in + * the virtio-rtc proposal. The structure can also be exposed through an ACPI + * device with the CID "VMCLOCK", modelled on the "VMGENID" device except for + * the fact that it uses a real _CRS to convey the address of the structure + * (which should be a full page, to allow for mapping directly to userspace). + */ + +#ifndef __VMCLOCK_ABI_H__ +#define __VMCLOCK_ABI_H__ + +#include "standard-headers/linux/types.h" + +struct vmclock_abi { + /* CONSTANT FIELDS */ + uint32_t magic; +#define VMCLOCK_MAGIC 0x4b4c4356 /* "VCLK" */ + uint32_t size; /* Size of region containing this structure */ + uint16_t version; /* 1 */ + uint8_t counter_id; /* Matches VIRTIO_RTC_COUNTER_xxx except INVALID */ +#define VMCLOCK_COUNTER_ARM_VCNT 0 +#define VMCLOCK_COUNTER_X86_TSC 1 +#define VMCLOCK_COUNTER_INVALID 0xff + uint8_t time_type; /* Matches VIRTIO_RTC_TYPE_xxx */ +#define VMCLOCK_TIME_UTC 0 /* Since 1970-01-01 00:00:00z */ +#define VMCLOCK_TIME_TAI 1 /* Since 1970-01-01 00:00:00z */ +#define VMCLOCK_TIME_MONOTONIC 2 /* Since undefined epoch */ +#define VMCLOCK_TIME_INVALID_SMEARED 3 /* Not supported */ +#define VMCLOCK_TIME_INVALID_MAYBE_SMEARED 4 /* Not supported */ + + /* NON-CONSTANT FIELDS PROTECTED BY SEQCOUNT LOCK */ + uint32_t seq_count; /* Low bit means an update is in progress */ + /* + * This field changes to another non-repeating value when the CPU + * counter is disrupted, for example on live migration. This lets + * the guest know that it should discard any calibration it has + * performed of the counter against external sources (NTP/PTP/etc.). + */ + uint64_t disruption_marker; + uint64_t flags; + /* Indicates that the tai_offset_sec field is valid */ +#define VMCLOCK_FLAG_TAI_OFFSET_VALID (1 << 0) + /* + * Optionally used to notify guests of pending maintenance events. + * A guest which provides latency-sensitive services may wish to + * remove itself from service if an event is coming up. Two flags + * indicate the approximate imminence of the event. + */ +#define VMCLOCK_FLAG_DISRUPTION_SOON (1 << 1) /* About a day */ +#define VMCLOCK_FLAG_DISRUPTION_IMMINENT (1 << 2) /* About an hour */ +#define VMCLOCK_FLAG_PERIOD_ESTERROR_VALID (1 << 3) +#define VMCLOCK_FLAG_PERIOD_MAXERROR_VALID (1 << 4) +#define VMCLOCK_FLAG_TIME_ESTERROR_VALID (1 << 5) +#define VMCLOCK_FLAG_TIME_MAXERROR_VALID (1 << 6) + /* + * If the MONOTONIC flag is set then (other than leap seconds) it is + * guaranteed that the time calculated according this structure at + * any given moment shall never appear to be later than the time + * calculated via the structure at any *later* moment. + * + * In particular, a timestamp based on a counter reading taken + * immediately after setting the low bit of seq_count (and the + * associated memory barrier), using the previously-valid time and + * period fields, shall never be later than a timestamp based on + * a counter reading taken immediately before *clearing* the low + * bit again after the update, using the about-to-be-valid fields. + */ +#define VMCLOCK_FLAG_TIME_MONOTONIC (1 << 7) + + uint8_t pad[2]; + uint8_t clock_status; +#define VMCLOCK_STATUS_UNKNOWN 0 +#define VMCLOCK_STATUS_INITIALIZING 1 +#define VMCLOCK_STATUS_SYNCHRONIZED 2 +#define VMCLOCK_STATUS_FREERUNNING 3 +#define VMCLOCK_STATUS_UNRELIABLE 4 + + /* + * The time exposed through this device is never smeared. This field + * corresponds to the 'subtype' field in virtio-rtc, which indicates + * the smearing method. However in this case it provides a *hint* to + * the guest operating system, such that *if* the guest OS wants to + * provide its users with an alternative clock which does not follow + * UTC, it may do so in a fashion consistent with the other systems + * in the nearby environment. + */ + uint8_t leap_second_smearing_hint; /* Matches VIRTIO_RTC_SUBTYPE_xxx */ +#define VMCLOCK_SMEARING_STRICT 0 +#define VMCLOCK_SMEARING_NOON_LINEAR 1 +#define VMCLOCK_SMEARING_UTC_SLS 2 + uint16_t tai_offset_sec; /* Actually two's complement signed */ + uint8_t leap_indicator; + /* + * This field is based on the VIRTIO_RTC_LEAP_xxx values as defined + * in the current draft of virtio-rtc, but since smearing cannot be + * used with the shared memory device, some values are not used. + * + * The _POST_POS and _POST_NEG values allow the guest to perform + * its own smearing during the day or so after a leap second when + * such smearing may need to continue being applied for a leap + * second which is now theoretically "historical". + */ +#define VMCLOCK_LEAP_NONE 0x00 /* No known nearby leap second */ +#define VMCLOCK_LEAP_PRE_POS 0x01 /* Positive leap second at EOM */ +#define VMCLOCK_LEAP_PRE_NEG 0x02 /* Negative leap second at EOM */ +#define VMCLOCK_LEAP_POS 0x03 /* Set during 23:59:60 second */ +#define VMCLOCK_LEAP_POST_POS 0x04 +#define VMCLOCK_LEAP_POST_NEG 0x05 + + /* Bit shift for counter_period_frac_sec and its error rate */ + uint8_t counter_period_shift; + /* + * Paired values of counter and UTC at a given point in time. + */ + uint64_t counter_value; + /* + * Counter period, and error margin of same. The unit of these + * fields is 1/2^(64 + counter_period_shift) of a second. + */ + uint64_t counter_period_frac_sec; + uint64_t counter_period_esterror_rate_frac_sec; + uint64_t counter_period_maxerror_rate_frac_sec; + + /* + * Time according to time_type field above. + */ + uint64_t time_sec; /* Seconds since time_type epoch */ + uint64_t time_frac_sec; /* Units of 1/2^64 of a second */ + uint64_t time_esterror_nanosec; + uint64_t time_maxerror_nanosec; +}; + +#endif /* __VMCLOCK_ABI_H__ */ diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh index 99a8d9fa4c..8913e4fb99 100755 --- a/scripts/update-linux-headers.sh +++ b/scripts/update-linux-headers.sh @@ -258,6 +258,7 @@ for i in "$hdrdir"/include/linux/*virtio*.h \ "$hdrdir/include/linux/kernel.h" \ "$hdrdir/include/linux/kvm_para.h" \ "$hdrdir/include/linux/vhost_types.h" \ + "$hdrdir/include/linux/vmclock-abi.h" \ "$hdrdir/include/linux/sysinfo.h"; do cp_portable "$i" "$output/include/standard-headers/linux" done