From e969f992c6562222e245dd8557f5b132a11ec29c Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 8 Aug 2023 17:58:46 +0100 Subject: [PATCH 1/7] i386/xen: Don't advertise XENFEAT_supervisor_mode_kernel This confuses lscpu into thinking it's running in PVH mode. Cc: qemu-stable@nongnu.org Fixes: bedcc139248 ("i386/xen: implement HYPERVISOR_xen_version") Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant --- target/i386/kvm/xen-emu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/target/i386/kvm/xen-emu.c b/target/i386/kvm/xen-emu.c index 76348f9d5d..0055441b2e 100644 --- a/target/i386/kvm/xen-emu.c +++ b/target/i386/kvm/xen-emu.c @@ -267,7 +267,6 @@ static bool kvm_xen_hcall_xen_version(struct kvm_xen_exit *exit, X86CPU *cpu, fi.submap |= 1 << XENFEAT_writable_page_tables | 1 << XENFEAT_writable_descriptor_tables | 1 << XENFEAT_auto_translated_physmap | - 1 << XENFEAT_supervisor_mode_kernel | 1 << XENFEAT_hvm_callback_vector | 1 << XENFEAT_hvm_safe_pvclock | 1 << XENFEAT_hvm_pirqs; From e7dbb62ff19ce55548c785d76e814e7b144e6217 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 11 Oct 2023 23:30:08 +0100 Subject: [PATCH 2/7] i386/xen: fix per-vCPU upcall vector for Xen emulation The per-vCPU upcall vector support had three problems. Firstly it was using the wrong hypercall argument and would always return -EFAULT when the guest tried to set it up. Secondly it was using the wrong ioctl() to pass the vector to the kernel and thus the *kernel* would always return -EINVAL. Finally, even when delivering the event directly from userspace with an MSI, it put the destination CPU ID into the wrong bits of the MSI address. Linux doesn't (yet) use this mode so it went without decent testing for a while. Cc: qemu-stable@nongnu.org Fixes: 105b47fdf2d0 ("i386/xen: implement HVMOP_set_evtchn_upcall_vector") Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant --- target/i386/kvm/xen-emu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/target/i386/kvm/xen-emu.c b/target/i386/kvm/xen-emu.c index 0055441b2e..7c504d9fa4 100644 --- a/target/i386/kvm/xen-emu.c +++ b/target/i386/kvm/xen-emu.c @@ -306,7 +306,7 @@ static int kvm_xen_set_vcpu_callback_vector(CPUState *cs) trace_kvm_xen_set_vcpu_callback(cs->cpu_index, vector); - return kvm_vcpu_ioctl(cs, KVM_XEN_HVM_SET_ATTR, &xva); + return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &xva); } static void do_set_vcpu_callback_vector(CPUState *cs, run_on_cpu_data data) @@ -440,7 +440,8 @@ void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type) * deliver it as an MSI. */ MSIMessage msg = { - .address = APIC_DEFAULT_ADDRESS | X86_CPU(cs)->apic_id, + .address = APIC_DEFAULT_ADDRESS | + (X86_CPU(cs)->apic_id << MSI_ADDR_DEST_ID_SHIFT), .data = vector | (1UL << MSI_DATA_LEVEL_SHIFT), }; kvm_irqchip_send_msi(kvm_state, msg); @@ -849,8 +850,7 @@ static bool kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit, X86CPU *cpu, int ret = -ENOSYS; switch (cmd) { case HVMOP_set_evtchn_upcall_vector: - ret = kvm_xen_hcall_evtchn_upcall_vector(exit, cpu, - exit->u.hcall.params[0]); + ret = kvm_xen_hcall_evtchn_upcall_vector(exit, cpu, arg); break; case HVMOP_pagetable_dying: From 18e83f28bf39ffd2784aeb2e4e229096a86d349b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 12 Oct 2023 00:06:26 +0100 Subject: [PATCH 3/7] hw/xen: select kernel mode for per-vCPU event channel upcall vector A guest which has configured the per-vCPU upcall vector may set the HVM_PARAM_CALLBACK_IRQ param to fairly much anything other than zero. For example, Linux v6.0+ after commit b1c3497e604 ("x86/xen: Add support for HVMOP_set_evtchn_upcall_vector") will just do this after setting the vector: /* Trick toolstack to think we are enlightened. */ if (!cpu) rc = xen_set_callback_via(1); That's explicitly setting the delivery to GSI#1, but it's supposed to be overridden by the per-vCPU vector setting. This mostly works in Qemu *except* for the logic to enable the in-kernel handling of event channels, which falsely determines that the kernel cannot accelerate GSI delivery in this case. Add a kvm_xen_has_vcpu_callback_vector() to report whether vCPU#0 has the vector set, and use that in xen_evtchn_set_callback_param() to enable the kernel acceleration features even when the param *appears* to be set to target a GSI. Preserve the Xen behaviour that when HVM_PARAM_CALLBACK_IRQ is set to *zero* the event channel delivery is disabled completely. (Which is what that bizarre guest behaviour is working round in the first place.) Cc: qemu-stable@nongnu.org Fixes: 91cce756179 ("hw/xen: Add xen_evtchn device for event channel emulation") Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant --- hw/i386/kvm/xen_evtchn.c | 6 ++++++ include/sysemu/kvm_xen.h | 1 + target/i386/kvm/xen-emu.c | 7 +++++++ 3 files changed, 14 insertions(+) diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c index a731738411..3d6f4b4a0a 100644 --- a/hw/i386/kvm/xen_evtchn.c +++ b/hw/i386/kvm/xen_evtchn.c @@ -490,6 +490,12 @@ int xen_evtchn_set_callback_param(uint64_t param) break; } + /* If the guest has set a per-vCPU callback vector, prefer that. */ + if (gsi && kvm_xen_has_vcpu_callback_vector()) { + in_kernel = kvm_xen_has_cap(EVTCHN_SEND); + gsi = 0; + } + if (!ret) { /* If vector delivery was turned *off* then tell the kernel */ if ((s->callback_param >> CALLBACK_VIA_TYPE_SHIFT) == diff --git a/include/sysemu/kvm_xen.h b/include/sysemu/kvm_xen.h index 595abfbe40..961c702c4e 100644 --- a/include/sysemu/kvm_xen.h +++ b/include/sysemu/kvm_xen.h @@ -22,6 +22,7 @@ int kvm_xen_soft_reset(void); uint32_t kvm_xen_get_caps(void); void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id); +bool kvm_xen_has_vcpu_callback_vector(void); void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type); void kvm_xen_set_callback_asserted(void); int kvm_xen_set_vcpu_virq(uint32_t vcpu_id, uint16_t virq, uint16_t port); diff --git a/target/i386/kvm/xen-emu.c b/target/i386/kvm/xen-emu.c index 7c504d9fa4..75b2c557b9 100644 --- a/target/i386/kvm/xen-emu.c +++ b/target/i386/kvm/xen-emu.c @@ -424,6 +424,13 @@ void kvm_xen_set_callback_asserted(void) } } +bool kvm_xen_has_vcpu_callback_vector(void) +{ + CPUState *cs = qemu_get_cpu(0); + + return cs && !!X86_CPU(cs)->env.xen_vcpu_callback_vector; +} + void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type) { CPUState *cs = qemu_get_cpu(vcpu_id); From 3de75ed352411899dbc9222e82fe164890c77e78 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 18 Oct 2023 13:31:20 +0100 Subject: [PATCH 4/7] hw/xen: don't clear map_track[] in xen_gnttab_reset() The refcounts actually correspond to 'active_ref' structures stored in a GHashTable per "user" on the backend side (mostly, per XenDevice). If we zero map_track[] on reset, then when the backend drivers get torn down and release their mapping we hit the assert(s->map_track[ref] != 0) in gnt_unref(). So leave them in place. Each backend driver will disconnect and reconnect as the guest comes back up again and reconnects, and it all works out OK in the end as the old refs get dropped. Cc: qemu-stable@nongnu.org Fixes: de26b2619789 ("hw/xen: Implement soft reset for emulated gnttab") Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant --- hw/i386/kvm/xen_gnttab.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/hw/i386/kvm/xen_gnttab.c b/hw/i386/kvm/xen_gnttab.c index 21c30e3659..839ec920a1 100644 --- a/hw/i386/kvm/xen_gnttab.c +++ b/hw/i386/kvm/xen_gnttab.c @@ -541,7 +541,5 @@ int xen_gnttab_reset(void) s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access; s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE); - memset(s->map_track, 0, s->max_frames * ENTRIES_PER_FRAME_V1); - return 0; } From 4a5780f52095f1daf23618dc6198a2a1665ea505 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 17 Oct 2023 13:34:18 +0100 Subject: [PATCH 5/7] hw/xen: fix XenStore watch delivery to guest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When fire_watch_cb() found the response buffer empty, it would call deliver_watch() to generate the XS_WATCH_EVENT message in the response buffer and send an event channel notification to the guest… without actually *copying* the response buffer into the ring. So there was nothing for the guest to see. The pending response didn't actually get processed into the ring until the guest next triggered some activity from its side. Add the missing call to put_rsp(). It might have been slightly nicer to call xen_xenstore_event() here, which would *almost* have worked. Except for the fact that it calls xen_be_evtchn_pending() to check that it really does have an event pending (and clear the eventfd for next time). And under Xen it's defined that setting that fd to O_NONBLOCK isn't guaranteed to work, so the emu implementation follows suit. This fixes Xen device hot-unplug. Cc: qemu-stable@nongnu.org Fixes: 0254c4d19df ("hw/xen: Add xenstore wire implementation and implementation stubs") Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant --- hw/i386/kvm/xen_xenstore.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c index 660d0b72f9..8e716a7009 100644 --- a/hw/i386/kvm/xen_xenstore.c +++ b/hw/i386/kvm/xen_xenstore.c @@ -1357,10 +1357,12 @@ static void fire_watch_cb(void *opaque, const char *path, const char *token) } else { deliver_watch(s, path, token); /* - * If the message was queued because there was already ring activity, - * no need to wake the guest. But if not, we need to send the evtchn. + * Attempt to queue the message into the actual ring, and send + * the event channel notification if any bytes are copied. */ - xen_be_evtchn_notify(s->eh, s->be_port); + if (s->rsp_pending && put_rsp(s) > 0) { + xen_be_evtchn_notify(s->eh, s->be_port); + } } } From debc995e883b05c2fd02fb797a61ab1328e5bae2 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 24 Oct 2023 22:22:47 +0100 Subject: [PATCH 6/7] hw/xen: take iothread mutex in xen_evtchn_reset_op() The xen_evtchn_soft_reset() function requires the iothread mutex, but is also called for the EVTCHNOP_reset hypercall. Ensure the mutex is taken in that case. Cc: qemu-stable@nongnu.org Fixes: a15b10978fe6 ("hw/xen: Implement EVTCHNOP_reset") Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant --- hw/i386/kvm/xen_evtchn.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c index 3d6f4b4a0a..b2b4be9983 100644 --- a/hw/i386/kvm/xen_evtchn.c +++ b/hw/i386/kvm/xen_evtchn.c @@ -1135,6 +1135,7 @@ int xen_evtchn_reset_op(struct evtchn_reset *reset) return -ESRCH; } + QEMU_IOTHREAD_LOCK_GUARD(); return xen_evtchn_soft_reset(); } From a1c1082908dde4867b1ac55f546bea0c17d52318 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 20 Oct 2023 18:00:18 +0100 Subject: [PATCH 7/7] hw/xen: use correct default protocol for xen-block on x86 Even on x86_64 the default protocol is the x86-32 one if the guest doesn't specifically ask for x86-64. Cc: qemu-stable@nongnu.org Fixes: b6af8926fb85 ("xen: add implementations of xen-block connect and disconnect functions...") Signed-off-by: David Woodhouse Reviewed-by: Paul Durrant --- hw/block/xen-block.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c index a07cd7eb5d..bfa53960c3 100644 --- a/hw/block/xen-block.c +++ b/hw/block/xen-block.c @@ -115,9 +115,13 @@ static void xen_block_connect(XenDevice *xendev, Error **errp) return; } - if (xen_device_frontend_scanf(xendev, "protocol", "%ms", - &str) != 1) { - protocol = BLKIF_PROTOCOL_NATIVE; + if (xen_device_frontend_scanf(xendev, "protocol", "%ms", &str) != 1) { + /* x86 defaults to the 32-bit protocol even for 64-bit guests. */ + if (object_dynamic_cast(OBJECT(qdev_get_machine()), "x86-machine")) { + protocol = BLKIF_PROTOCOL_X86_32; + } else { + protocol = BLKIF_PROTOCOL_NATIVE; + } } else { if (strcmp(str, XEN_IO_PROTO_ABI_X86_32) == 0) { protocol = BLKIF_PROTOCOL_X86_32;