From 993e38020fddc65e7536c5e1469c22588e5a9f16 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 4 Jun 2024 16:40:57 +0200 Subject: [PATCH 01/46] docs, tests: do not specify scsi=off MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This has been the default forever. Acked-by: Alex Bennée Signed-off-by: Paolo Bonzini --- docs/pci_expander_bridge.txt | 2 +- docs/specs/tpm.rst | 2 +- tests/avocado/intel_iommu.py | 2 +- tests/avocado/smmu.py | 2 +- tests/avocado/tuxrun_baselines.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/pci_expander_bridge.txt b/docs/pci_expander_bridge.txt index 36750273bb..540191f5e0 100644 --- a/docs/pci_expander_bridge.txt +++ b/docs/pci_expander_bridge.txt @@ -25,7 +25,7 @@ A detailed command line would be: -object memory-backend-ram,size=1024M,policy=bind,host-nodes=1,id=ram-node1 -numa node,nodeid=1,cpus=1,memdev=ram-node1 -device pxb,id=bridge1,bus=pci.0,numa_node=1,bus_nr=4 -netdev user,id=nd -device e1000,bus=bridge1,addr=0x4,netdev=nd -device pxb,id=bridge2,bus=pci.0,numa_node=0,bus_nr=8 -device e1000,bus=bridge2,addr=0x3 --device pxb,id=bridge3,bus=pci.0,bus_nr=40 -drive if=none,id=drive0,file=[img] -device virtio-blk-pci,drive=drive0,scsi=off,bus=bridge3,addr=1 +-device pxb,id=bridge3,bus=pci.0,bus_nr=40 -drive if=none,id=drive0,file=[img] -device virtio-blk-pci,drive=drive0,bus=bridge3,addr=1 Here you have: - 2 NUMA nodes for the guest, 0 and 1. (both mapped to the same NUMA node in host, but you can and should put it in different host NUMA nodes) diff --git a/docs/specs/tpm.rst b/docs/specs/tpm.rst index 68cb8cf7e6..1ad36ad709 100644 --- a/docs/specs/tpm.rst +++ b/docs/specs/tpm.rst @@ -336,7 +336,7 @@ In case a pSeries machine is emulated, use the following command line: -tpmdev emulator,id=tpm0,chardev=chrtpm \ -device tpm-spapr,tpmdev=tpm0 \ -device spapr-vscsi,id=scsi0,reg=0x00002000 \ - -device virtio-blk-pci,scsi=off,bus=pci.0,addr=0x3,drive=drive-virtio-disk0,id=virtio-disk0 \ + -device virtio-blk-pci,bus=pci.0,addr=0x3,drive=drive-virtio-disk0,id=virtio-disk0 \ -drive file=test.img,format=raw,if=none,id=drive-virtio-disk0 In case an Arm virt machine is emulated, use the following command line: diff --git a/tests/avocado/intel_iommu.py b/tests/avocado/intel_iommu.py index f04ee1cf9d..09e694bd40 100644 --- a/tests/avocado/intel_iommu.py +++ b/tests/avocado/intel_iommu.py @@ -32,7 +32,7 @@ class IntelIOMMU(LinuxTest): def set_up_boot(self): path = self.download_boot() - self.vm.add_args('-device', 'virtio-blk-pci,bus=pcie.0,scsi=off,' + + self.vm.add_args('-device', 'virtio-blk-pci,bus=pcie.0,' + 'drive=drv0,id=virtio-disk0,bootindex=1,' 'werror=stop,rerror=stop' + self.IOMMU_ADDON) self.vm.add_args('-device', 'virtio-gpu-pci' + self.IOMMU_ADDON) diff --git a/tests/avocado/smmu.py b/tests/avocado/smmu.py index 21ff030ca7..4ebfa7128c 100644 --- a/tests/avocado/smmu.py +++ b/tests/avocado/smmu.py @@ -32,7 +32,7 @@ class SMMU(LinuxTest): def set_up_boot(self): path = self.download_boot() - self.vm.add_args('-device', 'virtio-blk-pci,bus=pcie.0,scsi=off,' + + self.vm.add_args('-device', 'virtio-blk-pci,bus=pcie.0,' + 'drive=drv0,id=virtio-disk0,bootindex=1,' 'werror=stop,rerror=stop' + self.IOMMU_ADDON) self.vm.add_args('-drive', diff --git a/tests/avocado/tuxrun_baselines.py b/tests/avocado/tuxrun_baselines.py index a936a3b780..736e4aa289 100644 --- a/tests/avocado/tuxrun_baselines.py +++ b/tests/avocado/tuxrun_baselines.py @@ -235,7 +235,7 @@ class TuxRunBaselineTest(QemuSystemTest): self.vm.add_args('-drive', 'file=' + qcow2.name + ',format=qcow2,if=none,id=' 'drive-virtio-disk1', - '-device', 'virtio-blk-pci,scsi=off,bus=pci.0,' + '-device', 'virtio-blk-pci,bus=pci.0,' 'addr=0xb,drive=drive-virtio-disk1,id=virtio-disk1' ',bootindex=2') self.common_tuxrun(csums=sums, drive="scsi-hd") From a271b8d7b2f39275a05e49deb7c8edc20b7a8279 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 10 May 2024 15:51:57 +0200 Subject: [PATCH 02/46] virtio-blk: remove SCSI passthrough functionality The legacy SCSI passthrough functionality has never been enabled for VIRTIO 1.0 and was deprecated more than four years ago. Get rid of it---almost, because QEMU is advertising it unconditionally for legacy virtio-blk devices. Just parse the header and return a nonzero status. Signed-off-by: Paolo Bonzini --- docs/about/deprecated.rst | 10 -- docs/about/removed-features.rst | 8 ++ hw/block/virtio-blk.c | 166 +++----------------------------- hw/core/machine.c | 2 - 4 files changed, 19 insertions(+), 167 deletions(-) diff --git a/docs/about/deprecated.rst b/docs/about/deprecated.rst index 40585ca7d5..4980d721cf 100644 --- a/docs/about/deprecated.rst +++ b/docs/about/deprecated.rst @@ -296,16 +296,6 @@ Device options Emulated device options ''''''''''''''''''''''' -``-device virtio-blk,scsi=on|off`` (since 5.0) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The virtio-blk SCSI passthrough feature is a legacy VIRTIO feature. VIRTIO 1.0 -and later do not support it because the virtio-scsi device was introduced for -full SCSI support. Use virtio-scsi instead when SCSI passthrough is required. - -Note this also applies to ``-device virtio-blk-pci,scsi=on|off``, which is an -alias. - ``-device nvme-ns,eui64-default=on|off`` (since 7.1) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/about/removed-features.rst b/docs/about/removed-features.rst index fba0cfb0b0..ae6269eb56 100644 --- a/docs/about/removed-features.rst +++ b/docs/about/removed-features.rst @@ -510,6 +510,14 @@ than zero. Removed along with the ``compression`` migration capability. +``-device virtio-blk,scsi=on|off`` (since 9.1) +'''''''''''''''''''''''''''''''''''''''''''''' + +The virtio-blk SCSI passthrough feature is a legacy VIRTIO feature. VIRTIO 1.0 +and later do not support it because the virtio-scsi device was introduced for +full SCSI support. Use virtio-scsi instead when SCSI passthrough is required. + + User-mode emulator command line arguments ----------------------------------------- diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index bb86e65f65..73bdfd6122 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -172,57 +172,6 @@ static void virtio_blk_discard_write_zeroes_complete(void *opaque, int ret) virtio_blk_free_request(req); } -#ifdef __linux__ - -typedef struct { - VirtIOBlockReq *req; - struct sg_io_hdr hdr; -} VirtIOBlockIoctlReq; - -static void virtio_blk_ioctl_complete(void *opaque, int status) -{ - VirtIOBlockIoctlReq *ioctl_req = opaque; - VirtIOBlockReq *req = ioctl_req->req; - VirtIOBlock *s = req->dev; - VirtIODevice *vdev = VIRTIO_DEVICE(s); - struct virtio_scsi_inhdr *scsi; - struct sg_io_hdr *hdr; - - scsi = (void *)req->elem.in_sg[req->elem.in_num - 2].iov_base; - - if (status) { - status = VIRTIO_BLK_S_UNSUPP; - virtio_stl_p(vdev, &scsi->errors, 255); - goto out; - } - - hdr = &ioctl_req->hdr; - /* - * From SCSI-Generic-HOWTO: "Some lower level drivers (e.g. ide-scsi) - * clear the masked_status field [hence status gets cleared too, see - * block/scsi_ioctl.c] even when a CHECK_CONDITION or COMMAND_TERMINATED - * status has occurred. However they do set DRIVER_SENSE in driver_status - * field. Also a (sb_len_wr > 0) indicates there is a sense buffer. - */ - if (hdr->status == 0 && hdr->sb_len_wr > 0) { - hdr->status = CHECK_CONDITION; - } - - virtio_stl_p(vdev, &scsi->errors, - hdr->status | (hdr->msg_status << 8) | - (hdr->host_status << 16) | (hdr->driver_status << 24)); - virtio_stl_p(vdev, &scsi->residual, hdr->resid); - virtio_stl_p(vdev, &scsi->sense_len, hdr->sb_len_wr); - virtio_stl_p(vdev, &scsi->data_len, hdr->dxfer_len); - -out: - virtio_blk_req_complete(req, status); - virtio_blk_free_request(req); - g_free(ioctl_req); -} - -#endif - static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s, VirtQueue *vq) { VirtIOBlockReq *req = virtqueue_pop(vq, sizeof(VirtIOBlockReq)); @@ -233,20 +182,14 @@ static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s, VirtQueue *vq) return req; } -static int virtio_blk_handle_scsi_req(VirtIOBlockReq *req) +static void virtio_blk_handle_scsi(VirtIOBlockReq *req) { - int status = VIRTIO_BLK_S_OK; - struct virtio_scsi_inhdr *scsi = NULL; + int status; + struct virtio_scsi_inhdr *scsi; VirtIOBlock *blk = req->dev; VirtIODevice *vdev = VIRTIO_DEVICE(blk); VirtQueueElement *elem = &req->elem; -#ifdef __linux__ - int i; - VirtIOBlockIoctlReq *ioctl_req; - BlockAIOCB *acb; -#endif - /* * We require at least one output segment each for the virtio_blk_outhdr * and the SCSI command block. @@ -262,95 +205,16 @@ static int virtio_blk_handle_scsi_req(VirtIOBlockReq *req) /* * The scsi inhdr is placed in the second-to-last input segment, just * before the regular inhdr. + * + * Just put anything nonzero so that the ioctl fails in the guest. */ scsi = (void *)elem->in_sg[elem->in_num - 2].iov_base; - - if (!virtio_has_feature(blk->host_features, VIRTIO_BLK_F_SCSI)) { - status = VIRTIO_BLK_S_UNSUPP; - goto fail; - } - - /* - * No support for bidirection commands yet. - */ - if (elem->out_num > 2 && elem->in_num > 3) { - status = VIRTIO_BLK_S_UNSUPP; - goto fail; - } - -#ifdef __linux__ - ioctl_req = g_new0(VirtIOBlockIoctlReq, 1); - ioctl_req->req = req; - ioctl_req->hdr.interface_id = 'S'; - ioctl_req->hdr.cmd_len = elem->out_sg[1].iov_len; - ioctl_req->hdr.cmdp = elem->out_sg[1].iov_base; - ioctl_req->hdr.dxfer_len = 0; - - if (elem->out_num > 2) { - /* - * If there are more than the minimally required 2 output segments - * there is write payload starting from the third iovec. - */ - ioctl_req->hdr.dxfer_direction = SG_DXFER_TO_DEV; - ioctl_req->hdr.iovec_count = elem->out_num - 2; - - for (i = 0; i < ioctl_req->hdr.iovec_count; i++) { - ioctl_req->hdr.dxfer_len += elem->out_sg[i + 2].iov_len; - } - - ioctl_req->hdr.dxferp = elem->out_sg + 2; - - } else if (elem->in_num > 3) { - /* - * If we have more than 3 input segments the guest wants to actually - * read data. - */ - ioctl_req->hdr.dxfer_direction = SG_DXFER_FROM_DEV; - ioctl_req->hdr.iovec_count = elem->in_num - 3; - for (i = 0; i < ioctl_req->hdr.iovec_count; i++) { - ioctl_req->hdr.dxfer_len += elem->in_sg[i].iov_len; - } - - ioctl_req->hdr.dxferp = elem->in_sg; - } else { - /* - * Some SCSI commands don't actually transfer any data. - */ - ioctl_req->hdr.dxfer_direction = SG_DXFER_NONE; - } - - ioctl_req->hdr.sbp = elem->in_sg[elem->in_num - 3].iov_base; - ioctl_req->hdr.mx_sb_len = elem->in_sg[elem->in_num - 3].iov_len; - - acb = blk_aio_ioctl(blk->blk, SG_IO, &ioctl_req->hdr, - virtio_blk_ioctl_complete, ioctl_req); - if (!acb) { - g_free(ioctl_req); - status = VIRTIO_BLK_S_UNSUPP; - goto fail; - } - return -EINPROGRESS; -#else - abort(); -#endif + virtio_stl_p(vdev, &scsi->errors, 255); + status = VIRTIO_BLK_S_UNSUPP; fail: - /* Just put anything nonzero so that the ioctl fails in the guest. */ - if (scsi) { - virtio_stl_p(vdev, &scsi->errors, 255); - } - return status; -} - -static void virtio_blk_handle_scsi(VirtIOBlockReq *req) -{ - int status; - - status = virtio_blk_handle_scsi_req(req); - if (status != -EINPROGRESS) { - virtio_blk_req_complete(req, status); - virtio_blk_free_request(req); - } + virtio_blk_req_complete(req, status); + virtio_blk_free_request(req); } static inline void submit_requests(VirtIOBlock *s, MultiReqBuffer *mrb, @@ -1379,13 +1243,9 @@ static uint64_t virtio_blk_get_features(VirtIODevice *vdev, uint64_t features, virtio_add_feature(&features, VIRTIO_BLK_F_GEOMETRY); virtio_add_feature(&features, VIRTIO_BLK_F_TOPOLOGY); virtio_add_feature(&features, VIRTIO_BLK_F_BLK_SIZE); - if (virtio_has_feature(features, VIRTIO_F_VERSION_1)) { - if (virtio_has_feature(s->host_features, VIRTIO_BLK_F_SCSI)) { - error_setg(errp, "Please set scsi=off for virtio-blk devices in order to use virtio 1.0"); - return 0; - } - } else { + if (!virtio_has_feature(features, VIRTIO_F_VERSION_1)) { virtio_clear_feature(&features, VIRTIO_F_ANY_LAYOUT); + /* Added for historical reasons, removing it could break migration. */ virtio_add_feature(&features, VIRTIO_BLK_F_SCSI); } @@ -2132,10 +1992,6 @@ static Property virtio_blk_properties[] = { DEFINE_PROP_STRING("serial", VirtIOBlock, conf.serial), DEFINE_PROP_BIT64("config-wce", VirtIOBlock, host_features, VIRTIO_BLK_F_CONFIG_WCE, true), -#ifdef __linux__ - DEFINE_PROP_BIT64("scsi", VirtIOBlock, host_features, - VIRTIO_BLK_F_SCSI, false), -#endif DEFINE_PROP_BIT("request-merging", VirtIOBlock, conf.request_merging, 0, true), DEFINE_PROP_UINT16("num-queues", VirtIOBlock, conf.num_queues, diff --git a/hw/core/machine.c b/hw/core/machine.c index 8087026b45..17292b13e6 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -265,8 +265,6 @@ GlobalProperty hw_compat_2_5[] = { const size_t hw_compat_2_5_len = G_N_ELEMENTS(hw_compat_2_5); GlobalProperty hw_compat_2_4[] = { - /* Optional because the 'scsi' property is Linux-only */ - { "virtio-blk-device", "scsi", "true", .optional = true }, { "e1000", "extra_mac_registers", "off" }, { "virtio-pci", "x-disable-pcie", "on" }, { "virtio-pci", "migrate-extra", "off" }, From 72baef13b9dce71f20ae840d9951e559e14abf6d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 31 May 2024 11:02:04 +0200 Subject: [PATCH 03/46] host/i386: nothing looks at CPUINFO_SSE4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only user was the SSE4.1 variant of buffer_is_zero, which has been removed; code to compute CPUINFO_SSE4 is dead. Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- host/include/i386/host/cpuinfo.h | 1 - util/cpuinfo-i386.c | 1 - 2 files changed, 2 deletions(-) diff --git a/host/include/i386/host/cpuinfo.h b/host/include/i386/host/cpuinfo.h index b89e6d2e55..9386c74988 100644 --- a/host/include/i386/host/cpuinfo.h +++ b/host/include/i386/host/cpuinfo.h @@ -16,7 +16,6 @@ #define CPUINFO_BMI1 (1u << 5) #define CPUINFO_BMI2 (1u << 6) #define CPUINFO_SSE2 (1u << 7) -#define CPUINFO_SSE4 (1u << 8) #define CPUINFO_AVX1 (1u << 9) #define CPUINFO_AVX2 (1u << 10) #define CPUINFO_AVX512F (1u << 11) diff --git a/util/cpuinfo-i386.c b/util/cpuinfo-i386.c index 9fddb18303..18ab747a6d 100644 --- a/util/cpuinfo-i386.c +++ b/util/cpuinfo-i386.c @@ -36,7 +36,6 @@ unsigned __attribute__((constructor)) cpuinfo_init(void) info |= (d & bit_CMOV ? CPUINFO_CMOV : 0); info |= (d & bit_SSE2 ? CPUINFO_SSE2 : 0); - info |= (c & bit_SSE4_1 ? CPUINFO_SSE4 : 0); info |= (c & bit_MOVBE ? CPUINFO_MOVBE : 0); info |= (c & bit_POPCNT ? CPUINFO_POPCNT : 0); info |= (c & bit_PCLMUL ? CPUINFO_PCLMUL : 0); From 294ac64e459aca023f43441651d860980c9784f1 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 31 May 2024 10:37:06 +0200 Subject: [PATCH 04/46] meson: assume x86-64-v2 baseline ISA x86-64-v2 processors were released in 2008, assume that we have one. Unfortunately there is no GCC flag to enable all the features without disabling what came after; so enable them one by one. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- meson.build | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/meson.build b/meson.build index 6386607144..d80203f1cd 100644 --- a/meson.build +++ b/meson.build @@ -336,9 +336,13 @@ if host_arch == 'i386' and not cc.links(''' qemu_common_flags = ['-march=i486'] + qemu_common_flags endif -# ??? Only extremely old AMD cpus do not have cmpxchg16b. -# If we truly care, we should simply detect this case at -# runtime and generate the fallback to serial emulation. +# Assume x86-64-v2 (minus CMPXCHG16B for 32-bit code) +if host_arch == 'i386' + qemu_common_flags = ['-mfpmath=sse'] + qemu_common_flags +endif +if host_arch in ['i386', 'x86_64'] + qemu_common_flags = ['-mpopcnt', '-msse4.2'] + qemu_common_flags +endif if host_arch == 'x86_64' qemu_common_flags = ['-mcx16'] + qemu_common_flags endif From e68e97ce55b3d17af22dd62c3b3dc72f761b0862 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 31 May 2024 10:14:48 +0200 Subject: [PATCH 05/46] host/i386: assume presence of CMOV QEMU now requires an x86-64-v2 host, which always has CMOV. Use it freely in TCG generated code. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- host/include/i386/host/cpuinfo.h | 1 - tcg/i386/tcg-target.c.inc | 15 +-------------- util/cpuinfo-i386.c | 1 - 3 files changed, 1 insertion(+), 16 deletions(-) diff --git a/host/include/i386/host/cpuinfo.h b/host/include/i386/host/cpuinfo.h index 9386c74988..81771733ea 100644 --- a/host/include/i386/host/cpuinfo.h +++ b/host/include/i386/host/cpuinfo.h @@ -9,7 +9,6 @@ /* Digested version of */ #define CPUINFO_ALWAYS (1u << 0) /* so cpuinfo is nonzero */ -#define CPUINFO_CMOV (1u << 1) #define CPUINFO_MOVBE (1u << 2) #define CPUINFO_LZCNT (1u << 3) #define CPUINFO_POPCNT (1u << 4) diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 59235b4f38..9a54ef7f8d 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -157,12 +157,6 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) #define SOFTMMU_RESERVE_REGS \ (tcg_use_softmmu ? (1 << TCG_REG_L0) | (1 << TCG_REG_L1) : 0) -/* For 64-bit, we always know that CMOV is available. */ -#if TCG_TARGET_REG_BITS == 64 -# define have_cmov true -#else -# define have_cmov (cpuinfo & CPUINFO_CMOV) -#endif #define have_bmi2 (cpuinfo & CPUINFO_BMI2) #define have_lzcnt (cpuinfo & CPUINFO_LZCNT) @@ -1815,14 +1809,7 @@ static void tcg_out_setcond2(TCGContext *s, const TCGArg *args, static void tcg_out_cmov(TCGContext *s, int jcc, int rexw, TCGReg dest, TCGReg v1) { - if (have_cmov) { - tcg_out_modrm(s, OPC_CMOVCC | jcc | rexw, dest, v1); - } else { - TCGLabel *over = gen_new_label(); - tcg_out_jxx(s, jcc ^ 1, over, 1); - tcg_out_mov(s, TCG_TYPE_I32, dest, v1); - tcg_out_label(s, over); - } + tcg_out_modrm(s, OPC_CMOVCC | jcc | rexw, dest, v1); } static void tcg_out_movcond(TCGContext *s, int rexw, TCGCond cond, diff --git a/util/cpuinfo-i386.c b/util/cpuinfo-i386.c index 18ab747a6d..90f92a42dc 100644 --- a/util/cpuinfo-i386.c +++ b/util/cpuinfo-i386.c @@ -34,7 +34,6 @@ unsigned __attribute__((constructor)) cpuinfo_init(void) if (max >= 1) { __cpuid(1, a, b, c, d); - info |= (d & bit_CMOV ? CPUINFO_CMOV : 0); info |= (d & bit_SSE2 ? CPUINFO_SSE2 : 0); info |= (c & bit_MOVBE ? CPUINFO_MOVBE : 0); info |= (c & bit_POPCNT ? CPUINFO_POPCNT : 0); From b18236897ca15c3db1506d8edb9a191dfe51429c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 31 May 2024 11:02:46 +0200 Subject: [PATCH 06/46] host/i386: assume presence of SSE2 QEMU now requires an x86-64-v2 host, which has SSE2. Use it freely in buffer_is_zero. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- host/include/i386/host/cpuinfo.h | 1 - util/bufferiszero.c | 4 ++-- util/cpuinfo-i386.c | 1 - 3 files changed, 2 insertions(+), 4 deletions(-) diff --git a/host/include/i386/host/cpuinfo.h b/host/include/i386/host/cpuinfo.h index 81771733ea..72f6fad61e 100644 --- a/host/include/i386/host/cpuinfo.h +++ b/host/include/i386/host/cpuinfo.h @@ -14,7 +14,6 @@ #define CPUINFO_POPCNT (1u << 4) #define CPUINFO_BMI1 (1u << 5) #define CPUINFO_BMI2 (1u << 6) -#define CPUINFO_SSE2 (1u << 7) #define CPUINFO_AVX1 (1u << 9) #define CPUINFO_AVX2 (1u << 10) #define CPUINFO_AVX512F (1u << 11) diff --git a/util/bufferiszero.c b/util/bufferiszero.c index 74864f7b78..11c080e02c 100644 --- a/util/bufferiszero.c +++ b/util/bufferiszero.c @@ -188,14 +188,14 @@ static biz_accel_fn const accel_table[] = { static unsigned best_accel(void) { +#ifdef CONFIG_AVX2_OPT unsigned info = cpuinfo_init(); -#ifdef CONFIG_AVX2_OPT if (info & CPUINFO_AVX2) { return 2; } #endif - return info & CPUINFO_SSE2 ? 1 : 0; + return 1; } #elif defined(__aarch64__) && defined(__ARM_NEON) diff --git a/util/cpuinfo-i386.c b/util/cpuinfo-i386.c index 90f92a42dc..ca74ef04f5 100644 --- a/util/cpuinfo-i386.c +++ b/util/cpuinfo-i386.c @@ -34,7 +34,6 @@ unsigned __attribute__((constructor)) cpuinfo_init(void) if (max >= 1) { __cpuid(1, a, b, c, d); - info |= (d & bit_SSE2 ? CPUINFO_SSE2 : 0); info |= (c & bit_MOVBE ? CPUINFO_MOVBE : 0); info |= (c & bit_POPCNT ? CPUINFO_POPCNT : 0); info |= (c & bit_PCLMUL ? CPUINFO_PCLMUL : 0); From 433cd6d94a8256af70a5200f236dc8047c3c1468 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 31 May 2024 10:22:33 +0200 Subject: [PATCH 07/46] host/i386: assume presence of SSSE3 QEMU now requires an x86-64-v2 host, which has SSSE3 instructions (notably, PSHUFB which is used by QEMU's AES implementation). Do not bother checking it. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- util/cpuinfo-i386.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/util/cpuinfo-i386.c b/util/cpuinfo-i386.c index ca74ef04f5..6d474a6259 100644 --- a/util/cpuinfo-i386.c +++ b/util/cpuinfo-i386.c @@ -38,8 +38,8 @@ unsigned __attribute__((constructor)) cpuinfo_init(void) info |= (c & bit_POPCNT ? CPUINFO_POPCNT : 0); info |= (c & bit_PCLMUL ? CPUINFO_PCLMUL : 0); - /* Our AES support requires PSHUFB as well. */ - info |= ((c & bit_AES) && (c & bit_SSSE3) ? CPUINFO_AES : 0); + /* NOTE: our AES support requires SSSE3 (PSHUFB) as well. */ + info |= (c & bit_AES) ? CPUINFO_AES : 0; /* For AVX features, we must check available and usable. */ if ((c & bit_AVX) && (c & bit_OSXSAVE)) { From 45ccdbcb24baf99667997fac5cf60318e5e7db51 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 31 May 2024 10:29:32 +0200 Subject: [PATCH 08/46] host/i386: assume presence of POPCNT QEMU now requires an x86-64-v2 host, which has the POPCNT instruction. Use it freely in TCG-generated code. Reviewed-by: Richard Henderson Signed-off-by: Paolo Bonzini --- host/include/i386/host/cpuinfo.h | 1 - tcg/i386/tcg-target.h | 5 ++--- util/cpuinfo-i386.c | 1 - 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/host/include/i386/host/cpuinfo.h b/host/include/i386/host/cpuinfo.h index 72f6fad61e..c1e94d75ce 100644 --- a/host/include/i386/host/cpuinfo.h +++ b/host/include/i386/host/cpuinfo.h @@ -11,7 +11,6 @@ #define CPUINFO_ALWAYS (1u << 0) /* so cpuinfo is nonzero */ #define CPUINFO_MOVBE (1u << 2) #define CPUINFO_LZCNT (1u << 3) -#define CPUINFO_POPCNT (1u << 4) #define CPUINFO_BMI1 (1u << 5) #define CPUINFO_BMI2 (1u << 6) #define CPUINFO_AVX1 (1u << 9) diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 2f67a97e05..ecc6982728 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -111,7 +111,6 @@ typedef enum { #endif #define have_bmi1 (cpuinfo & CPUINFO_BMI1) -#define have_popcnt (cpuinfo & CPUINFO_POPCNT) #define have_avx1 (cpuinfo & CPUINFO_AVX1) #define have_avx2 (cpuinfo & CPUINFO_AVX2) #define have_movbe (cpuinfo & CPUINFO_MOVBE) @@ -143,7 +142,7 @@ typedef enum { #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 1 -#define TCG_TARGET_HAS_ctpop_i32 have_popcnt +#define TCG_TARGET_HAS_ctpop_i32 1 #define TCG_TARGET_HAS_deposit_i32 1 #define TCG_TARGET_HAS_extract_i32 1 #define TCG_TARGET_HAS_sextract_i32 1 @@ -178,7 +177,7 @@ typedef enum { #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_clz_i64 1 #define TCG_TARGET_HAS_ctz_i64 1 -#define TCG_TARGET_HAS_ctpop_i64 have_popcnt +#define TCG_TARGET_HAS_ctpop_i64 1 #define TCG_TARGET_HAS_deposit_i64 1 #define TCG_TARGET_HAS_extract_i64 1 #define TCG_TARGET_HAS_sextract_i64 0 diff --git a/util/cpuinfo-i386.c b/util/cpuinfo-i386.c index 6d474a6259..8f2694d88f 100644 --- a/util/cpuinfo-i386.c +++ b/util/cpuinfo-i386.c @@ -35,7 +35,6 @@ unsigned __attribute__((constructor)) cpuinfo_init(void) __cpuid(1, a, b, c, d); info |= (c & bit_MOVBE ? CPUINFO_MOVBE : 0); - info |= (c & bit_POPCNT ? CPUINFO_POPCNT : 0); info |= (c & bit_PCLMUL ? CPUINFO_PCLMUL : 0); /* NOTE: our AES support requires SSSE3 (PSHUFB) as well. */ From da7c95920d027dbb00c6879c1da0216b19509191 Mon Sep 17 00:00:00 2001 From: Xinyu Li Date: Sun, 2 Jun 2024 18:09:04 +0800 Subject: [PATCH 09/46] target/i386: fix SSE and SSE2 feature check Features check of CPUID_SSE and CPUID_SSE2 should use cpuid_features, rather than cpuid_ext_features. Signed-off-by: Xinyu Li Reviewed-by: Zhao Liu Message-ID: <20240602100904.2137939-1-lixinyu20s@ict.ac.cn> Signed-off-by: Paolo Bonzini --- target/i386/tcg/decode-new.c.inc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc index 27dc1bb146..0ec849b003 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -2041,9 +2041,9 @@ static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid) case X86_FEAT_PCLMULQDQ: return (s->cpuid_ext_features & CPUID_EXT_PCLMULQDQ); case X86_FEAT_SSE: - return (s->cpuid_ext_features & CPUID_SSE); + return (s->cpuid_features & CPUID_SSE); case X86_FEAT_SSE2: - return (s->cpuid_ext_features & CPUID_SSE2); + return (s->cpuid_features & CPUID_SSE2); case X86_FEAT_SSE3: return (s->cpuid_ext_features & CPUID_EXT_SSE3); case X86_FEAT_SSSE3: From 0683fff1cf7d495e50955a946bf59f8b9c20c3f1 Mon Sep 17 00:00:00 2001 From: Xinyu Li Date: Sun, 2 Jun 2024 18:05:28 +0800 Subject: [PATCH 10/46] target/i386: fix memory opsize for Mov to/from Seg This commit fixes an issue with MOV instructions (0x8C and 0x8E) involving segment registers; MOV to segment register's source is 16-bit, while MOV from segment register has to explicitly set the memory operand size to 16 bits. Introduce a new flag X86_SPECIAL_Op0_Mw to handle this specification correctly. Signed-off-by: Xinyu Li Message-ID: <20240602100528.2135717-1-lixinyu20s@ict.ac.cn> Fixes: 5e9e21bcc4d ("target/i386: move 60-BF opcodes to new decoder", 2024-05-07) Signed-off-by: Paolo Bonzini --- target/i386/tcg/decode-new.c.inc | 13 +++++++++++-- target/i386/tcg/decode-new.h | 3 +++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc index 0ec849b003..0ff0866e8f 100644 --- a/target/i386/tcg/decode-new.c.inc +++ b/target/i386/tcg/decode-new.c.inc @@ -202,6 +202,7 @@ #define avx_movx .special = X86_SPECIAL_AVXExtMov, #define sextT0 .special = X86_SPECIAL_SExtT0, #define zextT0 .special = X86_SPECIAL_ZExtT0, +#define op0_Mw .special = X86_SPECIAL_Op0_Mw, #define vex1 .vex_class = 1, #define vex1_rep3 .vex_class = 1, .vex_special = X86_VEX_REPScalar, @@ -1576,9 +1577,10 @@ static const X86OpEntry opcodes_root[256] = { [0x89] = X86_OP_ENTRY3(MOV, E,v, G,v, None, None), [0x8A] = X86_OP_ENTRY3(MOV, G,b, E,b, None, None), [0x8B] = X86_OP_ENTRY3(MOV, G,v, E,v, None, None), - [0x8C] = X86_OP_ENTRY3(MOV, E,v, S,w, None, None), + /* Missing in Table A-2: memory destination is always 16-bit. */ + [0x8C] = X86_OP_ENTRY3(MOV, E,v, S,w, None, None, op0_Mw), [0x8D] = X86_OP_ENTRY3(LEA, G,v, M,v, None, None, noseg), - [0x8E] = X86_OP_ENTRY3(MOV, S,w, E,v, None, None), + [0x8E] = X86_OP_ENTRY3(MOV, S,w, E,w, None, None), [0x8F] = X86_OP_GROUPw(group1A, E,v), [0x98] = X86_OP_ENTRY1(CBW, 0,v), /* rAX */ @@ -2514,6 +2516,13 @@ static void disas_insn(DisasContext *s, CPUState *cpu) s->override = -1; break; + case X86_SPECIAL_Op0_Mw: + assert(decode.op[0].unit == X86_OP_INT); + if (decode.op[0].has_ea) { + decode.op[0].ot = MO_16; + } + break; + default: break; } diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h index 51ef0e621b..1f90cf9640 100644 --- a/target/i386/tcg/decode-new.h +++ b/target/i386/tcg/decode-new.h @@ -203,6 +203,9 @@ typedef enum X86InsnSpecial { /* When loaded into s->T0, register operand 1 is zero/sign extended. */ X86_SPECIAL_SExtT0, X86_SPECIAL_ZExtT0, + + /* Memory operand size of MOV from segment register is MO_16 */ + X86_SPECIAL_Op0_Mw, } X86InsnSpecial; /* From f2c04bede384608a1ab4f66865dd0e2a433ed94c Mon Sep 17 00:00:00 2001 From: Zhao Liu Date: Mon, 3 Jun 2024 16:07:23 +0800 Subject: [PATCH 11/46] target/i386/tcg: Fix RDPID feature check DisasContext.cpuid_ext_features indicates CPUID.01H.ECX. Use DisasContext.cpuid_7_0_ecx_features field to check RDPID feature bit (CPUID_7_0_ECX_RDPID). Fixes: 6750485bf42a ("target/i386: implement RDPID in TCG") Inspired-by: Xinyu Li Signed-off-by: Zhao Liu Message-ID: <20240603080723.1256662-1-zhao1.liu@intel.com> Signed-off-by: Paolo Bonzini --- target/i386/tcg/translate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c index 6dedfe94c0..0486ab6911 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -3199,7 +3199,7 @@ static void disas_insn_old(DisasContext *s, CPUState *cpu, int b) goto illegal_op; } if (s->prefix & PREFIX_REPZ) { - if (!(s->cpuid_ext_features & CPUID_7_0_ECX_RDPID)) { + if (!(s->cpuid_7_0_ecx_features & CPUID_7_0_ECX_RDPID)) { goto illegal_op; } gen_helper_rdpid(s->T0, tcg_env); From 7604bbc2d87d153e65e38cf2d671a5a9a35917b1 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 3 Jun 2024 12:01:12 +0200 Subject: [PATCH 12/46] target/i386: fix xsave.flat from kvm-unit-tests xsave.flat checks that "executing the XSETBV instruction causes a general- protection fault (#GP) if ECX = 0 and EAX[2:1] has the value 10b". QEMU allows that option, so the test fails. Add the condition. Cc: qemu-stable@nongnu.org Fixes: 892544317fe ("target/i386: implement XSAVE and XRSTOR of AVX registers", 2022-10-18) Reported-by: Thomas Huth Signed-off-by: Paolo Bonzini --- target/i386/tcg/fpu_helper.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c index e322293371..e1b850f3fc 100644 --- a/target/i386/tcg/fpu_helper.c +++ b/target/i386/tcg/fpu_helper.c @@ -3142,6 +3142,11 @@ void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask) goto do_gpf; } + /* SSE can be disabled, but only if AVX is disabled too. */ + if ((mask & (XSTATE_SSE_MASK | XSTATE_YMM_MASK)) == XSTATE_YMM_MASK) { + goto do_gpf; + } + /* Disallow enabling unimplemented features. */ cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi); ena = ((uint64_t)ena_hi << 32) | ena_lo; From ef7c70f020ca1fe9e7c98ea2cd9d6ba3c5714716 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 3 Jun 2024 13:49:49 +0200 Subject: [PATCH 13/46] update-linux-headers: fix forwarding to asm-generic headers Afer commit 3efc75ad9d9 ("scripts/update-linux-headers.sh: Remove temporary directory inbetween", 2024-05-29), updating linux-headers/ results in errors such as cp: cannot stat '/tmp/tmp.1A1Eejh1UE/headers/include/asm/bitsperlong.h': No such file or directory because Loongarch does not have an asm/bitsperlong.h file and uses the generic version. Before commit 3efc75ad9d9, the missing file would incorrectly cause stale files to be included in linux-headers/. The files were never committed to qemu.git, but were wrong nevertheless. The build would just use the system version of the files, which is opposite to the idea of importing Linux header files into QEMU's tree. Create forwarding headers, resembling the ones that are generated during a kernel build by scripts/Makefile.asm-generic, if a file is only installed under include/asm-generic/. Reviewed-by: Thomas Huth Signed-off-by: Paolo Bonzini --- scripts/update-linux-headers.sh | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh index 23afe8c08a..57a48837aa 100755 --- a/scripts/update-linux-headers.sh +++ b/scripts/update-linux-headers.sh @@ -118,7 +118,14 @@ for arch in $ARCHLIST; do rm -rf "$output/linux-headers/asm-$arch" mkdir -p "$output/linux-headers/asm-$arch" for header in kvm.h unistd.h bitsperlong.h mman.h; do - cp "$hdrdir/include/asm/$header" "$output/linux-headers/asm-$arch" + if test -f "$hdrdir/include/asm/$header"; then + cp "$hdrdir/include/asm/$header" "$output/linux-headers/asm-$arch" + elif test -f "$hdrdir/include/asm-generic/$header"; then + # not installed as , but used as such in kernel sources + cat <$output/linux-headers/asm-$arch/$header +#include +EOF + fi done if [ $arch = mips ]; then From b8116f4cbaa0f64bb07564f20b3b5219e23c8bff Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 3 Jun 2024 14:16:55 +0200 Subject: [PATCH 14/46] update-linux-headers: move pvpanic.h to correct directory Linux has , not . Use the same directory for QEMU's include/standard-headers/ copy. Reviewed-by: Thomas Huth Signed-off-by: Paolo Bonzini --- hw/misc/pvpanic-isa.c | 2 +- hw/misc/pvpanic-pci.c | 2 +- hw/misc/pvpanic.c | 2 +- include/standard-headers/{linux => misc}/pvpanic.h | 0 scripts/update-linux-headers.sh | 6 ++++-- 5 files changed, 7 insertions(+), 5 deletions(-) rename include/standard-headers/{linux => misc}/pvpanic.h (100%) diff --git a/hw/misc/pvpanic-isa.c b/hw/misc/pvpanic-isa.c index ccec50f61b..b4f84c4110 100644 --- a/hw/misc/pvpanic-isa.c +++ b/hw/misc/pvpanic-isa.c @@ -21,7 +21,7 @@ #include "hw/misc/pvpanic.h" #include "qom/object.h" #include "hw/isa/isa.h" -#include "standard-headers/linux/pvpanic.h" +#include "standard-headers/misc/pvpanic.h" #include "hw/acpi/acpi_aml_interface.h" OBJECT_DECLARE_SIMPLE_TYPE(PVPanicISAState, PVPANIC_ISA_DEVICE) diff --git a/hw/misc/pvpanic-pci.c b/hw/misc/pvpanic-pci.c index 83be95d0d2..4d44a881da 100644 --- a/hw/misc/pvpanic-pci.c +++ b/hw/misc/pvpanic-pci.c @@ -21,7 +21,7 @@ #include "hw/misc/pvpanic.h" #include "qom/object.h" #include "hw/pci/pci_device.h" -#include "standard-headers/linux/pvpanic.h" +#include "standard-headers/misc/pvpanic.h" OBJECT_DECLARE_SIMPLE_TYPE(PVPanicPCIState, PVPANIC_PCI_DEVICE) diff --git a/hw/misc/pvpanic.c b/hw/misc/pvpanic.c index 1540e9091a..80289ecf5f 100644 --- a/hw/misc/pvpanic.c +++ b/hw/misc/pvpanic.c @@ -21,7 +21,7 @@ #include "hw/qdev-properties.h" #include "hw/misc/pvpanic.h" #include "qom/object.h" -#include "standard-headers/linux/pvpanic.h" +#include "standard-headers/misc/pvpanic.h" static void handle_event(int event) { diff --git a/include/standard-headers/linux/pvpanic.h b/include/standard-headers/misc/pvpanic.h similarity index 100% rename from include/standard-headers/linux/pvpanic.h rename to include/standard-headers/misc/pvpanic.h diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh index 57a48837aa..7e93acb3b5 100755 --- a/scripts/update-linux-headers.sh +++ b/scripts/update-linux-headers.sh @@ -231,10 +231,12 @@ for i in "$hdrdir"/include/linux/*virtio*.h \ "$hdrdir/include/linux/const.h" \ "$hdrdir/include/linux/kernel.h" \ "$hdrdir/include/linux/vhost_types.h" \ - "$hdrdir/include/linux/sysinfo.h" \ - "$hdrdir/include/misc/pvpanic.h"; do + "$hdrdir/include/linux/sysinfo.h"; do cp_portable "$i" "$output/include/standard-headers/linux" done +mkdir -p "$output/include/standard-headers/misc" +cp_portable "$hdrdir/include/misc/pvpanic.h" \ + "$output/include/standard-headers/misc" mkdir -p "$output/include/standard-headers/drm" cp_portable "$hdrdir/include/drm/drm_fourcc.h" \ "$output/include/standard-headers/drm" From 5f69e42da5b40a2213f4db70ca461f554abca686 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Thu, 30 May 2024 06:16:14 -0500 Subject: [PATCH 15/46] linux-headers: Update to current kvm/next This updates kernel headers to commit 6f627b425378 ("KVM: SVM: Add module parameter to enable SEV-SNP", 2024-05-12). The SNP host patches will be included in Linux 6.11, to be released next July. Also brings in an linux-headers/linux/vhost.h fix from v6.9-rc4. Co-developed-by: Michael Roth Signed-off-by: Michael Roth Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-3-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini --- linux-headers/asm-loongarch/kvm.h | 4 +++ linux-headers/asm-riscv/kvm.h | 1 + linux-headers/asm-x86/kvm.h | 52 ++++++++++++++++++++++++++++++- linux-headers/linux/vhost.h | 15 ++++----- 4 files changed, 64 insertions(+), 8 deletions(-) diff --git a/linux-headers/asm-loongarch/kvm.h b/linux-headers/asm-loongarch/kvm.h index 109785922c..f9abef3823 100644 --- a/linux-headers/asm-loongarch/kvm.h +++ b/linux-headers/asm-loongarch/kvm.h @@ -17,6 +17,8 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_DIRTY_LOG_PAGE_OFFSET 64 +#define KVM_GUESTDBG_USE_SW_BP 0x00010000 + /* * for KVM_GET_REGS and KVM_SET_REGS */ @@ -72,6 +74,8 @@ struct kvm_fpu { #define KVM_REG_LOONGARCH_COUNTER (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 1) #define KVM_REG_LOONGARCH_VCPU_RESET (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 2) +/* Debugging: Special instruction for software breakpoint */ +#define KVM_REG_LOONGARCH_DEBUG_INST (KVM_REG_LOONGARCH_KVM | KVM_REG_SIZE_U64 | 3) #define LOONGARCH_REG_SHIFT 3 #define LOONGARCH_REG_64(TYPE, REG) (TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT)) diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h index b1c503c295..e878e7cc39 100644 --- a/linux-headers/asm-riscv/kvm.h +++ b/linux-headers/asm-riscv/kvm.h @@ -167,6 +167,7 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_ZFA, KVM_RISCV_ISA_EXT_ZTSO, KVM_RISCV_ISA_EXT_ZACAS, + KVM_RISCV_ISA_EXT_SSCOFPMF, KVM_RISCV_ISA_EXT_MAX, }; diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h index 31c95c2dfe..1c8f918234 100644 --- a/linux-headers/asm-x86/kvm.h +++ b/linux-headers/asm-x86/kvm.h @@ -695,6 +695,11 @@ enum sev_cmd_id { /* Second time is the charm; improved versions of the above ioctls. */ KVM_SEV_INIT2, + /* SNP-specific commands */ + KVM_SEV_SNP_LAUNCH_START = 100, + KVM_SEV_SNP_LAUNCH_UPDATE, + KVM_SEV_SNP_LAUNCH_FINISH, + KVM_SEV_NR_MAX, }; @@ -709,7 +714,9 @@ struct kvm_sev_cmd { struct kvm_sev_init { __u64 vmsa_features; __u32 flags; - __u32 pad[9]; + __u16 ghcb_version; + __u16 pad1; + __u32 pad2[8]; }; struct kvm_sev_launch_start { @@ -820,6 +827,48 @@ struct kvm_sev_receive_update_data { __u32 pad2; }; +struct kvm_sev_snp_launch_start { + __u64 policy; + __u8 gosvw[16]; + __u16 flags; + __u8 pad0[6]; + __u64 pad1[4]; +}; + +/* Kept in sync with firmware values for simplicity. */ +#define KVM_SEV_SNP_PAGE_TYPE_NORMAL 0x1 +#define KVM_SEV_SNP_PAGE_TYPE_ZERO 0x3 +#define KVM_SEV_SNP_PAGE_TYPE_UNMEASURED 0x4 +#define KVM_SEV_SNP_PAGE_TYPE_SECRETS 0x5 +#define KVM_SEV_SNP_PAGE_TYPE_CPUID 0x6 + +struct kvm_sev_snp_launch_update { + __u64 gfn_start; + __u64 uaddr; + __u64 len; + __u8 type; + __u8 pad0; + __u16 flags; + __u32 pad1; + __u64 pad2[4]; +}; + +#define KVM_SEV_SNP_ID_BLOCK_SIZE 96 +#define KVM_SEV_SNP_ID_AUTH_SIZE 4096 +#define KVM_SEV_SNP_FINISH_DATA_SIZE 32 + +struct kvm_sev_snp_launch_finish { + __u64 id_block_uaddr; + __u64 id_auth_uaddr; + __u8 id_block_en; + __u8 auth_key_en; + __u8 vcek_disabled; + __u8 host_data[KVM_SEV_SNP_FINISH_DATA_SIZE]; + __u8 pad0[3]; + __u16 flags; + __u64 pad1[4]; +}; + #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) #define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) @@ -870,5 +919,6 @@ struct kvm_hyperv_eventfd { #define KVM_X86_SW_PROTECTED_VM 1 #define KVM_X86_SEV_VM 2 #define KVM_X86_SEV_ES_VM 3 +#define KVM_X86_SNP_VM 4 #endif /* _ASM_X86_KVM_H */ diff --git a/linux-headers/linux/vhost.h b/linux-headers/linux/vhost.h index bea6973906..b95dd84eef 100644 --- a/linux-headers/linux/vhost.h +++ b/linux-headers/linux/vhost.h @@ -179,12 +179,6 @@ /* Get the config size */ #define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32) -/* Get the count of all virtqueues */ -#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) - -/* Get the number of virtqueue groups. */ -#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32) - /* Get the number of address spaces. */ #define VHOST_VDPA_GET_AS_NUM _IOR(VHOST_VIRTIO, 0x7A, unsigned int) @@ -228,10 +222,17 @@ #define VHOST_VDPA_GET_VRING_DESC_GROUP _IOWR(VHOST_VIRTIO, 0x7F, \ struct vhost_vring_state) + +/* Get the count of all virtqueues */ +#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32) + +/* Get the number of virtqueue groups. */ +#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32) + /* Get the queue size of a specific virtqueue. * userspace set the vring index in vhost_vring_state.index * kernel set the queue size in vhost_vring_state.num */ -#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x80, \ +#define VHOST_VDPA_GET_VRING_SIZE _IOWR(VHOST_VIRTIO, 0x82, \ struct vhost_vring_state) #endif From aa274c33c39e7de981dc195abe60e1a246c9d248 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 3 Jun 2024 14:25:06 +0200 Subject: [PATCH 16/46] update-linux-headers: import linux/kvm_para.h header Right now QEMU is importing arch/x86/include/uapi/asm/kvm_para.h because it includes definitions for kvmclock and for KVM CPUID bits. However, other definitions for KVM hypercall values and return codes are included in include/uapi/linux/kvm_para.h and they will be used by SEV-SNP. To ensure that it is possible to include both and "standard-headers/asm-x86/kvm_para.h" without conflicts, provide linux/kvm_para.h as a portable header too, and forward linux-headers/ files to those in include/standard-headers. Note that will include architecture-specific definitions as well, but "standard-headers/linux/kvm_para.h" will not because it can be used in architecture-independent files. This could easily be extended to other architectures, but right now they do not need any symbol in their specific kvm_para.h files. Reviewed-by: Thomas Huth Signed-off-by: Paolo Bonzini --- include/standard-headers/linux/kvm_para.h | 38 +++++++++++++++++++++++ linux-headers/asm-x86/kvm_para.h | 1 + linux-headers/linux/kvm_para.h | 2 ++ scripts/update-linux-headers.sh | 22 ++++++++++++- 4 files changed, 62 insertions(+), 1 deletion(-) create mode 100644 include/standard-headers/linux/kvm_para.h create mode 100644 linux-headers/asm-x86/kvm_para.h create mode 100644 linux-headers/linux/kvm_para.h diff --git a/include/standard-headers/linux/kvm_para.h b/include/standard-headers/linux/kvm_para.h new file mode 100644 index 0000000000..015c166302 --- /dev/null +++ b/include/standard-headers/linux/kvm_para.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __LINUX_KVM_PARA_H +#define __LINUX_KVM_PARA_H + +/* + * This header file provides a method for making a hypercall to the host + * Architectures should define: + * - kvm_hypercall0, kvm_hypercall1... + * - kvm_arch_para_features + * - kvm_para_available + */ + +/* Return values for hypercalls */ +#define KVM_ENOSYS 1000 +#define KVM_EFAULT EFAULT +#define KVM_EINVAL EINVAL +#define KVM_E2BIG E2BIG +#define KVM_EPERM EPERM +#define KVM_EOPNOTSUPP 95 + +#define KVM_HC_VAPIC_POLL_IRQ 1 +#define KVM_HC_MMU_OP 2 +#define KVM_HC_FEATURES 3 +#define KVM_HC_PPC_MAP_MAGIC_PAGE 4 +#define KVM_HC_KICK_CPU 5 +#define KVM_HC_MIPS_GET_CLOCK_FREQ 6 +#define KVM_HC_MIPS_EXIT_VM 7 +#define KVM_HC_MIPS_CONSOLE_OUTPUT 8 +#define KVM_HC_CLOCK_PAIRING 9 +#define KVM_HC_SEND_IPI 10 +#define KVM_HC_SCHED_YIELD 11 +#define KVM_HC_MAP_GPA_RANGE 12 + +/* + * hypercalls use architecture specific + */ + +#endif /* __LINUX_KVM_PARA_H */ diff --git a/linux-headers/asm-x86/kvm_para.h b/linux-headers/asm-x86/kvm_para.h new file mode 100644 index 0000000000..1d3e0e0b07 --- /dev/null +++ b/linux-headers/asm-x86/kvm_para.h @@ -0,0 +1 @@ +#include "standard-headers/asm-x86/kvm_para.h" diff --git a/linux-headers/linux/kvm_para.h b/linux-headers/linux/kvm_para.h new file mode 100644 index 0000000000..6a1e672259 --- /dev/null +++ b/linux-headers/linux/kvm_para.h @@ -0,0 +1,2 @@ +#include "standard-headers/linux/kvm_para.h" +#include diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh index 7e93acb3b5..c34ac6454e 100755 --- a/scripts/update-linux-headers.sh +++ b/scripts/update-linux-headers.sh @@ -63,6 +63,7 @@ cp_portable() { -e 'linux/kernel' \ -e 'linux/sysinfo' \ -e 'asm/setup_data.h' \ + -e 'asm/kvm_para.h' \ > /dev/null then echo "Unexpected #include in input file $f". @@ -70,6 +71,15 @@ cp_portable() { fi header=$(basename "$f"); + + if test -z "$arch"; then + # Let users of include/standard-headers/linux/ headers pick the + # asm-* header that they care about + arch_cmd='/]*\)>/d' + else + arch_cmd='s/]*\)>/"standard-headers\/asm-'$arch'\/\1"/' + fi + sed -e 's/__aligned_u64/__u64 __attribute__((aligned(8)))/g' \ -e 's/__u\([0-9][0-9]*\)/uint\1_t/g' \ -e 's/u\([0-9][0-9]*\)/uint\1_t/g' \ @@ -78,7 +88,7 @@ cp_portable() { -e 's/__be\([0-9][0-9]*\)/uint\1_t/g' \ -e 's/"\(input-event-codes\.h\)"/"standard-headers\/linux\/\1"/' \ -e 's/]*\)>/"standard-headers\/linux\/\1"/' \ - -e 's/]*\)>/"standard-headers\/asm-'$arch'\/\1"/' \ + -e "$arch_cmd" \ -e 's/__bitwise//' \ -e 's/__attribute__((packed))/QEMU_PACKED/' \ -e 's/__inline__/inline/' \ @@ -158,7 +168,12 @@ EOF cp "$hdrdir/include/asm/unistd_32.h" "$output/linux-headers/asm-x86/" cp "$hdrdir/include/asm/unistd_x32.h" "$output/linux-headers/asm-x86/" cp "$hdrdir/include/asm/unistd_64.h" "$output/linux-headers/asm-x86/" + cp_portable "$hdrdir/include/asm/kvm_para.h" "$output/include/standard-headers/asm-$arch" + cat <$output/linux-headers/asm-$arch/kvm_para.h +#include "standard-headers/asm-$arch/kvm_para.h" +EOF + # Remove everything except the macros from bootparam.h avoiding the # unnecessary import of several video/ist/etc headers sed -e '/__ASSEMBLY__/,/__ASSEMBLY__/d' \ @@ -208,6 +223,10 @@ if [ -d "$linux/LICENSES" ]; then done fi +cat <$output/linux-headers/linux/kvm_para.h +#include "standard-headers/linux/kvm_para.h" +#include +EOF cat <$output/linux-headers/linux/virtio_config.h #include "standard-headers/linux/virtio_config.h" EOF @@ -230,6 +249,7 @@ for i in "$hdrdir"/include/linux/*virtio*.h \ "$hdrdir/include/linux/ethtool.h" \ "$hdrdir/include/linux/const.h" \ "$hdrdir/include/linux/kernel.h" \ + "$hdrdir/include/linux/kvm_para.h" \ "$hdrdir/include/linux/vhost_types.h" \ "$hdrdir/include/linux/sysinfo.h"; do cp_portable "$i" "$output/include/standard-headers/linux" From dc0d28ca46c0e7ee3c055ad4da24022995bd3765 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 31 May 2024 13:29:53 +0200 Subject: [PATCH 17/46] machine: allow early use of machine_require_guest_memfd Ask the ConfidentialGuestSupport object whether to use guest_memfd for KVM-backend private memory. This bool can be set in instance_init (or user_complete) so that it is available when the machine is created. Signed-off-by: Paolo Bonzini --- hw/core/machine.c | 2 +- include/exec/confidential-guest-support.h | 5 +++++ include/hw/boards.h | 1 - 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/hw/core/machine.c b/hw/core/machine.c index 17292b13e6..77a356f232 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -1216,7 +1216,7 @@ bool machine_mem_merge(MachineState *machine) bool machine_require_guest_memfd(MachineState *machine) { - return machine->require_guest_memfd; + return machine->cgs && machine->cgs->require_guest_memfd; } static char *cpu_slot_to_string(const CPUArchId *cpu) diff --git a/include/exec/confidential-guest-support.h b/include/exec/confidential-guest-support.h index e5b188cffb..02dc4e518f 100644 --- a/include/exec/confidential-guest-support.h +++ b/include/exec/confidential-guest-support.h @@ -31,6 +31,11 @@ OBJECT_DECLARE_TYPE(ConfidentialGuestSupport, struct ConfidentialGuestSupport { Object parent; + /* + * True if the machine should use guest_memfd for RAM. + */ + bool require_guest_memfd; + /* * ready: flag set by CGS initialization code once it's ready to * start executing instructions in a potentially-secure diff --git a/include/hw/boards.h b/include/hw/boards.h index 2fa800f11a..73ad319d7d 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -375,7 +375,6 @@ struct MachineState { char *dt_compatible; bool dump_guest_core; bool mem_merge; - bool require_guest_memfd; bool usb; bool usb_disabled; char *firmware; From 18c453409a3a84cf7b2c764c5a03fb429a73bbeb Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Thu, 30 May 2024 06:16:13 -0500 Subject: [PATCH 18/46] i386/sev: Replace error_report with error_setg Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-2-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini --- target/i386/sev.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index d30b68c11e..67ed32e5ea 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -952,13 +952,13 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) if (sev_es_enabled()) { if (!kvm_kernel_irqchip_allowed()) { - error_report("%s: SEV-ES guests require in-kernel irqchip support", - __func__); + error_setg(errp, "%s: SEV-ES guests require in-kernel irqchip" + "support", __func__); goto err; } if (!(status.flags & SEV_STATUS_FLAGS_CONFIG_ES)) { - error_report("%s: guest policy requires SEV-ES, but " + error_setg(errp, "%s: guest policy requires SEV-ES, but " "host SEV-ES support unavailable", __func__); goto err; From 16dcf200dc951c1cde3e5b442457db5f690b8cf0 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 30 May 2024 06:16:16 -0500 Subject: [PATCH 19/46] i386/sev: Introduce "sev-common" type to encapsulate common SEV state Currently all SEV/SEV-ES functionality is managed through a single 'sev-guest' QOM type. With upcoming support for SEV-SNP, taking this same approach won't work well since some of the properties/state managed by 'sev-guest' is not applicable to SEV-SNP, which will instead rely on a new QOM type with its own set of properties/state. To prepare for this, this patch moves common state into an abstract 'sev-common' parent type to encapsulate properties/state that are common to both SEV/SEV-ES and SEV-SNP, leaving only SEV/SEV-ES-specific properties/state in the current 'sev-guest' type. This should not affect current behavior or command-line options. As part of this patch, some related changes are also made: - a static 'sev_guest' variable is currently used to keep track of the 'sev-guest' instance. SEV-SNP would similarly introduce an 'sev_snp_guest' static variable. But these instances are now available via qdev_get_machine()->cgs, so switch to using that instead and drop the static variable. - 'sev_guest' is currently used as the name for the static variable holding a pointer to the 'sev-guest' instance. Re-purpose the name as a local variable referring the 'sev-guest' instance, and use that consistently throughout the code so it can be easily distinguished from sev-common/sev-snp-guest instances. - 'sev' is generally used as the name for local variables holding a pointer to the 'sev-guest' instance. In cases where that now points to common state, use the name 'sev_common'; in cases where that now points to state specific to 'sev-guest' instance, use the name 'sev_guest' In order to enable kernel-hashes for SNP, pull it from SevGuestProperties to its parent SevCommonProperties so it will be available for both SEV and SNP. Signed-off-by: Michael Roth Co-developed-by: Dov Murik Signed-off-by: Dov Murik Acked-by: Markus Armbruster (QAPI schema) Co-developed-by: Pankaj Gupta Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-5-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini --- qapi/qom.json | 40 ++-- target/i386/sev.c | 493 ++++++++++++++++++++++++++-------------------- target/i386/sev.h | 3 + 3 files changed, 303 insertions(+), 233 deletions(-) diff --git a/qapi/qom.json b/qapi/qom.json index 38dde6d785..056b38f491 100644 --- a/qapi/qom.json +++ b/qapi/qom.json @@ -875,20 +875,12 @@ 'data': { '*filename': 'str' } } ## -# @SevGuestProperties: +# @SevCommonProperties: # -# Properties for sev-guest objects. +# Properties common to objects that are derivatives of sev-common. # # @sev-device: SEV device to use (default: "/dev/sev") # -# @dh-cert-file: guest owners DH certificate (encoded with base64) -# -# @session-file: guest owners session parameters (encoded with base64) -# -# @policy: SEV policy value (default: 0x1) -# -# @handle: SEV firmware handle (default: 0) -# # @cbitpos: C-bit location in page table entry (default: 0) # # @reduced-phys-bits: number of bits in physical addresses that become @@ -898,6 +890,27 @@ # designated guest firmware page for measured boot with -kernel # (default: false) (since 6.2) # +# Since: 9.1 +## +{ 'struct': 'SevCommonProperties', + 'data': { '*sev-device': 'str', + '*cbitpos': 'uint32', + 'reduced-phys-bits': 'uint32', + '*kernel-hashes': 'bool' } } + +## +# @SevGuestProperties: +# +# Properties for sev-guest objects. +# +# @dh-cert-file: guest owners DH certificate (encoded with base64) +# +# @session-file: guest owners session parameters (encoded with base64) +# +# @policy: SEV policy value (default: 0x1) +# +# @handle: SEV firmware handle (default: 0) +# # @legacy-vm-type: Use legacy KVM_SEV_INIT KVM interface for creating the VM. # The newer KVM_SEV_INIT2 interface syncs additional vCPU # state when initializing the VMSA structures, which will @@ -909,14 +922,11 @@ # Since: 2.12 ## { 'struct': 'SevGuestProperties', - 'data': { '*sev-device': 'str', - '*dh-cert-file': 'str', + 'base': 'SevCommonProperties', + 'data': { '*dh-cert-file': 'str', '*session-file': 'str', '*policy': 'uint32', '*handle': 'uint32', - '*cbitpos': 'uint32', - 'reduced-phys-bits': 'uint32', - '*kernel-hashes': 'bool', '*legacy-vm-type': 'bool' } } ## diff --git a/target/i386/sev.c b/target/i386/sev.c index 67ed32e5ea..33e606eea0 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -40,9 +40,36 @@ #include "hw/i386/pc.h" #include "exec/address-spaces.h" -#define TYPE_SEV_GUEST "sev-guest" -OBJECT_DECLARE_SIMPLE_TYPE(SevGuestState, SEV_GUEST) +OBJECT_DECLARE_TYPE(SevCommonState, SevCommonStateClass, SEV_COMMON) +OBJECT_DECLARE_TYPE(SevGuestState, SevCommonStateClass, SEV_GUEST) +struct SevCommonState { + X86ConfidentialGuest parent_obj; + + int kvm_type; + + /* configuration parameters */ + char *sev_device; + uint32_t cbitpos; + uint32_t reduced_phys_bits; + bool kernel_hashes; + + /* runtime state */ + uint8_t api_major; + uint8_t api_minor; + uint8_t build_id; + int sev_fd; + SevState state; + + uint32_t reset_cs; + uint32_t reset_ip; + bool reset_data_valid; +}; + +struct SevCommonStateClass { + X86ConfidentialGuestClass parent_class; + +}; /** * SevGuestState: @@ -55,32 +82,15 @@ OBJECT_DECLARE_SIMPLE_TYPE(SevGuestState, SEV_GUEST) * -machine ...,memory-encryption=sev0 */ struct SevGuestState { - X86ConfidentialGuest parent_obj; - - int kvm_type; + SevCommonState parent_obj; + gchar *measurement; /* configuration parameters */ - char *sev_device; + uint32_t handle; uint32_t policy; char *dh_cert_file; char *session_file; - uint32_t cbitpos; - uint32_t reduced_phys_bits; - bool kernel_hashes; bool legacy_vm_type; - - /* runtime state */ - uint32_t handle; - uint8_t api_major; - uint8_t api_minor; - uint8_t build_id; - int sev_fd; - SevState state; - gchar *measurement; - - uint32_t reset_cs; - uint32_t reset_ip; - bool reset_data_valid; }; #define DEFAULT_GUEST_POLICY 0x1 /* disable debug */ @@ -128,7 +138,6 @@ typedef struct QEMU_PACKED PaddedSevHashTable { QEMU_BUILD_BUG_ON(sizeof(PaddedSevHashTable) % 16 != 0); -static SevGuestState *sev_guest; static Error *sev_mig_blocker; static const char *const sev_fw_errlist[] = { @@ -209,21 +218,21 @@ fw_error_to_str(int code) } static bool -sev_check_state(const SevGuestState *sev, SevState state) +sev_check_state(const SevCommonState *sev_common, SevState state) { - assert(sev); - return sev->state == state ? true : false; + assert(sev_common); + return sev_common->state == state ? true : false; } static void -sev_set_guest_state(SevGuestState *sev, SevState new_state) +sev_set_guest_state(SevCommonState *sev_common, SevState new_state) { assert(new_state < SEV_STATE__MAX); - assert(sev); + assert(sev_common); - trace_kvm_sev_change_state(SevState_str(sev->state), + trace_kvm_sev_change_state(SevState_str(sev_common->state), SevState_str(new_state)); - sev->state = new_state; + sev_common->state = new_state; } static void @@ -290,121 +299,61 @@ static struct RAMBlockNotifier sev_ram_notifier = { .ram_block_removed = sev_ram_block_removed, }; -static void -sev_guest_finalize(Object *obj) -{ -} - -static char * -sev_guest_get_session_file(Object *obj, Error **errp) -{ - SevGuestState *s = SEV_GUEST(obj); - - return s->session_file ? g_strdup(s->session_file) : NULL; -} - -static void -sev_guest_set_session_file(Object *obj, const char *value, Error **errp) -{ - SevGuestState *s = SEV_GUEST(obj); - - s->session_file = g_strdup(value); -} - -static char * -sev_guest_get_dh_cert_file(Object *obj, Error **errp) -{ - SevGuestState *s = SEV_GUEST(obj); - - return g_strdup(s->dh_cert_file); -} - -static void -sev_guest_set_dh_cert_file(Object *obj, const char *value, Error **errp) -{ - SevGuestState *s = SEV_GUEST(obj); - - s->dh_cert_file = g_strdup(value); -} - -static char * -sev_guest_get_sev_device(Object *obj, Error **errp) -{ - SevGuestState *sev = SEV_GUEST(obj); - - return g_strdup(sev->sev_device); -} - -static void -sev_guest_set_sev_device(Object *obj, const char *value, Error **errp) -{ - SevGuestState *sev = SEV_GUEST(obj); - - sev->sev_device = g_strdup(value); -} - -static bool sev_guest_get_kernel_hashes(Object *obj, Error **errp) -{ - SevGuestState *sev = SEV_GUEST(obj); - - return sev->kernel_hashes; -} - -static void sev_guest_set_kernel_hashes(Object *obj, bool value, Error **errp) -{ - SevGuestState *sev = SEV_GUEST(obj); - - sev->kernel_hashes = value; -} - -static bool sev_guest_get_legacy_vm_type(Object *obj, Error **errp) -{ - return SEV_GUEST(obj)->legacy_vm_type; -} - -static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp) -{ - SEV_GUEST(obj)->legacy_vm_type = value; -} - bool sev_enabled(void) { - return !!sev_guest; + ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; + + return !!object_dynamic_cast(OBJECT(cgs), TYPE_SEV_COMMON); } bool sev_es_enabled(void) { - return sev_enabled() && (sev_guest->policy & SEV_POLICY_ES); + ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; + + return sev_enabled() && (SEV_GUEST(cgs)->policy & SEV_POLICY_ES); } uint32_t sev_get_cbit_position(void) { - return sev_guest ? sev_guest->cbitpos : 0; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + + return sev_common ? sev_common->cbitpos : 0; } uint32_t sev_get_reduced_phys_bits(void) { - return sev_guest ? sev_guest->reduced_phys_bits : 0; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + + return sev_common ? sev_common->reduced_phys_bits : 0; } static SevInfo *sev_get_info(void) { SevInfo *info; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + SevGuestState *sev_guest = + (SevGuestState *)object_dynamic_cast(OBJECT(sev_common), + TYPE_SEV_GUEST); info = g_new0(SevInfo, 1); info->enabled = sev_enabled(); if (info->enabled) { - info->api_major = sev_guest->api_major; - info->api_minor = sev_guest->api_minor; - info->build_id = sev_guest->build_id; - info->policy = sev_guest->policy; - info->state = sev_guest->state; - info->handle = sev_guest->handle; + if (sev_guest) { + info->handle = sev_guest->handle; + } + info->api_major = sev_common->api_major; + info->api_minor = sev_common->api_minor; + info->build_id = sev_common->build_id; + info->state = sev_common->state; + /* we only report the lower 32-bits of policy for SNP, ok for now... */ + info->policy = + (uint32_t)object_property_get_uint(OBJECT(sev_common), + "policy", NULL); } return info; @@ -530,6 +479,8 @@ static SevCapability *sev_get_capabilities(Error **errp) size_t pdh_len = 0, cert_chain_len = 0, cpu0_id_len = 0; uint32_t ebx; int fd; + SevCommonState *sev_common; + char *sev_device; if (!kvm_enabled()) { error_setg(errp, "KVM not enabled"); @@ -540,12 +491,21 @@ static SevCapability *sev_get_capabilities(Error **errp) return NULL; } - fd = open(DEFAULT_SEV_DEVICE, O_RDWR); + sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + if (!sev_common) { + error_setg(errp, "SEV is not configured"); + } + + sev_device = object_property_get_str(OBJECT(sev_common), "sev-device", + &error_abort); + fd = open(sev_device, O_RDWR); if (fd < 0) { error_setg_errno(errp, errno, "SEV: Failed to open %s", DEFAULT_SEV_DEVICE); + g_free(sev_device); return NULL; } + g_free(sev_device); if (sev_get_pdh_info(fd, &pdh_data, &pdh_len, &cert_chain_data, &cert_chain_len, errp)) { @@ -588,7 +548,7 @@ static SevAttestationReport *sev_get_attestation_report(const char *mnonce, { struct kvm_sev_attestation_report input = {}; SevAttestationReport *report = NULL; - SevGuestState *sev = sev_guest; + SevCommonState *sev_common; g_autofree guchar *data = NULL; g_autofree guchar *buf = NULL; gsize len; @@ -613,8 +573,10 @@ static SevAttestationReport *sev_get_attestation_report(const char *mnonce, return NULL; } + sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + /* Query the report length */ - ret = sev_ioctl(sev->sev_fd, KVM_SEV_GET_ATTESTATION_REPORT, + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_GET_ATTESTATION_REPORT, &input, &err); if (ret < 0) { if (err != SEV_RET_INVALID_LEN) { @@ -630,7 +592,7 @@ static SevAttestationReport *sev_get_attestation_report(const char *mnonce, memcpy(input.mnonce, buf, sizeof(input.mnonce)); /* Query the report */ - ret = sev_ioctl(sev->sev_fd, KVM_SEV_GET_ATTESTATION_REPORT, + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_GET_ATTESTATION_REPORT, &input, &err); if (ret) { error_setg_errno(errp, errno, "SEV: Failed to get attestation report" @@ -670,26 +632,27 @@ sev_read_file_base64(const char *filename, guchar **data, gsize *len) } static int -sev_launch_start(SevGuestState *sev) +sev_launch_start(SevGuestState *sev_guest) { gsize sz; int ret = 1; int fw_error, rc; struct kvm_sev_launch_start start = { - .handle = sev->handle, .policy = sev->policy + .handle = sev_guest->handle, .policy = sev_guest->policy }; guchar *session = NULL, *dh_cert = NULL; + SevCommonState *sev_common = SEV_COMMON(sev_guest); - if (sev->session_file) { - if (sev_read_file_base64(sev->session_file, &session, &sz) < 0) { + if (sev_guest->session_file) { + if (sev_read_file_base64(sev_guest->session_file, &session, &sz) < 0) { goto out; } start.session_uaddr = (unsigned long)session; start.session_len = sz; } - if (sev->dh_cert_file) { - if (sev_read_file_base64(sev->dh_cert_file, &dh_cert, &sz) < 0) { + if (sev_guest->dh_cert_file) { + if (sev_read_file_base64(sev_guest->dh_cert_file, &dh_cert, &sz) < 0) { goto out; } start.dh_uaddr = (unsigned long)dh_cert; @@ -697,15 +660,15 @@ sev_launch_start(SevGuestState *sev) } trace_kvm_sev_launch_start(start.policy, session, dh_cert); - rc = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_START, &start, &fw_error); + rc = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_START, &start, &fw_error); if (rc < 0) { error_report("%s: LAUNCH_START ret=%d fw_error=%d '%s'", __func__, ret, fw_error, fw_error_to_str(fw_error)); goto out; } - sev_set_guest_state(sev, SEV_STATE_LAUNCH_UPDATE); - sev->handle = start.handle; + sev_set_guest_state(sev_common, SEV_STATE_LAUNCH_UPDATE); + sev_guest->handle = start.handle; ret = 0; out: @@ -715,7 +678,7 @@ out: } static int -sev_launch_update_data(SevGuestState *sev, uint8_t *addr, uint64_t len) +sev_launch_update_data(SevGuestState *sev_guest, uint8_t *addr, uint64_t len) { int ret, fw_error; struct kvm_sev_launch_update_data update; @@ -727,7 +690,7 @@ sev_launch_update_data(SevGuestState *sev, uint8_t *addr, uint64_t len) update.uaddr = (uintptr_t)addr; update.len = len; trace_kvm_sev_launch_update_data(addr, len); - ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA, + ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA, &update, &fw_error); if (ret) { error_report("%s: LAUNCH_UPDATE ret=%d fw_error=%d '%s'", @@ -738,11 +701,12 @@ sev_launch_update_data(SevGuestState *sev, uint8_t *addr, uint64_t len) } static int -sev_launch_update_vmsa(SevGuestState *sev) +sev_launch_update_vmsa(SevGuestState *sev_guest) { int ret, fw_error; - ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL, &fw_error); + ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_UPDATE_VMSA, + NULL, &fw_error); if (ret) { error_report("%s: LAUNCH_UPDATE_VMSA ret=%d fw_error=%d '%s'", __func__, ret, fw_error, fw_error_to_str(fw_error)); @@ -754,18 +718,19 @@ sev_launch_update_vmsa(SevGuestState *sev) static void sev_launch_get_measure(Notifier *notifier, void *unused) { - SevGuestState *sev = sev_guest; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + SevGuestState *sev_guest = SEV_GUEST(sev_common); int ret, error; g_autofree guchar *data = NULL; struct kvm_sev_launch_measure measurement = {}; - if (!sev_check_state(sev, SEV_STATE_LAUNCH_UPDATE)) { + if (!sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) { return; } if (sev_es_enabled()) { /* measure all the VM save areas before getting launch_measure */ - ret = sev_launch_update_vmsa(sev); + ret = sev_launch_update_vmsa(sev_guest); if (ret) { exit(1); } @@ -773,7 +738,7 @@ sev_launch_get_measure(Notifier *notifier, void *unused) } /* query the measurement blob length */ - ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_MEASURE, + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_MEASURE, &measurement, &error); if (!measurement.len) { error_report("%s: LAUNCH_MEASURE ret=%d fw_error=%d '%s'", @@ -785,7 +750,7 @@ sev_launch_get_measure(Notifier *notifier, void *unused) measurement.uaddr = (unsigned long)data; /* get the measurement blob */ - ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_MEASURE, + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_MEASURE, &measurement, &error); if (ret) { error_report("%s: LAUNCH_MEASURE ret=%d fw_error=%d '%s'", @@ -793,17 +758,19 @@ sev_launch_get_measure(Notifier *notifier, void *unused) return; } - sev_set_guest_state(sev, SEV_STATE_LAUNCH_SECRET); + sev_set_guest_state(sev_common, SEV_STATE_LAUNCH_SECRET); /* encode the measurement value and emit the event */ - sev->measurement = g_base64_encode(data, measurement.len); - trace_kvm_sev_launch_measurement(sev->measurement); + sev_guest->measurement = g_base64_encode(data, measurement.len); + trace_kvm_sev_launch_measurement(sev_guest->measurement); } static char *sev_get_launch_measurement(void) { + SevGuestState *sev_guest = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); + if (sev_guest && - sev_guest->state >= SEV_STATE_LAUNCH_SECRET) { + SEV_COMMON(sev_guest)->state >= SEV_STATE_LAUNCH_SECRET) { return g_strdup(sev_guest->measurement); } @@ -832,19 +799,20 @@ static Notifier sev_machine_done_notify = { }; static void -sev_launch_finish(SevGuestState *sev) +sev_launch_finish(SevGuestState *sev_guest) { int ret, error; trace_kvm_sev_launch_finish(); - ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_FINISH, 0, &error); + ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_FINISH, 0, + &error); if (ret) { error_report("%s: LAUNCH_FINISH ret=%d fw_error=%d '%s'", __func__, ret, error, fw_error_to_str(error)); exit(1); } - sev_set_guest_state(sev, SEV_STATE_RUNNING); + sev_set_guest_state(SEV_COMMON(sev_guest), SEV_STATE_RUNNING); /* add migration blocker */ error_setg(&sev_mig_blocker, @@ -855,38 +823,40 @@ sev_launch_finish(SevGuestState *sev) static void sev_vm_state_change(void *opaque, bool running, RunState state) { - SevGuestState *sev = opaque; + SevCommonState *sev_common = opaque; if (running) { - if (!sev_check_state(sev, SEV_STATE_RUNNING)) { - sev_launch_finish(sev); + if (!sev_check_state(sev_common, SEV_STATE_RUNNING)) { + sev_launch_finish(SEV_GUEST(sev_common)); } } } static int sev_kvm_type(X86ConfidentialGuest *cg) { - SevGuestState *sev = SEV_GUEST(cg); + SevCommonState *sev_common = SEV_COMMON(cg); + SevGuestState *sev_guest = SEV_GUEST(sev_common); int kvm_type; - if (sev->kvm_type != -1) { + if (sev_common->kvm_type != -1) { goto out; } - kvm_type = (sev->policy & SEV_POLICY_ES) ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; - if (kvm_is_vm_type_supported(kvm_type) && !sev->legacy_vm_type) { - sev->kvm_type = kvm_type; + kvm_type = (sev_guest->policy & SEV_POLICY_ES) ? + KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM; + if (kvm_is_vm_type_supported(kvm_type) && !sev_guest->legacy_vm_type) { + sev_common->kvm_type = kvm_type; } else { - sev->kvm_type = KVM_X86_DEFAULT_VM; + sev_common->kvm_type = KVM_X86_DEFAULT_VM; } out: - return sev->kvm_type; + return sev_common->kvm_type; } static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) { - SevGuestState *sev = SEV_GUEST(cgs); + SevCommonState *sev_common = SEV_COMMON(cgs); char *devname; int ret, fw_error, cmd; uint32_t ebx; @@ -899,8 +869,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) return -1; } - sev_guest = sev; - sev->state = SEV_STATE_UNINIT; + sev_common->state = SEV_STATE_UNINIT; host_cpuid(0x8000001F, 0, NULL, &ebx, NULL, NULL); host_cbitpos = ebx & 0x3f; @@ -910,9 +879,9 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) * register of CPUID 0x8000001F. No need to verify the range as the * comparison against the host value accomplishes that. */ - if (host_cbitpos != sev->cbitpos) { + if (host_cbitpos != sev_common->cbitpos) { error_setg(errp, "%s: cbitpos check failed, host '%d' requested '%d'", - __func__, host_cbitpos, sev->cbitpos); + __func__, host_cbitpos, sev_common->cbitpos); goto err; } @@ -921,16 +890,17 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) * the EBX register of CPUID 0x8000001F, so verify the supplied value * is in the range of 1 to 63. */ - if (sev->reduced_phys_bits < 1 || sev->reduced_phys_bits > 63) { + if (sev_common->reduced_phys_bits < 1 || + sev_common->reduced_phys_bits > 63) { error_setg(errp, "%s: reduced_phys_bits check failed," " it should be in the range of 1 to 63, requested '%d'", - __func__, sev->reduced_phys_bits); + __func__, sev_common->reduced_phys_bits); goto err; } - devname = object_property_get_str(OBJECT(sev), "sev-device", NULL); - sev->sev_fd = open(devname, O_RDWR); - if (sev->sev_fd < 0) { + devname = object_property_get_str(OBJECT(sev_common), "sev-device", NULL); + sev_common->sev_fd = open(devname, O_RDWR); + if (sev_common->sev_fd < 0) { error_setg(errp, "%s: Failed to open %s '%s'", __func__, devname, strerror(errno)); g_free(devname); @@ -938,7 +908,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) } g_free(devname); - ret = sev_platform_ioctl(sev->sev_fd, SEV_PLATFORM_STATUS, &status, + ret = sev_platform_ioctl(sev_common->sev_fd, SEV_PLATFORM_STATUS, &status, &fw_error); if (ret) { error_setg(errp, "%s: failed to get platform status ret=%d " @@ -946,9 +916,9 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) fw_error_to_str(fw_error)); goto err; } - sev->build_id = status.build; - sev->api_major = status.api_major; - sev->api_minor = status.api_minor; + sev_common->build_id = status.build; + sev_common->api_major = status.api_major; + sev_common->api_minor = status.api_minor; if (sev_es_enabled()) { if (!kvm_kernel_irqchip_allowed()) { @@ -966,14 +936,14 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) } trace_kvm_sev_init(); - if (sev_kvm_type(X86_CONFIDENTIAL_GUEST(sev)) == KVM_X86_DEFAULT_VM) { + if (sev_kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == KVM_X86_DEFAULT_VM) { cmd = sev_es_enabled() ? KVM_SEV_ES_INIT : KVM_SEV_INIT; - ret = sev_ioctl(sev->sev_fd, cmd, NULL, &fw_error); + ret = sev_ioctl(sev_common->sev_fd, cmd, NULL, &fw_error); } else { struct kvm_sev_init args = { 0 }; - ret = sev_ioctl(sev->sev_fd, KVM_SEV_INIT2, &args, &fw_error); + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_INIT2, &args, &fw_error); } if (ret) { @@ -982,7 +952,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) goto err; } - ret = sev_launch_start(sev); + sev_launch_start(SEV_GUEST(sev_common)); if (ret) { error_setg(errp, "%s: failed to create encryption context", __func__); goto err; @@ -990,13 +960,12 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) ram_block_notifier_add(&sev_ram_notifier); qemu_add_machine_init_done_notifier(&sev_machine_done_notify); - qemu_add_vm_change_state_handler(sev_vm_state_change, sev); + qemu_add_vm_change_state_handler(sev_vm_state_change, sev_common); cgs->ready = true; return 0; err: - sev_guest = NULL; ram_block_discard_disable(false); return -1; } @@ -1004,13 +973,15 @@ err: int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp) { - if (!sev_guest) { + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + + if (!sev_common) { return 0; } /* if SEV is in update state then encrypt the data else do nothing */ - if (sev_check_state(sev_guest, SEV_STATE_LAUNCH_UPDATE)) { - int ret = sev_launch_update_data(sev_guest, ptr, len); + if (sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) { + int ret = sev_launch_update_data(SEV_GUEST(sev_common), ptr, len); if (ret < 0) { error_setg(errp, "SEV: Failed to encrypt pflash rom"); return ret; @@ -1030,16 +1001,17 @@ int sev_inject_launch_secret(const char *packet_hdr, const char *secret, void *hva; gsize hdr_sz = 0, data_sz = 0; MemoryRegion *mr = NULL; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); - if (!sev_guest) { + if (!sev_common) { error_setg(errp, "SEV not enabled for guest"); return 1; } /* secret can be injected only in this state */ - if (!sev_check_state(sev_guest, SEV_STATE_LAUNCH_SECRET)) { + if (!sev_check_state(sev_common, SEV_STATE_LAUNCH_SECRET)) { error_setg(errp, "SEV: Not in correct state. (LSECRET) %x", - sev_guest->state); + sev_common->state); return 1; } @@ -1073,7 +1045,7 @@ int sev_inject_launch_secret(const char *packet_hdr, const char *secret, trace_kvm_sev_launch_secret(gpa, input.guest_uaddr, input.trans_uaddr, input.trans_len); - ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_LAUNCH_SECRET, + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_SECRET, &input, &error); if (ret) { error_setg(errp, "SEV: failed to inject secret ret=%d fw_error=%d '%s'", @@ -1180,9 +1152,10 @@ void sev_es_set_reset_vector(CPUState *cpu) { X86CPU *x86; CPUX86State *env; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); /* Only update if we have valid reset information */ - if (!sev_guest || !sev_guest->reset_data_valid) { + if (!sev_common || !sev_common->reset_data_valid) { return; } @@ -1194,11 +1167,11 @@ void sev_es_set_reset_vector(CPUState *cpu) x86 = X86_CPU(cpu); env = &x86->env; - cpu_x86_load_seg_cache(env, R_CS, 0xf000, sev_guest->reset_cs, 0xffff, + cpu_x86_load_seg_cache(env, R_CS, 0xf000, sev_common->reset_cs, 0xffff, DESC_P_MASK | DESC_S_MASK | DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK); - env->eip = sev_guest->reset_ip; + env->eip = sev_common->reset_ip; } int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size) @@ -1206,6 +1179,7 @@ int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size) CPUState *cpu; uint32_t addr; int ret; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); if (!sev_es_enabled()) { return 0; @@ -1219,9 +1193,9 @@ int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size) } if (addr) { - sev_guest->reset_cs = addr & 0xffff0000; - sev_guest->reset_ip = addr & 0x0000ffff; - sev_guest->reset_data_valid = true; + sev_common->reset_cs = addr & 0xffff0000; + sev_common->reset_ip = addr & 0x0000ffff; + sev_common->reset_data_valid = true; CPU_FOREACH(cpu) { sev_es_set_reset_vector(cpu); @@ -1267,12 +1241,13 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) hwaddr mapped_len = sizeof(*padded_ht); MemTxAttrs attrs = { 0 }; bool ret = true; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); /* * Only add the kernel hashes if the sev-guest configuration explicitly * stated kernel-hashes=on. */ - if (!sev_guest->kernel_hashes) { + if (!sev_common->kernel_hashes) { return false; } @@ -1363,8 +1338,30 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) return ret; } +static char * +sev_common_get_sev_device(Object *obj, Error **errp) +{ + return g_strdup(SEV_COMMON(obj)->sev_device); +} + static void -sev_guest_class_init(ObjectClass *oc, void *data) +sev_common_set_sev_device(Object *obj, const char *value, Error **errp) +{ + SEV_COMMON(obj)->sev_device = g_strdup(value); +} + +static bool sev_common_get_kernel_hashes(Object *obj, Error **errp) +{ + return SEV_COMMON(obj)->kernel_hashes; +} + +static void sev_common_set_kernel_hashes(Object *obj, bool value, Error **errp) +{ + SEV_COMMON(obj)->kernel_hashes = value; +} + +static void +sev_common_class_init(ObjectClass *oc, void *data) { ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); @@ -1373,10 +1370,87 @@ sev_guest_class_init(ObjectClass *oc, void *data) x86_klass->kvm_type = sev_kvm_type; object_class_property_add_str(oc, "sev-device", - sev_guest_get_sev_device, - sev_guest_set_sev_device); + sev_common_get_sev_device, + sev_common_set_sev_device); object_class_property_set_description(oc, "sev-device", "SEV device to use"); + object_class_property_add_bool(oc, "kernel-hashes", + sev_common_get_kernel_hashes, + sev_common_set_kernel_hashes); + object_class_property_set_description(oc, "kernel-hashes", + "add kernel hashes to guest firmware for measured Linux boot"); +} + +static void +sev_common_instance_init(Object *obj) +{ + SevCommonState *sev_common = SEV_COMMON(obj); + + sev_common->kvm_type = -1; + + sev_common->sev_device = g_strdup(DEFAULT_SEV_DEVICE); + + object_property_add_uint32_ptr(obj, "cbitpos", &sev_common->cbitpos, + OBJ_PROP_FLAG_READWRITE); + object_property_add_uint32_ptr(obj, "reduced-phys-bits", + &sev_common->reduced_phys_bits, + OBJ_PROP_FLAG_READWRITE); +} + +/* sev guest info common to sev/sev-es/sev-snp */ +static const TypeInfo sev_common_info = { + .parent = TYPE_X86_CONFIDENTIAL_GUEST, + .name = TYPE_SEV_COMMON, + .instance_size = sizeof(SevCommonState), + .instance_init = sev_common_instance_init, + .class_size = sizeof(SevCommonStateClass), + .class_init = sev_common_class_init, + .abstract = true, + .interfaces = (InterfaceInfo[]) { + { TYPE_USER_CREATABLE }, + { } + } +}; + +static char * +sev_guest_get_dh_cert_file(Object *obj, Error **errp) +{ + return g_strdup(SEV_GUEST(obj)->dh_cert_file); +} + +static void +sev_guest_set_dh_cert_file(Object *obj, const char *value, Error **errp) +{ + SEV_GUEST(obj)->dh_cert_file = g_strdup(value); +} + +static char * +sev_guest_get_session_file(Object *obj, Error **errp) +{ + SevGuestState *sev_guest = SEV_GUEST(obj); + + return sev_guest->session_file ? g_strdup(sev_guest->session_file) : NULL; +} + +static void +sev_guest_set_session_file(Object *obj, const char *value, Error **errp) +{ + SEV_GUEST(obj)->session_file = g_strdup(value); +} + +static bool sev_guest_get_legacy_vm_type(Object *obj, Error **errp) +{ + return SEV_GUEST(obj)->legacy_vm_type; +} + +static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp) +{ + SEV_GUEST(obj)->legacy_vm_type = value; +} + +static void +sev_guest_class_init(ObjectClass *oc, void *data) +{ object_class_property_add_str(oc, "dh-cert-file", sev_guest_get_dh_cert_file, sev_guest_set_dh_cert_file); @@ -1387,11 +1461,6 @@ sev_guest_class_init(ObjectClass *oc, void *data) sev_guest_set_session_file); object_class_property_set_description(oc, "session-file", "guest owners session parameters (encoded with base64)"); - object_class_property_add_bool(oc, "kernel-hashes", - sev_guest_get_kernel_hashes, - sev_guest_set_kernel_hashes); - object_class_property_set_description(oc, "kernel-hashes", - "add kernel hashes to guest firmware for measured Linux boot"); object_class_property_add_bool(oc, "legacy-vm-type", sev_guest_get_legacy_vm_type, sev_guest_set_legacy_vm_type); @@ -1402,41 +1471,29 @@ sev_guest_class_init(ObjectClass *oc, void *data) static void sev_guest_instance_init(Object *obj) { - SevGuestState *sev = SEV_GUEST(obj); + SevGuestState *sev_guest = SEV_GUEST(obj); - sev->kvm_type = -1; - - sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE); - sev->policy = DEFAULT_GUEST_POLICY; - object_property_add_uint32_ptr(obj, "policy", &sev->policy, + sev_guest->policy = DEFAULT_GUEST_POLICY; + object_property_add_uint32_ptr(obj, "handle", &sev_guest->handle, OBJ_PROP_FLAG_READWRITE); - object_property_add_uint32_ptr(obj, "handle", &sev->handle, - OBJ_PROP_FLAG_READWRITE); - object_property_add_uint32_ptr(obj, "cbitpos", &sev->cbitpos, - OBJ_PROP_FLAG_READWRITE); - object_property_add_uint32_ptr(obj, "reduced-phys-bits", - &sev->reduced_phys_bits, + object_property_add_uint32_ptr(obj, "policy", &sev_guest->policy, OBJ_PROP_FLAG_READWRITE); object_apply_compat_props(obj); } -/* sev guest info */ +/* guest info specific sev/sev-es */ static const TypeInfo sev_guest_info = { - .parent = TYPE_X86_CONFIDENTIAL_GUEST, + .parent = TYPE_SEV_COMMON, .name = TYPE_SEV_GUEST, .instance_size = sizeof(SevGuestState), - .instance_finalize = sev_guest_finalize, - .class_init = sev_guest_class_init, .instance_init = sev_guest_instance_init, - .interfaces = (InterfaceInfo[]) { - { TYPE_USER_CREATABLE }, - { } - } + .class_init = sev_guest_class_init, }; static void sev_register_types(void) { + type_register_static(&sev_common_info); type_register_static(&sev_guest_info); } diff --git a/target/i386/sev.h b/target/i386/sev.h index 9e10d09539..668374eef3 100644 --- a/target/i386/sev.h +++ b/target/i386/sev.h @@ -20,6 +20,9 @@ #include "exec/confidential-guest-support.h" +#define TYPE_SEV_COMMON "sev-common" +#define TYPE_SEV_GUEST "sev-guest" + #define SEV_POLICY_NODBG 0x1 #define SEV_POLICY_NOKS 0x2 #define SEV_POLICY_ES 0x4 From 6600f1ac0c81cbe67faf048ea07f78542dea925f Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Thu, 30 May 2024 06:16:17 -0500 Subject: [PATCH 20/46] i386/sev: Move sev_launch_update to separate class method When sev-snp-guest objects are introduced there will be a number of differences in how the launch data is handled compared to the existing sev-guest object. Move sev_launch_start() to a class method to make it easier to implement SNP-specific launch update functionality later. Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-6-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini --- target/i386/sev.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index 33e606eea0..b2aa0d6f99 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -69,6 +69,8 @@ struct SevCommonState { struct SevCommonStateClass { X86ConfidentialGuestClass parent_class; + /* public */ + int (*launch_start)(SevCommonState *sev_common); }; /** @@ -632,16 +634,16 @@ sev_read_file_base64(const char *filename, guchar **data, gsize *len) } static int -sev_launch_start(SevGuestState *sev_guest) +sev_launch_start(SevCommonState *sev_common) { gsize sz; int ret = 1; int fw_error, rc; + SevGuestState *sev_guest = SEV_GUEST(sev_common); struct kvm_sev_launch_start start = { .handle = sev_guest->handle, .policy = sev_guest->policy }; guchar *session = NULL, *dh_cert = NULL; - SevCommonState *sev_common = SEV_COMMON(sev_guest); if (sev_guest->session_file) { if (sev_read_file_base64(sev_guest->session_file, &session, &sz) < 0) { @@ -862,6 +864,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) uint32_t ebx; uint32_t host_cbitpos; struct sev_user_data_status status = {}; + SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(cgs); ret = ram_block_discard_disable(true); if (ret) { @@ -952,7 +955,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) goto err; } - sev_launch_start(SEV_GUEST(sev_common)); + ret = klass->launch_start(sev_common); if (ret) { error_setg(errp, "%s: failed to create encryption context", __func__); goto err; @@ -1451,6 +1454,10 @@ static void sev_guest_set_legacy_vm_type(Object *obj, bool value, Error **errp) static void sev_guest_class_init(ObjectClass *oc, void *data) { + SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); + + klass->launch_start = sev_launch_start; + object_class_property_add_str(oc, "dh-cert-file", sev_guest_get_dh_cert_file, sev_guest_set_dh_cert_file); From bce615a14aec07cab0488e5a242f6a91e641efcb Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Thu, 30 May 2024 06:16:18 -0500 Subject: [PATCH 21/46] i386/sev: Move sev_launch_finish to separate class method When sev-snp-guest objects are introduced there will be a number of differences in how the launch finish is handled compared to the existing sev-guest object. Move sev_launch_finish() to a class method to make it easier to implement SNP-specific launch update functionality later. Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-7-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini --- target/i386/sev.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index b2aa0d6f99..28a018ed83 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -71,6 +71,7 @@ struct SevCommonStateClass { /* public */ int (*launch_start)(SevCommonState *sev_common); + void (*launch_finish)(SevCommonState *sev_common); }; /** @@ -801,12 +802,12 @@ static Notifier sev_machine_done_notify = { }; static void -sev_launch_finish(SevGuestState *sev_guest) +sev_launch_finish(SevCommonState *sev_common) { int ret, error; trace_kvm_sev_launch_finish(); - ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_FINISH, 0, + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_FINISH, 0, &error); if (ret) { error_report("%s: LAUNCH_FINISH ret=%d fw_error=%d '%s'", @@ -814,7 +815,7 @@ sev_launch_finish(SevGuestState *sev_guest) exit(1); } - sev_set_guest_state(SEV_COMMON(sev_guest), SEV_STATE_RUNNING); + sev_set_guest_state(sev_common, SEV_STATE_RUNNING); /* add migration blocker */ error_setg(&sev_mig_blocker, @@ -826,10 +827,11 @@ static void sev_vm_state_change(void *opaque, bool running, RunState state) { SevCommonState *sev_common = opaque; + SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(opaque); if (running) { if (!sev_check_state(sev_common, SEV_STATE_RUNNING)) { - sev_launch_finish(SEV_GUEST(sev_common)); + klass->launch_finish(sev_common); } } } @@ -1457,6 +1459,7 @@ sev_guest_class_init(ObjectClass *oc, void *data) SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); klass->launch_start = sev_launch_start; + klass->launch_finish = sev_launch_finish; object_class_property_add_str(oc, "dh-cert-file", sev_guest_get_dh_cert_file, From 7b34df44260b391e33bc3acf1ced30019d9aadf1 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Thu, 30 May 2024 06:16:19 -0500 Subject: [PATCH 22/46] i386/sev: Introduce 'sev-snp-guest' object SEV-SNP support relies on a different set of properties/state than the existing 'sev-guest' object. This patch introduces the 'sev-snp-guest' object, which can be used to configure an SEV-SNP guest. For example, a default-configured SEV-SNP guest with no additional information passed in for use with attestation: -object sev-snp-guest,id=sev0 or a fully-specified SEV-SNP guest where all spec-defined binary blobs are passed in as base64-encoded strings: -object sev-snp-guest,id=sev0, \ policy=0x30000, \ init-flags=0, \ id-block=YWFhYWFhYWFhYWFhYWFhCg==, \ id-auth=CxHK/OKLkXGn/KpAC7Wl1FSiisWDbGTEKz..., \ author-key-enabled=on, \ host-data=LNkCWBRC5CcdGXirbNUV1OrsR28s..., \ guest-visible-workarounds=AA==, \ See the QAPI schema updates included in this patch for more usage details. In some cases these blobs may be up to 4096 characters, but this is generally well below the default limit for linux hosts where command-line sizes are defined by the sysconf-configurable ARG_MAX value, which defaults to 2097152 characters for Ubuntu hosts, for example. Signed-off-by: Brijesh Singh Co-developed-by: Michael Roth Acked-by: Markus Armbruster (for QAPI schema) Signed-off-by: Michael Roth Co-developed-by: Pankaj Gupta Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-8-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini --- docs/system/i386/amd-memory-encryption.rst | 70 +++++- qapi/qom.json | 58 +++++ target/i386/sev.c | 253 +++++++++++++++++++++ target/i386/sev.h | 1 + 4 files changed, 380 insertions(+), 2 deletions(-) diff --git a/docs/system/i386/amd-memory-encryption.rst b/docs/system/i386/amd-memory-encryption.rst index e9bc142bc1..748f5094ba 100644 --- a/docs/system/i386/amd-memory-encryption.rst +++ b/docs/system/i386/amd-memory-encryption.rst @@ -25,8 +25,8 @@ support for notifying a guest's operating system when certain types of VMEXITs are about to occur. This allows the guest to selectively share information with the hypervisor to satisfy the requested function. -Launching ---------- +Launching (SEV and SEV-ES) +-------------------------- Boot images (such as bios) must be encrypted before a guest can be booted. The ``MEMORY_ENCRYPT_OP`` ioctl provides commands to encrypt the images: ``LAUNCH_START``, @@ -161,6 +161,72 @@ The value of GCTX.LD is If kernel hashes are not used, or SEV-ES is disabled, use empty blobs for ``kernel_hashes_blob`` and ``vmsas_blob`` as needed. +Launching (SEV-SNP) +------------------- +Boot images (such as bios) must be encrypted before a guest can be booted. The +``MEMORY_ENCRYPT_OP`` ioctl provides commands to encrypt the images: +``SNP_LAUNCH_START``, ``SNP_LAUNCH_UPDATE``, and ``SNP_LAUNCH_FINISH``. These +three commands communicate with SEV-SNP firmware to generate a fresh memory +encryption key for the VM, encrypt the boot images for a successful launch. For +more details on the SEV-SNP firmware interfaces used by these commands please +see the SEV-SNP Firmware ABI. + +``SNP_LAUNCH_START`` is called first to create a cryptographic launch context +within the firmware. To create this context, the guest owner must provide a +guest policy and other parameters as described in the SEV-SNP firmware +specification. The launch parameters should be specified as described in the +QAPI schema for the sev-snp-guest object. + +The ``SNP_LAUNCH_START`` uses the following parameters, which can be configured +by the corresponding parameters documented in the QAPI schema for the +'sev-snp-guest' object. + ++--------+-------+----------+-------------------------------------------------+ +| key | type | default | meaning | ++---------------------------+-------------------------------------------------+ +| policy | hex | 0x30000 | a 64-bit guest policy | ++---------------------------+-------------------------------------------------+ +| guest-visible-workarounds | string| 0 | 16-byte base64 encoded string| +| | | | for guest OS visible | +| | | | workarounds. | ++---------------------------+-------------------------------------------------+ + +``SNP_LAUNCH_UPDATE`` encrypts the memory region using the cryptographic context +created via the ``SNP_LAUNCH_START`` command. If required, this command can be +called multiple times to encrypt different memory regions. The command also +calculates the measurement of the memory contents as it encrypts. + +``SNP_LAUNCH_FINISH`` finalizes the guest launch flow. Optionally, while +finalizing the launch the firmware can perform checks on the launch digest +computing through the ``SNP_LAUNCH_UPDATE``. To perform the check the user must +supply the id block, authentication blob and host data that should be included +in the attestation report. See the SEV-SNP spec for further details. + +The ``SNP_LAUNCH_FINISH`` uses the following parameters, which can be configured +by the corresponding parameters documented in the QAPI schema for the +'sev-snp-guest' object. + ++--------------------+-------+----------+-------------------------------------+ +| key | type | default | meaning | ++--------------------+-------+----------+-------------------------------------+ +| id-block | string| none | base64 encoded ID block | ++--------------------+-------+----------+-------------------------------------+ +| id-auth | string| none | base64 encoded authentication | +| | | | information | ++--------------------+-------+----------+-------------------------------------+ +| author-key-enabled | bool | 0 | auth block contains author key | ++--------------------+-------+----------+-------------------------------------+ +| host_data | string| none | host provided data | ++--------------------+-------+----------+-------------------------------------+ + +To launch a SEV-SNP guest (additional parameters are documented in the QAPI +schema for the 'sev-snp-guest' object):: + + # ${QEMU} \ + -machine ...,confidential-guest-support=sev0 \ + -object sev-snp-guest,id=sev0,cbitpos=51,reduced-phys-bits=1 + + Debugging --------- diff --git a/qapi/qom.json b/qapi/qom.json index 056b38f491..8bd299265e 100644 --- a/qapi/qom.json +++ b/qapi/qom.json @@ -929,6 +929,62 @@ '*handle': 'uint32', '*legacy-vm-type': 'bool' } } +## +# @SevSnpGuestProperties: +# +# Properties for sev-snp-guest objects. Most of these are direct +# arguments for the KVM_SNP_* interfaces documented in the Linux +# kernel source under +# Documentation/arch/x86/amd-memory-encryption.rst, which are in turn +# closely coupled with the SNP_INIT/SNP_LAUNCH_* firmware commands +# documented in the SEV-SNP Firmware ABI Specification (Rev 0.9). +# +# More usage information is also available in the QEMU source tree +# under docs/amd-memory-encryption. +# +# @policy: the 'POLICY' parameter to the SNP_LAUNCH_START command, as +# defined in the SEV-SNP firmware ABI (default: 0x30000) +# +# @guest-visible-workarounds: 16-byte, base64-encoded blob to report +# hypervisor-defined workarounds, corresponding to the 'GOSVW' +# parameter of the SNP_LAUNCH_START command defined in the SEV-SNP +# firmware ABI (default: all-zero) +# +# @id-block: 96-byte, base64-encoded blob to provide the 'ID Block' +# structure for the SNP_LAUNCH_FINISH command defined in the +# SEV-SNP firmware ABI (default: all-zero) +# +# @id-auth: 4096-byte, base64-encoded blob to provide the 'ID +# Authentication Information Structure' for the SNP_LAUNCH_FINISH +# command defined in the SEV-SNP firmware ABI (default: all-zero) +# +# @author-key-enabled: true if 'id-auth' blob contains the 'AUTHOR_KEY' +# field defined SEV-SNP firmware ABI (default: false) +# +# @host-data: 32-byte, base64-encoded, user-defined blob to provide to +# the guest, as documented for the 'HOST_DATA' parameter of the +# SNP_LAUNCH_FINISH command in the SEV-SNP firmware ABI (default: +# all-zero) +# +# @vcek-disabled: Guests are by default allowed to choose between VLEK +# (Versioned Loaded Endorsement Key) or VCEK (Versioned Chip +# Endorsement Key) when requesting attestation reports from +# firmware. Set this to true to disable the use of VCEK. +# (default: false) (since: 9.1) +# +# Since: 9.1 +## +{ 'struct': 'SevSnpGuestProperties', + 'base': 'SevCommonProperties', + 'data': { + '*policy': 'uint64', + '*guest-visible-workarounds': 'str', + '*id-block': 'str', + '*id-auth': 'str', + '*author-key-enabled': 'bool', + '*host-data': 'str', + '*vcek-disabled': 'bool' } } + ## # @ThreadContextProperties: # @@ -1007,6 +1063,7 @@ { 'name': 'secret_keyring', 'if': 'CONFIG_SECRET_KEYRING' }, 'sev-guest', + 'sev-snp-guest', 'thread-context', 's390-pv-guest', 'throttle-group', @@ -1077,6 +1134,7 @@ 'secret_keyring': { 'type': 'SecretKeyringProperties', 'if': 'CONFIG_SECRET_KEYRING' }, 'sev-guest': 'SevGuestProperties', + 'sev-snp-guest': 'SevSnpGuestProperties', 'thread-context': 'ThreadContextProperties', 'throttle-group': 'ThrottleGroupProperties', 'tls-creds-anon': 'TlsCredsAnonProperties', diff --git a/target/i386/sev.c b/target/i386/sev.c index 28a018ed83..a81b3228d4 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -42,6 +42,7 @@ OBJECT_DECLARE_TYPE(SevCommonState, SevCommonStateClass, SEV_COMMON) OBJECT_DECLARE_TYPE(SevGuestState, SevCommonStateClass, SEV_GUEST) +OBJECT_DECLARE_TYPE(SevSnpGuestState, SevCommonStateClass, SEV_SNP_GUEST) struct SevCommonState { X86ConfidentialGuest parent_obj; @@ -96,8 +97,22 @@ struct SevGuestState { bool legacy_vm_type; }; +struct SevSnpGuestState { + SevCommonState parent_obj; + + /* configuration parameters */ + char *guest_visible_workarounds; + char *id_block; + char *id_auth; + char *host_data; + + struct kvm_sev_snp_launch_start kvm_start_conf; + struct kvm_sev_snp_launch_finish kvm_finish_conf; +}; + #define DEFAULT_GUEST_POLICY 0x1 /* disable debug */ #define DEFAULT_SEV_DEVICE "/dev/sev" +#define DEFAULT_SEV_SNP_POLICY 0x30000 #define SEV_INFO_BLOCK_GUID "00f771de-1a7e-4fcb-890e-68c77e2fb44e" typedef struct __attribute__((__packed__)) SevInfoBlock { @@ -1500,11 +1515,249 @@ static const TypeInfo sev_guest_info = { .class_init = sev_guest_class_init, }; +static void +sev_snp_guest_get_policy(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + visit_type_uint64(v, name, + (uint64_t *)&SEV_SNP_GUEST(obj)->kvm_start_conf.policy, + errp); +} + +static void +sev_snp_guest_set_policy(Object *obj, Visitor *v, const char *name, + void *opaque, Error **errp) +{ + visit_type_uint64(v, name, + (uint64_t *)&SEV_SNP_GUEST(obj)->kvm_start_conf.policy, + errp); +} + +static char * +sev_snp_guest_get_guest_visible_workarounds(Object *obj, Error **errp) +{ + return g_strdup(SEV_SNP_GUEST(obj)->guest_visible_workarounds); +} + +static void +sev_snp_guest_set_guest_visible_workarounds(Object *obj, const char *value, + Error **errp) +{ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + struct kvm_sev_snp_launch_start *start = &sev_snp_guest->kvm_start_conf; + g_autofree guchar *blob; + gsize len; + + g_free(sev_snp_guest->guest_visible_workarounds); + + /* store the base64 str so we don't need to re-encode in getter */ + sev_snp_guest->guest_visible_workarounds = g_strdup(value); + + blob = qbase64_decode(sev_snp_guest->guest_visible_workarounds, + -1, &len, errp); + if (!blob) { + return; + } + + if (len != sizeof(start->gosvw)) { + error_setg(errp, "parameter length of %lu exceeds max of %lu", + len, sizeof(start->gosvw)); + return; + } + + memcpy(start->gosvw, blob, len); +} + +static char * +sev_snp_guest_get_id_block(Object *obj, Error **errp) +{ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + + return g_strdup(sev_snp_guest->id_block); +} + +static void +sev_snp_guest_set_id_block(Object *obj, const char *value, Error **errp) +{ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf; + gsize len; + + g_free(sev_snp_guest->id_block); + g_free((guchar *)finish->id_block_uaddr); + + /* store the base64 str so we don't need to re-encode in getter */ + sev_snp_guest->id_block = g_strdup(value); + + finish->id_block_uaddr = + (uint64_t)qbase64_decode(sev_snp_guest->id_block, -1, &len, errp); + + if (!finish->id_block_uaddr) { + return; + } + + if (len != KVM_SEV_SNP_ID_BLOCK_SIZE) { + error_setg(errp, "parameter length of %lu not equal to %u", + len, KVM_SEV_SNP_ID_BLOCK_SIZE); + return; + } + + finish->id_block_en = (len) ? 1 : 0; +} + +static char * +sev_snp_guest_get_id_auth(Object *obj, Error **errp) +{ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + + return g_strdup(sev_snp_guest->id_auth); +} + +static void +sev_snp_guest_set_id_auth(Object *obj, const char *value, Error **errp) +{ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf; + gsize len; + + g_free(sev_snp_guest->id_auth); + g_free((guchar *)finish->id_auth_uaddr); + + /* store the base64 str so we don't need to re-encode in getter */ + sev_snp_guest->id_auth = g_strdup(value); + + finish->id_auth_uaddr = + (uint64_t)qbase64_decode(sev_snp_guest->id_auth, -1, &len, errp); + + if (!finish->id_auth_uaddr) { + return; + } + + if (len > KVM_SEV_SNP_ID_AUTH_SIZE) { + error_setg(errp, "parameter length:ID_AUTH %lu exceeds max of %u", + len, KVM_SEV_SNP_ID_AUTH_SIZE); + return; + } +} + +static bool +sev_snp_guest_get_author_key_enabled(Object *obj, Error **errp) +{ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + + return !!sev_snp_guest->kvm_finish_conf.auth_key_en; +} + +static void +sev_snp_guest_set_author_key_enabled(Object *obj, bool value, Error **errp) +{ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + + sev_snp_guest->kvm_finish_conf.auth_key_en = value; +} + +static bool +sev_snp_guest_get_vcek_disabled(Object *obj, Error **errp) +{ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + + return !!sev_snp_guest->kvm_finish_conf.vcek_disabled; +} + +static void +sev_snp_guest_set_vcek_disabled(Object *obj, bool value, Error **errp) +{ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + + sev_snp_guest->kvm_finish_conf.vcek_disabled = value; +} + +static char * +sev_snp_guest_get_host_data(Object *obj, Error **errp) +{ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + + return g_strdup(sev_snp_guest->host_data); +} + +static void +sev_snp_guest_set_host_data(Object *obj, const char *value, Error **errp) +{ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + struct kvm_sev_snp_launch_finish *finish = &sev_snp_guest->kvm_finish_conf; + g_autofree guchar *blob; + gsize len; + + g_free(sev_snp_guest->host_data); + + /* store the base64 str so we don't need to re-encode in getter */ + sev_snp_guest->host_data = g_strdup(value); + + blob = qbase64_decode(sev_snp_guest->host_data, -1, &len, errp); + + if (!blob) { + return; + } + + if (len != sizeof(finish->host_data)) { + error_setg(errp, "parameter length of %lu not equal to %lu", + len, sizeof(finish->host_data)); + return; + } + + memcpy(finish->host_data, blob, len); +} + +static void +sev_snp_guest_class_init(ObjectClass *oc, void *data) +{ + object_class_property_add(oc, "policy", "uint64", + sev_snp_guest_get_policy, + sev_snp_guest_set_policy, NULL, NULL); + object_class_property_add_str(oc, "guest-visible-workarounds", + sev_snp_guest_get_guest_visible_workarounds, + sev_snp_guest_set_guest_visible_workarounds); + object_class_property_add_str(oc, "id-block", + sev_snp_guest_get_id_block, + sev_snp_guest_set_id_block); + object_class_property_add_str(oc, "id-auth", + sev_snp_guest_get_id_auth, + sev_snp_guest_set_id_auth); + object_class_property_add_bool(oc, "author-key-enabled", + sev_snp_guest_get_author_key_enabled, + sev_snp_guest_set_author_key_enabled); + object_class_property_add_bool(oc, "vcek-required", + sev_snp_guest_get_vcek_disabled, + sev_snp_guest_set_vcek_disabled); + object_class_property_add_str(oc, "host-data", + sev_snp_guest_get_host_data, + sev_snp_guest_set_host_data); +} + +static void +sev_snp_guest_instance_init(Object *obj) +{ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + + /* default init/start/finish params for kvm */ + sev_snp_guest->kvm_start_conf.policy = DEFAULT_SEV_SNP_POLICY; +} + +/* guest info specific to sev-snp */ +static const TypeInfo sev_snp_guest_info = { + .parent = TYPE_SEV_COMMON, + .name = TYPE_SEV_SNP_GUEST, + .instance_size = sizeof(SevSnpGuestState), + .class_init = sev_snp_guest_class_init, + .instance_init = sev_snp_guest_instance_init, +}; + static void sev_register_types(void) { type_register_static(&sev_common_info); type_register_static(&sev_guest_info); + type_register_static(&sev_snp_guest_info); } type_init(sev_register_types); diff --git a/target/i386/sev.h b/target/i386/sev.h index 668374eef3..bedc667eeb 100644 --- a/target/i386/sev.h +++ b/target/i386/sev.h @@ -22,6 +22,7 @@ #define TYPE_SEV_COMMON "sev-common" #define TYPE_SEV_GUEST "sev-guest" +#define TYPE_SEV_SNP_GUEST "sev-snp-guest" #define SEV_POLICY_NODBG 0x1 #define SEV_POLICY_NOKS 0x2 From 99190f805dca9475fe244fbd8041961842657dc2 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 30 May 2024 06:16:20 -0500 Subject: [PATCH 23/46] i386/sev: Add a sev_snp_enabled() helper Add a simple helper to check if the current guest type is SNP. Also have SNP-enabled imply that SEV-ES is enabled as well, and fix up any places where the sev_es_enabled() check is expecting a pure/non-SNP guest. Signed-off-by: Michael Roth Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-9-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini --- target/i386/sev.c | 13 ++++++++++++- target/i386/sev.h | 2 ++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index a81b3228d4..4edfedc139 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -325,12 +325,21 @@ sev_enabled(void) return !!object_dynamic_cast(OBJECT(cgs), TYPE_SEV_COMMON); } +bool +sev_snp_enabled(void) +{ + ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; + + return !!object_dynamic_cast(OBJECT(cgs), TYPE_SEV_SNP_GUEST); +} + bool sev_es_enabled(void) { ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; - return sev_enabled() && (SEV_GUEST(cgs)->policy & SEV_POLICY_ES); + return sev_snp_enabled() || + (sev_enabled() && SEV_GUEST(cgs)->policy & SEV_POLICY_ES); } uint32_t @@ -946,7 +955,9 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) "support", __func__); goto err; } + } + if (sev_es_enabled() && !sev_snp_enabled()) { if (!(status.flags & SEV_STATUS_FLAGS_CONFIG_ES)) { error_setg(errp, "%s: guest policy requires SEV-ES, but " "host SEV-ES support unavailable", diff --git a/target/i386/sev.h b/target/i386/sev.h index bedc667eeb..94295ee74f 100644 --- a/target/i386/sev.h +++ b/target/i386/sev.h @@ -45,9 +45,11 @@ typedef struct SevKernelLoaderContext { #ifdef CONFIG_SEV bool sev_enabled(void); bool sev_es_enabled(void); +bool sev_snp_enabled(void); #else #define sev_enabled() 0 #define sev_es_enabled() 0 +#define sev_snp_enabled() 0 #endif uint32_t sev_get_cbit_position(void); From 990da8d243a8c59dafcbed78b56a0e4ffb1605d9 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Thu, 30 May 2024 06:16:21 -0500 Subject: [PATCH 24/46] i386/sev: Add sev_kvm_init() override for SEV class Some aspects of the init routine SEV are specific to SEV and not applicable for SNP guests, so move the SEV-specific bits into separate class method and retain only the common functionality. Co-developed-by: Michael Roth Signed-off-by: Michael Roth Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-10-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini --- target/i386/sev.c | 72 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 51 insertions(+), 21 deletions(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index 4edfedc139..5519de1c6b 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -73,6 +73,7 @@ struct SevCommonStateClass { /* public */ int (*launch_start)(SevCommonState *sev_common); void (*launch_finish)(SevCommonState *sev_common); + int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp); }; /** @@ -882,7 +883,7 @@ out: return sev_common->kvm_type; } -static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) +static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) { SevCommonState *sev_common = SEV_COMMON(cgs); char *devname; @@ -892,12 +893,6 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) struct sev_user_data_status status = {}; SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(cgs); - ret = ram_block_discard_disable(true); - if (ret) { - error_report("%s: cannot disable RAM discard", __func__); - return -1; - } - sev_common->state = SEV_STATE_UNINIT; host_cpuid(0x8000001F, 0, NULL, &ebx, NULL, NULL); @@ -911,7 +906,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) if (host_cbitpos != sev_common->cbitpos) { error_setg(errp, "%s: cbitpos check failed, host '%d' requested '%d'", __func__, host_cbitpos, sev_common->cbitpos); - goto err; + return -1; } /* @@ -924,7 +919,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) error_setg(errp, "%s: reduced_phys_bits check failed," " it should be in the range of 1 to 63, requested '%d'", __func__, sev_common->reduced_phys_bits); - goto err; + return -1; } devname = object_property_get_str(OBJECT(sev_common), "sev-device", NULL); @@ -933,7 +928,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) error_setg(errp, "%s: Failed to open %s '%s'", __func__, devname, strerror(errno)); g_free(devname); - goto err; + return -1; } g_free(devname); @@ -943,7 +938,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) error_setg(errp, "%s: failed to get platform status ret=%d " "fw_error='%d: %s'", __func__, ret, fw_error, fw_error_to_str(fw_error)); - goto err; + return -1; } sev_common->build_id = status.build; sev_common->api_major = status.api_major; @@ -953,7 +948,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) if (!kvm_kernel_irqchip_allowed()) { error_setg(errp, "%s: SEV-ES guests require in-kernel irqchip" "support", __func__); - goto err; + return -1; } } @@ -962,7 +957,7 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) error_setg(errp, "%s: guest policy requires SEV-ES, but " "host SEV-ES support unavailable", __func__); - goto err; + return -1; } } @@ -980,25 +975,59 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) if (ret) { error_setg(errp, "%s: failed to initialize ret=%d fw_error=%d '%s'", __func__, ret, fw_error, fw_error_to_str(fw_error)); - goto err; + return -1; } ret = klass->launch_start(sev_common); if (ret) { error_setg(errp, "%s: failed to create encryption context", __func__); - goto err; + return -1; + } + + if (klass->kvm_init && klass->kvm_init(cgs, errp)) { + return -1; } - ram_block_notifier_add(&sev_ram_notifier); - qemu_add_machine_init_done_notifier(&sev_machine_done_notify); qemu_add_vm_change_state_handler(sev_vm_state_change, sev_common); cgs->ready = true; return 0; -err: - ram_block_discard_disable(false); - return -1; +} + +static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) +{ + int ret; + + /* + * SEV/SEV-ES rely on pinned memory to back guest RAM so discarding + * isn't actually possible. With SNP, only guest_memfd pages are used + * for private guest memory, so discarding of shared memory is still + * possible.. + */ + ret = ram_block_discard_disable(true); + if (ret) { + error_setg(errp, "%s: cannot disable RAM discard", __func__); + return -1; + } + + /* + * SEV uses these notifiers to register/pin pages prior to guest use, + * but SNP relies on guest_memfd for private pages, which has its + * own internal mechanisms for registering/pinning private memory. + */ + ram_block_notifier_add(&sev_ram_notifier); + + /* + * The machine done notify event is used for SEV guests to get the + * measurement of the encrypted images. When SEV-SNP is enabled, the + * measurement is part of the guest attestation process where it can + * be collected without any reliance on the VMM. So skip registering + * the notifier for SNP in favor of using guest attestation instead. + */ + qemu_add_machine_init_done_notifier(&sev_machine_done_notify); + + return 0; } int @@ -1397,7 +1426,7 @@ sev_common_class_init(ObjectClass *oc, void *data) ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); - klass->kvm_init = sev_kvm_init; + klass->kvm_init = sev_common_kvm_init; x86_klass->kvm_type = sev_kvm_type; object_class_property_add_str(oc, "sev-device", @@ -1486,6 +1515,7 @@ sev_guest_class_init(ObjectClass *oc, void *data) klass->launch_start = sev_launch_start; klass->launch_finish = sev_launch_finish; + klass->kvm_init = sev_kvm_init; object_class_property_add_str(oc, "dh-cert-file", sev_guest_get_dh_cert_file, From 125b95a6d465a03ff30816eff0b1889aec01f0c3 Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Thu, 30 May 2024 06:16:22 -0500 Subject: [PATCH 25/46] i386/sev: Add snp_kvm_init() override for SNP class SNP does not support SMM and requires guest_memfd for private guest memory, so add SNP specific kvm_init() functionality in snp_kvm_init() class method. Signed-off-by: Michael Roth Co-developed-by: Pankaj Gupta Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-11-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini --- target/i386/sev.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index 5519de1c6b..6525b3c1a0 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -885,12 +885,12 @@ out: static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) { - SevCommonState *sev_common = SEV_COMMON(cgs); char *devname; int ret, fw_error, cmd; uint32_t ebx; uint32_t host_cbitpos; struct sev_user_data_status status = {}; + SevCommonState *sev_common = SEV_COMMON(cgs); SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(cgs); sev_common->state = SEV_STATE_UNINIT; @@ -1030,6 +1030,21 @@ static int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) return 0; } +static int sev_snp_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) +{ + MachineState *ms = MACHINE(qdev_get_machine()); + X86MachineState *x86ms = X86_MACHINE(ms); + + if (x86ms->smm == ON_OFF_AUTO_AUTO) { + x86ms->smm = ON_OFF_AUTO_OFF; + } else if (x86ms->smm == ON_OFF_AUTO_ON) { + error_setg(errp, "SEV-SNP does not support SMM."); + return -1; + } + + return 0; +} + int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp) { @@ -1752,6 +1767,10 @@ sev_snp_guest_set_host_data(Object *obj, const char *value, Error **errp) static void sev_snp_guest_class_init(ObjectClass *oc, void *data) { + SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); + + klass->kvm_init = sev_snp_kvm_init; + object_class_property_add(oc, "policy", "uint64", sev_snp_guest_get_policy, sev_snp_guest_set_policy, NULL, NULL); @@ -1778,8 +1797,11 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data) static void sev_snp_guest_instance_init(Object *obj) { + ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj); SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj); + cgs->require_guest_memfd = true; + /* default init/start/finish params for kvm */ sev_snp_guest->kvm_start_conf.policy = DEFAULT_SEV_SNP_POLICY; } From 7831221941cccbde922412c1550ed8b4bce7c361 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 30 May 2024 06:16:23 -0500 Subject: [PATCH 26/46] i386/cpu: Set SEV-SNP CPUID bit when SNP enabled SNP guests will rely on this bit to determine certain feature support. Signed-off-by: Michael Roth Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-12-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini --- target/i386/cpu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/target/i386/cpu.c b/target/i386/cpu.c index bc2dceb647..914bef442c 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -6979,6 +6979,7 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, if (sev_enabled()) { *eax = 0x2; *eax |= sev_es_enabled() ? 0x8 : 0; + *eax |= sev_snp_enabled() ? 0x10 : 0; *ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */ *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */ } From 73ae63b162fc1fed520f53ad200712964d7d0264 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 30 May 2024 06:16:24 -0500 Subject: [PATCH 27/46] i386/sev: Don't return launch measurements for SEV-SNP guests For SEV-SNP guests, launch measurement is queried from within the guest during attestation, so don't attempt to return it as part of query-sev-launch-measure. Signed-off-by: Michael Roth Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-13-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini --- target/i386/sev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index 6525b3c1a0..c3daaf1ad5 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -795,7 +795,9 @@ sev_launch_get_measure(Notifier *notifier, void *unused) static char *sev_get_launch_measurement(void) { - SevGuestState *sev_guest = SEV_GUEST(MACHINE(qdev_get_machine())->cgs); + ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs; + SevGuestState *sev_guest = + (SevGuestState *)object_dynamic_cast(OBJECT(cgs), TYPE_SEV_GUEST); if (sev_guest && SEV_COMMON(sev_guest)->state >= SEV_STATE_LAUNCH_SECRET) { From a808132f6d8e855bd83a400570ec91d2e00bebe3 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 31 May 2024 12:44:44 +0200 Subject: [PATCH 28/46] i386/sev: Add a class method to determine KVM VM type for SNP guests SEV guests can use either KVM_X86_DEFAULT_VM, KVM_X86_SEV_VM, or KVM_X86_SEV_ES_VM depending on the configuration and what the host kernel supports. SNP guests on the other hand can only ever use KVM_X86_SNP_VM, so split determination of VM type out into a separate class method that can be set accordingly for sev-guest vs. sev-snp-guest objects and add handling for SNP. Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-14-pankaj.gupta@amd.com> [Remove unnecessary function pointer declaration. - Paolo] Signed-off-by: Paolo Bonzini --- target/i386/kvm/kvm.c | 1 + target/i386/sev.c | 15 ++++++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 6c864e4611..23a003aaa7 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -167,6 +167,7 @@ static const char *vm_type_name[] = { [KVM_X86_DEFAULT_VM] = "default", [KVM_X86_SEV_VM] = "SEV", [KVM_X86_SEV_ES_VM] = "SEV-ES", + [KVM_X86_SNP_VM] = "SEV-SNP", }; bool kvm_is_vm_type_supported(int type) diff --git a/target/i386/sev.c b/target/i386/sev.c index c3daaf1ad5..072cc4f853 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -885,6 +885,11 @@ out: return sev_common->kvm_type; } +static int sev_snp_kvm_type(X86ConfidentialGuest *cg) +{ + return KVM_X86_SNP_VM; +} + static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) { char *devname; @@ -894,6 +899,8 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) struct sev_user_data_status status = {}; SevCommonState *sev_common = SEV_COMMON(cgs); SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(cgs); + X86ConfidentialGuestClass *x86_klass = + X86_CONFIDENTIAL_GUEST_GET_CLASS(cgs); sev_common->state = SEV_STATE_UNINIT; @@ -964,7 +971,7 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) } trace_kvm_sev_init(); - if (sev_kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == KVM_X86_DEFAULT_VM) { + if (x86_klass->kvm_type(X86_CONFIDENTIAL_GUEST(sev_common)) == KVM_X86_DEFAULT_VM) { cmd = sev_es_enabled() ? KVM_SEV_ES_INIT : KVM_SEV_INIT; ret = sev_ioctl(sev_common->sev_fd, cmd, NULL, &fw_error); @@ -1441,10 +1448,8 @@ static void sev_common_class_init(ObjectClass *oc, void *data) { ConfidentialGuestSupportClass *klass = CONFIDENTIAL_GUEST_SUPPORT_CLASS(oc); - X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); klass->kvm_init = sev_common_kvm_init; - x86_klass->kvm_type = sev_kvm_type; object_class_property_add_str(oc, "sev-device", sev_common_get_sev_device, @@ -1529,10 +1534,12 @@ static void sev_guest_class_init(ObjectClass *oc, void *data) { SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); klass->launch_start = sev_launch_start; klass->launch_finish = sev_launch_finish; klass->kvm_init = sev_kvm_init; + x86_klass->kvm_type = sev_kvm_type; object_class_property_add_str(oc, "dh-cert-file", sev_guest_get_dh_cert_file, @@ -1770,8 +1777,10 @@ static void sev_snp_guest_class_init(ObjectClass *oc, void *data) { SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); + X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); klass->kvm_init = sev_snp_kvm_init; + x86_klass->kvm_type = sev_snp_kvm_type; object_class_property_add(oc, "policy", "uint64", sev_snp_guest_get_policy, From 59d3740cb4ac0f010ce35877572904f6297284b4 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 30 May 2024 06:16:26 -0500 Subject: [PATCH 29/46] i386/sev: Update query-sev QAPI format to handle SEV-SNP Most of the current 'query-sev' command is relevant to both legacy SEV/SEV-ES guests and SEV-SNP guests, with 2 exceptions: - 'policy' is a 64-bit field for SEV-SNP, not 32-bit, and the meaning of the bit positions has changed - 'handle' is not relevant to SEV-SNP To address this, this patch adds a new 'sev-type' field that can be used as a discriminator to select between SEV and SEV-SNP-specific fields/formats without breaking compatibility for existing management tools (so long as management tools that add support for launching SEV-SNP guest update their handling of query-sev appropriately). The corresponding HMP command has also been fixed up similarly. Signed-off-by: Michael Roth Co-developed-by:Pankaj Gupta Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-15-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini --- qapi/misc-target.json | 72 ++++++++++++++++++++++++++++++++++--------- target/i386/sev.c | 57 +++++++++++++++++++++------------- target/i386/sev.h | 3 ++ 3 files changed, 97 insertions(+), 35 deletions(-) diff --git a/qapi/misc-target.json b/qapi/misc-target.json index 4e0a6492a9..2d7d4d89bd 100644 --- a/qapi/misc-target.json +++ b/qapi/misc-target.json @@ -47,6 +47,50 @@ 'send-update', 'receive-update' ], 'if': 'TARGET_I386' } +## +# @SevGuestType: +# +# An enumeration indicating the type of SEV guest being run. +# +# @sev: The guest is a legacy SEV or SEV-ES guest. +# +# @sev-snp: The guest is an SEV-SNP guest. +# +# Since: 6.2 +## +{ 'enum': 'SevGuestType', + 'data': [ 'sev', 'sev-snp' ], + 'if': 'TARGET_I386' } + +## +# @SevGuestInfo: +# +# Information specific to legacy SEV/SEV-ES guests. +# +# @policy: SEV policy value +# +# @handle: SEV firmware handle +# +# Since: 2.12 +## +{ 'struct': 'SevGuestInfo', + 'data': { 'policy': 'uint32', + 'handle': 'uint32' }, + 'if': 'TARGET_I386' } + +## +# @SevSnpGuestInfo: +# +# Information specific to SEV-SNP guests. +# +# @snp-policy: SEV-SNP policy value +# +# Since: 9.1 +## +{ 'struct': 'SevSnpGuestInfo', + 'data': { 'snp-policy': 'uint64' }, + 'if': 'TARGET_I386' } + ## # @SevInfo: # @@ -60,25 +104,25 @@ # # @build-id: SEV FW build id # -# @policy: SEV policy value -# # @state: SEV guest state # -# @handle: SEV firmware handle +# @sev-type: Type of SEV guest being run # # Since: 2.12 ## -{ 'struct': 'SevInfo', - 'data': { 'enabled': 'bool', - 'api-major': 'uint8', - 'api-minor' : 'uint8', - 'build-id' : 'uint8', - 'policy' : 'uint32', - 'state' : 'SevState', - 'handle' : 'uint32' - }, - 'if': 'TARGET_I386' -} +{ 'union': 'SevInfo', + 'base': { 'enabled': 'bool', + 'api-major': 'uint8', + 'api-minor' : 'uint8', + 'build-id' : 'uint8', + 'state' : 'SevState', + 'sev-type' : 'SevGuestType' }, + 'discriminator': 'sev-type', + 'data': { + 'sev': 'SevGuestInfo', + 'sev-snp': 'SevSnpGuestInfo' }, + 'if': 'TARGET_I386' } + ## # @query-sev: diff --git a/target/i386/sev.c b/target/i386/sev.c index 072cc4f853..43d1c48bd9 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -363,25 +363,27 @@ static SevInfo *sev_get_info(void) { SevInfo *info; SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); - SevGuestState *sev_guest = - (SevGuestState *)object_dynamic_cast(OBJECT(sev_common), - TYPE_SEV_GUEST); info = g_new0(SevInfo, 1); info->enabled = sev_enabled(); if (info->enabled) { - if (sev_guest) { - info->handle = sev_guest->handle; - } info->api_major = sev_common->api_major; info->api_minor = sev_common->api_minor; info->build_id = sev_common->build_id; info->state = sev_common->state; - /* we only report the lower 32-bits of policy for SNP, ok for now... */ - info->policy = - (uint32_t)object_property_get_uint(OBJECT(sev_common), - "policy", NULL); + + if (sev_snp_enabled()) { + info->sev_type = SEV_GUEST_TYPE_SEV_SNP; + info->u.sev_snp.snp_policy = + object_property_get_uint(OBJECT(sev_common), "policy", NULL); + } else { + info->sev_type = SEV_GUEST_TYPE_SEV; + info->u.sev.handle = SEV_GUEST(sev_common)->handle; + info->u.sev.policy = + (uint32_t)object_property_get_uint(OBJECT(sev_common), + "policy", NULL); + } } return info; @@ -404,20 +406,33 @@ void hmp_info_sev(Monitor *mon, const QDict *qdict) { SevInfo *info = sev_get_info(); - if (info && info->enabled) { - monitor_printf(mon, "handle: %d\n", info->handle); - monitor_printf(mon, "state: %s\n", SevState_str(info->state)); - monitor_printf(mon, "build: %d\n", info->build_id); - monitor_printf(mon, "api version: %d.%d\n", - info->api_major, info->api_minor); - monitor_printf(mon, "debug: %s\n", - info->policy & SEV_POLICY_NODBG ? "off" : "on"); - monitor_printf(mon, "key-sharing: %s\n", - info->policy & SEV_POLICY_NOKS ? "off" : "on"); - } else { + if (!info || !info->enabled) { monitor_printf(mon, "SEV is not enabled\n"); + goto out; } + monitor_printf(mon, "SEV type: %s\n", SevGuestType_str(info->sev_type)); + monitor_printf(mon, "state: %s\n", SevState_str(info->state)); + monitor_printf(mon, "build: %d\n", info->build_id); + monitor_printf(mon, "api version: %d.%d\n", info->api_major, + info->api_minor); + + if (sev_snp_enabled()) { + monitor_printf(mon, "debug: %s\n", + info->u.sev_snp.snp_policy & SEV_SNP_POLICY_DBG ? "on" + : "off"); + monitor_printf(mon, "SMT allowed: %s\n", + info->u.sev_snp.snp_policy & SEV_SNP_POLICY_SMT ? "on" + : "off"); + } else { + monitor_printf(mon, "handle: %d\n", info->u.sev.handle); + monitor_printf(mon, "debug: %s\n", + info->u.sev.policy & SEV_POLICY_NODBG ? "off" : "on"); + monitor_printf(mon, "key-sharing: %s\n", + info->u.sev.policy & SEV_POLICY_NOKS ? "off" : "on"); + } + +out: qapi_free_SevInfo(info); } diff --git a/target/i386/sev.h b/target/i386/sev.h index 94295ee74f..5dc4767b1e 100644 --- a/target/i386/sev.h +++ b/target/i386/sev.h @@ -31,6 +31,9 @@ #define SEV_POLICY_DOMAIN 0x10 #define SEV_POLICY_SEV 0x20 +#define SEV_SNP_POLICY_SMT 0x10000 +#define SEV_SNP_POLICY_DBG 0x80000 + typedef struct SevKernelLoaderContext { char *setup_data; size_t setup_size; From d3107f882ec22cfb211eab7efa0c4e95f5ce11bb Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Thu, 30 May 2024 06:16:27 -0500 Subject: [PATCH 30/46] i386/sev: Add the SNP launch start context The SNP_LAUNCH_START is called first to create a cryptographic launch context within the firmware. Signed-off-by: Brijesh Singh Signed-off-by: Michael Roth Co-developed-by: Pankaj Gupta Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-16-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini --- target/i386/sev.c | 39 +++++++++++++++++++++++++++++++++++++++ target/i386/trace-events | 1 + 2 files changed, 40 insertions(+) diff --git a/target/i386/sev.c b/target/i386/sev.c index 43d1c48bd9..e89b87d2f5 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -39,6 +39,7 @@ #include "confidential-guest.h" #include "hw/i386/pc.h" #include "exec/address-spaces.h" +#include "qemu/queue.h" OBJECT_DECLARE_TYPE(SevCommonState, SevCommonStateClass, SEV_COMMON) OBJECT_DECLARE_TYPE(SevGuestState, SevCommonStateClass, SEV_GUEST) @@ -115,6 +116,16 @@ struct SevSnpGuestState { #define DEFAULT_SEV_DEVICE "/dev/sev" #define DEFAULT_SEV_SNP_POLICY 0x30000 +typedef struct SevLaunchUpdateData { + QTAILQ_ENTRY(SevLaunchUpdateData) next; + hwaddr gpa; + void *hva; + uint64_t len; + int type; +} SevLaunchUpdateData; + +static QTAILQ_HEAD(, SevLaunchUpdateData) launch_update; + #define SEV_INFO_BLOCK_GUID "00f771de-1a7e-4fcb-890e-68c77e2fb44e" typedef struct __attribute__((__packed__)) SevInfoBlock { /* SEV-ES Reset Vector Address */ @@ -674,6 +685,31 @@ sev_read_file_base64(const char *filename, guchar **data, gsize *len) return 0; } +static int +sev_snp_launch_start(SevCommonState *sev_common) +{ + int fw_error, rc; + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(sev_common); + struct kvm_sev_snp_launch_start *start = &sev_snp_guest->kvm_start_conf; + + trace_kvm_sev_snp_launch_start(start->policy, + sev_snp_guest->guest_visible_workarounds); + + rc = sev_ioctl(sev_common->sev_fd, KVM_SEV_SNP_LAUNCH_START, + start, &fw_error); + if (rc < 0) { + error_report("%s: SNP_LAUNCH_START ret=%d fw_error=%d '%s'", + __func__, rc, fw_error, fw_error_to_str(fw_error)); + return 1; + } + + QTAILQ_INIT(&launch_update); + + sev_set_guest_state(sev_common, SEV_STATE_LAUNCH_UPDATE); + + return 0; +} + static int sev_launch_start(SevCommonState *sev_common) { @@ -1003,6 +1039,7 @@ static int sev_common_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) } ret = klass->launch_start(sev_common); + if (ret) { error_setg(errp, "%s: failed to create encryption context", __func__); return -1; @@ -1794,9 +1831,11 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data) SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + klass->launch_start = sev_snp_launch_start; klass->kvm_init = sev_snp_kvm_init; x86_klass->kvm_type = sev_snp_kvm_type; + object_class_property_add(oc, "policy", "uint64", sev_snp_guest_get_policy, sev_snp_guest_set_policy, NULL, NULL); diff --git a/target/i386/trace-events b/target/i386/trace-events index 2cd8726eeb..cb26d8a925 100644 --- a/target/i386/trace-events +++ b/target/i386/trace-events @@ -11,3 +11,4 @@ kvm_sev_launch_measurement(const char *value) "data %s" kvm_sev_launch_finish(void) "" kvm_sev_launch_secret(uint64_t hpa, uint64_t hva, uint64_t secret, int len) "hpa 0x%" PRIx64 " hva 0x%" PRIx64 " data 0x%" PRIx64 " len %d" kvm_sev_attestation_report(const char *mnonce, const char *data) "mnonce %s data %s" +kvm_sev_snp_launch_start(uint64_t policy, char *gosvw) "policy 0x%" PRIx64 " gosvw %s" From 9f3a6999f9730a694d7db448a99f9c9cb6515992 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Thu, 30 May 2024 06:16:28 -0500 Subject: [PATCH 31/46] i386/sev: Add handling to encrypt/finalize guest launch data Process any queued up launch data and encrypt/measure it into the SNP guest instance prior to initial guest launch. This also updates the KVM_SEV_SNP_LAUNCH_UPDATE call to handle partial update responses. Signed-off-by: Brijesh Singh Co-developed-by: Michael Roth Signed-off-by: Michael Roth Co-developed-by: Pankaj Gupta Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-17-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini --- target/i386/sev.c | 112 ++++++++++++++++++++++++++++++++++++++- target/i386/trace-events | 2 + 2 files changed, 113 insertions(+), 1 deletion(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index e89b87d2f5..ef2e592ca7 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -756,6 +756,76 @@ out: return ret; } +static const char * +snp_page_type_to_str(int type) +{ + switch (type) { + case KVM_SEV_SNP_PAGE_TYPE_NORMAL: return "Normal"; + case KVM_SEV_SNP_PAGE_TYPE_ZERO: return "Zero"; + case KVM_SEV_SNP_PAGE_TYPE_UNMEASURED: return "Unmeasured"; + case KVM_SEV_SNP_PAGE_TYPE_SECRETS: return "Secrets"; + case KVM_SEV_SNP_PAGE_TYPE_CPUID: return "Cpuid"; + default: return "unknown"; + } +} + +static int +sev_snp_launch_update(SevSnpGuestState *sev_snp_guest, + SevLaunchUpdateData *data) +{ + int ret, fw_error; + struct kvm_sev_snp_launch_update update = {0}; + + if (!data->hva || !data->len) { + error_report("SNP_LAUNCH_UPDATE called with invalid address" + "/ length: %p / %lx", + data->hva, data->len); + return 1; + } + + update.uaddr = (__u64)(unsigned long)data->hva; + update.gfn_start = data->gpa >> TARGET_PAGE_BITS; + update.len = data->len; + update.type = data->type; + + /* + * KVM_SEV_SNP_LAUNCH_UPDATE requires that GPA ranges have the private + * memory attribute set in advance. + */ + ret = kvm_set_memory_attributes_private(data->gpa, data->len); + if (ret) { + error_report("SEV-SNP: failed to configure initial" + "private guest memory"); + goto out; + } + + while (update.len || ret == -EAGAIN) { + trace_kvm_sev_snp_launch_update(update.uaddr, update.gfn_start << + TARGET_PAGE_BITS, update.len, + snp_page_type_to_str(update.type)); + + ret = sev_ioctl(SEV_COMMON(sev_snp_guest)->sev_fd, + KVM_SEV_SNP_LAUNCH_UPDATE, + &update, &fw_error); + if (ret && ret != -EAGAIN) { + error_report("SNP_LAUNCH_UPDATE ret=%d fw_error=%d '%s'", + ret, fw_error, fw_error_to_str(fw_error)); + break; + } + } + +out: + if (!ret && update.gfn_start << TARGET_PAGE_BITS != data->gpa + data->len) { + error_report("SEV-SNP: expected update of GPA range %lx-%lx," + "got GPA range %lx-%llx", + data->gpa, data->gpa + data->len, data->gpa, + update.gfn_start << TARGET_PAGE_BITS); + ret = -EIO; + } + + return ret; +} + static int sev_launch_update_data(SevGuestState *sev_guest, uint8_t *addr, uint64_t len) { @@ -901,6 +971,46 @@ sev_launch_finish(SevCommonState *sev_common) migrate_add_blocker(&sev_mig_blocker, &error_fatal); } +static void +sev_snp_launch_finish(SevCommonState *sev_common) +{ + int ret, error; + Error *local_err = NULL; + SevLaunchUpdateData *data; + SevSnpGuestState *sev_snp = SEV_SNP_GUEST(sev_common); + struct kvm_sev_snp_launch_finish *finish = &sev_snp->kvm_finish_conf; + + QTAILQ_FOREACH(data, &launch_update, next) { + ret = sev_snp_launch_update(sev_snp, data); + if (ret) { + exit(1); + } + } + + trace_kvm_sev_snp_launch_finish(sev_snp->id_block, sev_snp->id_auth, + sev_snp->host_data); + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_SNP_LAUNCH_FINISH, + finish, &error); + if (ret) { + error_report("SNP_LAUNCH_FINISH ret=%d fw_error=%d '%s'", + ret, error, fw_error_to_str(error)); + exit(1); + } + + sev_set_guest_state(sev_common, SEV_STATE_RUNNING); + + /* add migration blocker */ + error_setg(&sev_mig_blocker, + "SEV-SNP: Migration is not implemented"); + ret = migrate_add_blocker(&sev_mig_blocker, &local_err); + if (local_err) { + error_report_err(local_err); + error_free(sev_mig_blocker); + exit(1); + } +} + + static void sev_vm_state_change(void *opaque, bool running, RunState state) { @@ -1832,10 +1942,10 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data) X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); klass->launch_start = sev_snp_launch_start; + klass->launch_finish = sev_snp_launch_finish; klass->kvm_init = sev_snp_kvm_init; x86_klass->kvm_type = sev_snp_kvm_type; - object_class_property_add(oc, "policy", "uint64", sev_snp_guest_get_policy, sev_snp_guest_set_policy, NULL, NULL); diff --git a/target/i386/trace-events b/target/i386/trace-events index cb26d8a925..06b44ead2e 100644 --- a/target/i386/trace-events +++ b/target/i386/trace-events @@ -12,3 +12,5 @@ kvm_sev_launch_finish(void) "" kvm_sev_launch_secret(uint64_t hpa, uint64_t hva, uint64_t secret, int len) "hpa 0x%" PRIx64 " hva 0x%" PRIx64 " data 0x%" PRIx64 " len %d" kvm_sev_attestation_report(const char *mnonce, const char *data) "mnonce %s data %s" kvm_sev_snp_launch_start(uint64_t policy, char *gosvw) "policy 0x%" PRIx64 " gosvw %s" +kvm_sev_snp_launch_update(uint64_t src, uint64_t gpa, uint64_t len, const char *type) "src 0x%" PRIx64 " gpa 0x%" PRIx64 " len 0x%" PRIx64 " (%s page)" +kvm_sev_snp_launch_finish(char *id_block, char *id_auth, char *host_data) "id_block %s id_auth %s host_data %s" From 3d44fdff60ea66fbd7a33f5d32b50843cd80f48a Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 30 May 2024 06:16:29 -0500 Subject: [PATCH 32/46] i386/sev: Set CPU state to protected once SNP guest payload is finalized Once KVM_SNP_LAUNCH_FINISH is called the vCPU state is copied into the vCPU's VMSA page and measured/encrypted. Any attempt to read/write CPU state afterward will only be acting on the initial data and so are effectively no-ops. Set the vCPU state to protected at this point so that QEMU don't continue trying to re-sync vCPU data during guest runtime. Signed-off-by: Michael Roth Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-18-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini --- target/i386/sev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/target/i386/sev.c b/target/i386/sev.c index ef2e592ca7..e84e4395a5 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -997,6 +997,7 @@ sev_snp_launch_finish(SevCommonState *sev_common) exit(1); } + kvm_mark_guest_state_protected(); sev_set_guest_state(sev_common, SEV_STATE_RUNNING); /* add migration blocker */ From f3c30c575d34122573b7370a7da5ca3a27dde481 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Thu, 30 May 2024 06:16:30 -0500 Subject: [PATCH 33/46] hw/i386/sev: Add function to get SEV metadata from OVMF header A recent version of OVMF expanded the reset vector GUID list to add SEV-specific metadata GUID. The SEV metadata describes the reserved memory regions such as the secrets and CPUID page used during the SEV-SNP guest launch. The pc_system_get_ovmf_sev_metadata_ptr() is used to retieve the SEV metadata pointer from the OVMF GUID list. Signed-off-by: Brijesh Singh Signed-off-by: Michael Roth Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-19-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini --- hw/i386/pc_sysfw.c | 4 ++++ include/hw/i386/pc.h | 26 ++++++++++++++++++++++++++ target/i386/sev-sysemu-stub.c | 4 ++++ target/i386/sev.c | 32 ++++++++++++++++++++++++++++++++ target/i386/sev.h | 2 ++ 5 files changed, 68 insertions(+) diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c index ac88ad4eb9..9b8671c441 100644 --- a/hw/i386/pc_sysfw.c +++ b/hw/i386/pc_sysfw.c @@ -260,6 +260,10 @@ void x86_firmware_configure(void *ptr, int size) pc_system_parse_ovmf_flash(ptr, size); if (sev_enabled()) { + + /* Copy the SEV metadata table (if it exists) */ + pc_system_parse_sev_metadata(ptr, size); + ret = sev_es_save_reset_vector(ptr, size); if (ret) { error_report("failed to locate and/or save reset vector"); diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index ad9c3d9ba8..c653b8eeb2 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -164,6 +164,32 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level); #define PCI_HOST_ABOVE_4G_MEM_SIZE "above-4g-mem-size" #define PCI_HOST_PROP_SMM_RANGES "smm-ranges" +typedef enum { + SEV_DESC_TYPE_UNDEF, + /* The section contains the region that must be validated by the VMM. */ + SEV_DESC_TYPE_SNP_SEC_MEM, + /* The section contains the SNP secrets page */ + SEV_DESC_TYPE_SNP_SECRETS, + /* The section contains address that can be used as a CPUID page */ + SEV_DESC_TYPE_CPUID, + +} ovmf_sev_metadata_desc_type; + +typedef struct __attribute__((__packed__)) OvmfSevMetadataDesc { + uint32_t base; + uint32_t len; + ovmf_sev_metadata_desc_type type; +} OvmfSevMetadataDesc; + +typedef struct __attribute__((__packed__)) OvmfSevMetadata { + uint8_t signature[4]; + uint32_t len; + uint32_t version; + uint32_t num_desc; + OvmfSevMetadataDesc descs[]; +} OvmfSevMetadata; + +OvmfSevMetadata *pc_system_get_ovmf_sev_metadata_ptr(void); void pc_pci_as_mapping_init(MemoryRegion *system_memory, MemoryRegion *pci_address_space); diff --git a/target/i386/sev-sysemu-stub.c b/target/i386/sev-sysemu-stub.c index 96e1c15cc3..fc1c57c411 100644 --- a/target/i386/sev-sysemu-stub.c +++ b/target/i386/sev-sysemu-stub.c @@ -67,3 +67,7 @@ void hmp_info_sev(Monitor *mon, const QDict *qdict) { monitor_printf(mon, "SEV is not available in this QEMU\n"); } + +void pc_system_parse_sev_metadata(uint8_t *flash_ptr, size_t flash_size) +{ +} diff --git a/target/i386/sev.c b/target/i386/sev.c index e84e4395a5..17281bb2c7 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -597,6 +597,38 @@ SevCapability *qmp_query_sev_capabilities(Error **errp) return sev_get_capabilities(errp); } +static OvmfSevMetadata *ovmf_sev_metadata_table; + +#define OVMF_SEV_META_DATA_GUID "dc886566-984a-4798-A75e-5585a7bf67cc" +typedef struct __attribute__((__packed__)) OvmfSevMetadataOffset { + uint32_t offset; +} OvmfSevMetadataOffset; + +OvmfSevMetadata *pc_system_get_ovmf_sev_metadata_ptr(void) +{ + return ovmf_sev_metadata_table; +} + +void pc_system_parse_sev_metadata(uint8_t *flash_ptr, size_t flash_size) +{ + OvmfSevMetadata *metadata; + OvmfSevMetadataOffset *data; + + if (!pc_system_ovmf_table_find(OVMF_SEV_META_DATA_GUID, (uint8_t **)&data, + NULL)) { + return; + } + + metadata = (OvmfSevMetadata *)(flash_ptr + flash_size - data->offset); + if (memcmp(metadata->signature, "ASEV", 4) != 0 || + metadata->len < sizeof(OvmfSevMetadata) || + metadata->len > flash_size - data->offset) { + return; + } + + ovmf_sev_metadata_table = g_memdup2(metadata, metadata->len); +} + static SevAttestationReport *sev_get_attestation_report(const char *mnonce, Error **errp) { diff --git a/target/i386/sev.h b/target/i386/sev.h index 5dc4767b1e..cc12824dd6 100644 --- a/target/i386/sev.h +++ b/target/i386/sev.h @@ -66,4 +66,6 @@ int sev_inject_launch_secret(const char *hdr, const char *secret, int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size); void sev_es_set_reset_vector(CPUState *cpu); +void pc_system_parse_sev_metadata(uint8_t *flash_ptr, size_t flash_size); + #endif From 3d8c2a7f4806ff39423312e503737fd76c34dcae Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Thu, 30 May 2024 06:16:31 -0500 Subject: [PATCH 34/46] i386/sev: Add support for populating OVMF metadata pages OVMF reserves various pages so they can be pre-initialized/validated prior to launching the guest. Add support for populating these pages with the expected content. Signed-off-by: Brijesh Singh Signed-off-by: Michael Roth Co-developed-by: Pankaj Gupta Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-20-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini --- target/i386/sev.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/target/i386/sev.c b/target/i386/sev.c index 17281bb2c7..c57534fca2 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -1003,15 +1003,89 @@ sev_launch_finish(SevCommonState *sev_common) migrate_add_blocker(&sev_mig_blocker, &error_fatal); } +static int +snp_launch_update_data(uint64_t gpa, void *hva, uint32_t len, int type) +{ + SevLaunchUpdateData *data; + + data = g_new0(SevLaunchUpdateData, 1); + data->gpa = gpa; + data->hva = hva; + data->len = len; + data->type = type; + + QTAILQ_INSERT_TAIL(&launch_update, data, next); + + return 0; +} + +static int +snp_metadata_desc_to_page_type(int desc_type) +{ + switch (desc_type) { + /* Add the umeasured prevalidated pages as a zero page */ + case SEV_DESC_TYPE_SNP_SEC_MEM: return KVM_SEV_SNP_PAGE_TYPE_ZERO; + case SEV_DESC_TYPE_SNP_SECRETS: return KVM_SEV_SNP_PAGE_TYPE_SECRETS; + case SEV_DESC_TYPE_CPUID: return KVM_SEV_SNP_PAGE_TYPE_CPUID; + default: + return KVM_SEV_SNP_PAGE_TYPE_ZERO; + } +} + +static void +snp_populate_metadata_pages(SevSnpGuestState *sev_snp, + OvmfSevMetadata *metadata) +{ + OvmfSevMetadataDesc *desc; + int type, ret, i; + void *hva; + MemoryRegion *mr = NULL; + + for (i = 0; i < metadata->num_desc; i++) { + desc = &metadata->descs[i]; + + type = snp_metadata_desc_to_page_type(desc->type); + + hva = gpa2hva(&mr, desc->base, desc->len, NULL); + if (!hva) { + error_report("%s: Failed to get HVA for GPA 0x%x sz 0x%x", + __func__, desc->base, desc->len); + exit(1); + } + + ret = snp_launch_update_data(desc->base, hva, desc->len, type); + if (ret) { + error_report("%s: Failed to add metadata page gpa 0x%x+%x type %d", + __func__, desc->base, desc->len, desc->type); + exit(1); + } + } +} + static void sev_snp_launch_finish(SevCommonState *sev_common) { int ret, error; Error *local_err = NULL; + OvmfSevMetadata *metadata; SevLaunchUpdateData *data; SevSnpGuestState *sev_snp = SEV_SNP_GUEST(sev_common); struct kvm_sev_snp_launch_finish *finish = &sev_snp->kvm_finish_conf; + /* + * To boot the SNP guest, the hypervisor is required to populate the CPUID + * and Secrets page before finalizing the launch flow. The location of + * the secrets and CPUID page is available through the OVMF metadata GUID. + */ + metadata = pc_system_get_ovmf_sev_metadata_ptr(); + if (metadata == NULL) { + error_report("%s: Failed to locate SEV metadata header", __func__); + exit(1); + } + + /* Populate all the metadata pages */ + snp_populate_metadata_pages(sev_snp, metadata); + QTAILQ_FOREACH(data, &launch_update, next) { ret = sev_snp_launch_update(sev_snp, data); if (ret) { From 70943ad8e4dfbe5f77006b880290219be9d03553 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 30 May 2024 06:16:32 -0500 Subject: [PATCH 35/46] i386/sev: Add support for SNP CPUID validation SEV-SNP firmware allows a special guest page to be populated with a table of guest CPUID values so that they can be validated through firmware before being loaded into encrypted guest memory where they can be used in place of hypervisor-provided values[1]. As part of SEV-SNP guest initialization, use this interface to validate the CPUID entries reported by KVM_GET_CPUID2 prior to initial guest start and populate the CPUID page reserved by OVMF with the resulting encrypted data. [1] SEV SNP Firmware ABI Specification, Rev. 0.8, 8.13.2.6 Signed-off-by: Michael Roth Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-21-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini --- target/i386/sev.c | 164 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 162 insertions(+), 2 deletions(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index c57534fca2..06401f0526 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -200,6 +200,36 @@ static const char *const sev_fw_errlist[] = { #define SEV_FW_MAX_ERROR ARRAY_SIZE(sev_fw_errlist) +/* doesn't expose this, so re-use the max from kvm.c */ +#define KVM_MAX_CPUID_ENTRIES 100 + +typedef struct KvmCpuidInfo { + struct kvm_cpuid2 cpuid; + struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; +} KvmCpuidInfo; + +#define SNP_CPUID_FUNCTION_MAXCOUNT 64 +#define SNP_CPUID_FUNCTION_UNKNOWN 0xFFFFFFFF + +typedef struct { + uint32_t eax_in; + uint32_t ecx_in; + uint64_t xcr0_in; + uint64_t xss_in; + uint32_t eax; + uint32_t ebx; + uint32_t ecx; + uint32_t edx; + uint64_t reserved; +} __attribute__((packed)) SnpCpuidFunc; + +typedef struct { + uint32_t count; + uint32_t reserved1; + uint64_t reserved2; + SnpCpuidFunc entries[SNP_CPUID_FUNCTION_MAXCOUNT]; +} __attribute__((packed)) SnpCpuidInfo; + static int sev_ioctl(int fd, int cmd, void *data, int *error) { @@ -788,6 +818,35 @@ out: return ret; } +static void +sev_snp_cpuid_report_mismatches(SnpCpuidInfo *old, + SnpCpuidInfo *new) +{ + size_t i; + + if (old->count != new->count) { + error_report("SEV-SNP: CPUID validation failed due to count mismatch," + "provided: %d, expected: %d", old->count, new->count); + return; + } + + for (i = 0; i < old->count; i++) { + SnpCpuidFunc *old_func, *new_func; + + old_func = &old->entries[i]; + new_func = &new->entries[i]; + + if (memcmp(old_func, new_func, sizeof(SnpCpuidFunc))) { + error_report("SEV-SNP: CPUID validation failed for function 0x%x, index: 0x%x" + "provided: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x" + "expected: eax:0x%08x, ebx: 0x%08x, ecx: 0x%08x, edx: 0x%08x", + old_func->eax_in, old_func->ecx_in, + old_func->eax, old_func->ebx, old_func->ecx, old_func->edx, + new_func->eax, new_func->ebx, new_func->ecx, new_func->edx); + } + } +} + static const char * snp_page_type_to_str(int type) { @@ -806,6 +865,7 @@ sev_snp_launch_update(SevSnpGuestState *sev_snp_guest, SevLaunchUpdateData *data) { int ret, fw_error; + SnpCpuidInfo snp_cpuid_info; struct kvm_sev_snp_launch_update update = {0}; if (!data->hva || !data->len) { @@ -815,6 +875,11 @@ sev_snp_launch_update(SevSnpGuestState *sev_snp_guest, return 1; } + if (data->type == KVM_SEV_SNP_PAGE_TYPE_CPUID) { + /* Save a copy for comparison in case the LAUNCH_UPDATE fails */ + memcpy(&snp_cpuid_info, data->hva, sizeof(snp_cpuid_info)); + } + update.uaddr = (__u64)(unsigned long)data->hva; update.gfn_start = data->gpa >> TARGET_PAGE_BITS; update.len = data->len; @@ -842,6 +907,11 @@ sev_snp_launch_update(SevSnpGuestState *sev_snp_guest, if (ret && ret != -EAGAIN) { error_report("SNP_LAUNCH_UPDATE ret=%d fw_error=%d '%s'", ret, fw_error, fw_error_to_str(fw_error)); + + if (data->type == KVM_SEV_SNP_PAGE_TYPE_CPUID) { + sev_snp_cpuid_report_mismatches(&snp_cpuid_info, data->hva); + error_report("SEV-SNP: failed update CPUID page"); + } break; } } @@ -1004,7 +1074,8 @@ sev_launch_finish(SevCommonState *sev_common) } static int -snp_launch_update_data(uint64_t gpa, void *hva, uint32_t len, int type) +snp_launch_update_data(uint64_t gpa, void *hva, + uint32_t len, int type) { SevLaunchUpdateData *data; @@ -1019,6 +1090,90 @@ snp_launch_update_data(uint64_t gpa, void *hva, uint32_t len, int type) return 0; } +static int +sev_snp_cpuid_info_fill(SnpCpuidInfo *snp_cpuid_info, + const KvmCpuidInfo *kvm_cpuid_info) +{ + size_t i; + + if (kvm_cpuid_info->cpuid.nent > SNP_CPUID_FUNCTION_MAXCOUNT) { + error_report("SEV-SNP: CPUID entry count (%d) exceeds max (%d)", + kvm_cpuid_info->cpuid.nent, SNP_CPUID_FUNCTION_MAXCOUNT); + return -1; + } + + memset(snp_cpuid_info, 0, sizeof(*snp_cpuid_info)); + + for (i = 0; i < kvm_cpuid_info->cpuid.nent; i++) { + const struct kvm_cpuid_entry2 *kvm_cpuid_entry; + SnpCpuidFunc *snp_cpuid_entry; + + kvm_cpuid_entry = &kvm_cpuid_info->entries[i]; + snp_cpuid_entry = &snp_cpuid_info->entries[i]; + + snp_cpuid_entry->eax_in = kvm_cpuid_entry->function; + if (kvm_cpuid_entry->flags == KVM_CPUID_FLAG_SIGNIFCANT_INDEX) { + snp_cpuid_entry->ecx_in = kvm_cpuid_entry->index; + } + snp_cpuid_entry->eax = kvm_cpuid_entry->eax; + snp_cpuid_entry->ebx = kvm_cpuid_entry->ebx; + snp_cpuid_entry->ecx = kvm_cpuid_entry->ecx; + snp_cpuid_entry->edx = kvm_cpuid_entry->edx; + + /* + * Guest kernels will calculate EBX themselves using the 0xD + * subfunctions corresponding to the individual XSAVE areas, so only + * encode the base XSAVE size in the initial leaves, corresponding + * to the initial XCR0=1 state. + */ + if (snp_cpuid_entry->eax_in == 0xD && + (snp_cpuid_entry->ecx_in == 0x0 || snp_cpuid_entry->ecx_in == 0x1)) { + snp_cpuid_entry->ebx = 0x240; + snp_cpuid_entry->xcr0_in = 1; + snp_cpuid_entry->xss_in = 0; + } + } + + snp_cpuid_info->count = i; + + return 0; +} + +static int +snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, uint32_t cpuid_len) +{ + KvmCpuidInfo kvm_cpuid_info = {0}; + SnpCpuidInfo snp_cpuid_info; + CPUState *cs = first_cpu; + int ret; + uint32_t i = 0; + + assert(sizeof(snp_cpuid_info) <= cpuid_len); + + /* get the cpuid list from KVM */ + do { + kvm_cpuid_info.cpuid.nent = ++i; + ret = kvm_vcpu_ioctl(cs, KVM_GET_CPUID2, &kvm_cpuid_info); + } while (ret == -E2BIG); + + if (ret) { + error_report("SEV-SNP: unable to query CPUID values for CPU: '%s'", + strerror(-ret)); + return 1; + } + + ret = sev_snp_cpuid_info_fill(&snp_cpuid_info, &kvm_cpuid_info); + if (ret) { + error_report("SEV-SNP: failed to generate CPUID table information"); + return 1; + } + + memcpy(hva, &snp_cpuid_info, sizeof(snp_cpuid_info)); + + return snp_launch_update_data(cpuid_addr, hva, cpuid_len, + KVM_SEV_SNP_PAGE_TYPE_CPUID); +} + static int snp_metadata_desc_to_page_type(int desc_type) { @@ -1053,7 +1208,12 @@ snp_populate_metadata_pages(SevSnpGuestState *sev_snp, exit(1); } - ret = snp_launch_update_data(desc->base, hva, desc->len, type); + if (type == KVM_SEV_SNP_PAGE_TYPE_CPUID) { + ret = snp_launch_update_cpuid(desc->base, hva, desc->len); + } else { + ret = snp_launch_update_data(desc->base, hva, desc->len, type); + } + if (ret) { error_report("%s: Failed to add metadata page gpa 0x%x+%x type %d", __func__, desc->base, desc->len, desc->type); From 77d1abd91e5352ad30ae2f83790f95fa6a3c0b6b Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Thu, 30 May 2024 06:16:36 -0500 Subject: [PATCH 36/46] hw/i386/sev: Add support to encrypt BIOS when SEV-SNP is enabled As with SEV, an SNP guest requires that the BIOS be part of the initial encrypted/measured guest payload. Extend sev_encrypt_flash() to handle the SNP case and plumb through the GPA of the BIOS location since this is needed for SNP. Signed-off-by: Brijesh Singh Signed-off-by: Michael Roth Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-25-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini --- hw/i386/pc_sysfw.c | 12 +++++++----- hw/i386/x86-common.c | 2 +- include/hw/i386/x86.h | 2 +- target/i386/sev-sysemu-stub.c | 2 +- target/i386/sev.c | 5 +++-- target/i386/sev.h | 2 +- 6 files changed, 14 insertions(+), 11 deletions(-) diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c index 9b8671c441..7cdbafc8d2 100644 --- a/hw/i386/pc_sysfw.c +++ b/hw/i386/pc_sysfw.c @@ -148,6 +148,8 @@ static void pc_system_flash_map(PCMachineState *pcms, assert(PC_MACHINE_GET_CLASS(pcms)->pci_enabled); for (i = 0; i < ARRAY_SIZE(pcms->flash); i++) { + hwaddr gpa; + system_flash = pcms->flash[i]; blk = pflash_cfi01_get_blk(system_flash); if (!blk) { @@ -177,11 +179,11 @@ static void pc_system_flash_map(PCMachineState *pcms, } total_size += size; + gpa = 0x100000000ULL - total_size; /* where the flash is mapped */ qdev_prop_set_uint32(DEVICE(system_flash), "num-blocks", size / FLASH_SECTOR_SIZE); sysbus_realize_and_unref(SYS_BUS_DEVICE(system_flash), &error_fatal); - sysbus_mmio_map(SYS_BUS_DEVICE(system_flash), 0, - 0x100000000ULL - total_size); + sysbus_mmio_map(SYS_BUS_DEVICE(system_flash), 0, gpa); if (i == 0) { flash_mem = pflash_cfi01_get_memory(system_flash); @@ -196,7 +198,7 @@ static void pc_system_flash_map(PCMachineState *pcms, if (sev_enabled()) { flash_ptr = memory_region_get_ram_ptr(flash_mem); flash_size = memory_region_size(flash_mem); - x86_firmware_configure(flash_ptr, flash_size); + x86_firmware_configure(gpa, flash_ptr, flash_size); } } } @@ -249,7 +251,7 @@ void pc_system_firmware_init(PCMachineState *pcms, pc_system_flash_cleanup_unused(pcms); } -void x86_firmware_configure(void *ptr, int size) +void x86_firmware_configure(hwaddr gpa, void *ptr, int size) { int ret; @@ -270,6 +272,6 @@ void x86_firmware_configure(void *ptr, int size) exit(1); } - sev_encrypt_flash(ptr, size, &error_fatal); + sev_encrypt_flash(gpa, ptr, size, &error_fatal); } } diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c index ee9046d9a8..f41cb0a6a8 100644 --- a/hw/i386/x86-common.c +++ b/hw/i386/x86-common.c @@ -1013,7 +1013,7 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, */ void *ptr = memory_region_get_ram_ptr(&x86ms->bios); load_image_size(filename, ptr, bios_size); - x86_firmware_configure(ptr, bios_size); + x86_firmware_configure(0x100000000ULL - bios_size, ptr, bios_size); } else { memory_region_set_readonly(&x86ms->bios, !isapc_ram_fw); ret = rom_add_file_fixed(bios_name, (uint32_t)(-bios_size), -1); diff --git a/include/hw/i386/x86.h b/include/hw/i386/x86.h index b006f16b8d..d43cb3908e 100644 --- a/include/hw/i386/x86.h +++ b/include/hw/i386/x86.h @@ -154,6 +154,6 @@ void ioapic_init_gsi(GSIState *gsi_state, Object *parent); DeviceState *ioapic_init_secondary(GSIState *gsi_state); /* pc_sysfw.c */ -void x86_firmware_configure(void *ptr, int size); +void x86_firmware_configure(hwaddr gpa, void *ptr, int size); #endif diff --git a/target/i386/sev-sysemu-stub.c b/target/i386/sev-sysemu-stub.c index fc1c57c411..d5bf886e79 100644 --- a/target/i386/sev-sysemu-stub.c +++ b/target/i386/sev-sysemu-stub.c @@ -42,7 +42,7 @@ void qmp_sev_inject_launch_secret(const char *packet_header, const char *secret, error_setg(errp, "SEV is not available in this QEMU"); } -int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp) +int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) { g_assert_not_reached(); } diff --git a/target/i386/sev.c b/target/i386/sev.c index 06401f0526..7b5c4b4874 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -1484,7 +1484,7 @@ static int sev_snp_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) } int -sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp) +sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) { SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); @@ -1841,7 +1841,8 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) /* zero the excess data so the measurement can be reliably calculated */ memset(padded_ht->padding, 0, sizeof(padded_ht->padding)); - if (sev_encrypt_flash((uint8_t *)padded_ht, sizeof(*padded_ht), errp) < 0) { + if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht, + sizeof(*padded_ht), errp) < 0) { ret = false; } diff --git a/target/i386/sev.h b/target/i386/sev.h index cc12824dd6..858005a119 100644 --- a/target/i386/sev.h +++ b/target/i386/sev.h @@ -59,7 +59,7 @@ uint32_t sev_get_cbit_position(void); uint32_t sev_get_reduced_phys_bits(void); bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp); -int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp); +int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp); int sev_inject_launch_secret(const char *hdr, const char *secret, uint64_t gpa, Error **errp); From 9861405a8f845133b7984322c2df0c43a45553c3 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 31 May 2024 12:51:44 +0200 Subject: [PATCH 37/46] i386/sev: Invoke launch_updata_data() for SEV class Add launch_update_data() in SevCommonStateClass and invoke as sev_launch_update_data() for SEV object. Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-26-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini --- target/i386/sev.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index 7b5c4b4874..8834cf9441 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -74,6 +74,7 @@ struct SevCommonStateClass { /* public */ int (*launch_start)(SevCommonState *sev_common); void (*launch_finish)(SevCommonState *sev_common); + int (*launch_update_data)(SevCommonState *sev_common, hwaddr gpa, uint8_t *ptr, uint64_t len); int (*kvm_init)(ConfidentialGuestSupport *cgs, Error **errp); }; @@ -929,7 +930,7 @@ out: } static int -sev_launch_update_data(SevGuestState *sev_guest, uint8_t *addr, uint64_t len) +sev_launch_update_data(SevCommonState *sev_common, hwaddr gpa, uint8_t *addr, uint64_t len) { int ret, fw_error; struct kvm_sev_launch_update_data update; @@ -941,7 +942,7 @@ sev_launch_update_data(SevGuestState *sev_guest, uint8_t *addr, uint64_t len) update.uaddr = (uintptr_t)addr; update.len = len; trace_kvm_sev_launch_update_data(addr, len); - ret = sev_ioctl(SEV_COMMON(sev_guest)->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA, + ret = sev_ioctl(sev_common->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA, &update, &fw_error); if (ret) { error_report("%s: LAUNCH_UPDATE ret=%d fw_error=%d '%s'", @@ -1487,6 +1488,7 @@ int sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) { SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(sev_common); if (!sev_common) { return 0; @@ -1494,7 +1496,9 @@ sev_encrypt_flash(hwaddr gpa, uint8_t *ptr, uint64_t len, Error **errp) /* if SEV is in update state then encrypt the data else do nothing */ if (sev_check_state(sev_common, SEV_STATE_LAUNCH_UPDATE)) { - int ret = sev_launch_update_data(SEV_GUEST(sev_common), ptr, len); + int ret; + + ret = klass->launch_update_data(sev_common, gpa, ptr, len); if (ret < 0) { error_setg(errp, "SEV: Failed to encrypt pflash rom"); return ret; @@ -1968,6 +1972,7 @@ sev_guest_class_init(ObjectClass *oc, void *data) klass->launch_start = sev_launch_start; klass->launch_finish = sev_launch_finish; + klass->launch_update_data = sev_launch_update_data; klass->kvm_init = sev_kvm_init; x86_klass->kvm_type = sev_kvm_type; From 0765d136eba400ad1cb7cae18438bb10eace64dc Mon Sep 17 00:00:00 2001 From: Pankaj Gupta Date: Thu, 30 May 2024 06:16:38 -0500 Subject: [PATCH 38/46] i386/sev: Invoke launch_updata_data() for SNP class Invoke as sev_snp_launch_update_data() for SNP object. Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-27-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini --- target/i386/sev.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/target/i386/sev.c b/target/i386/sev.c index 8834cf9441..eaf5fc6c6b 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -1091,6 +1091,15 @@ snp_launch_update_data(uint64_t gpa, void *hva, return 0; } +static int +sev_snp_launch_update_data(SevCommonState *sev_common, hwaddr gpa, + uint8_t *ptr, uint64_t len) +{ + int ret = snp_launch_update_data(gpa, ptr, len, + KVM_SEV_SNP_PAGE_TYPE_NORMAL); + return ret; +} + static int sev_snp_cpuid_info_fill(SnpCpuidInfo *snp_cpuid_info, const KvmCpuidInfo *kvm_cpuid_info) @@ -2216,6 +2225,7 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data) klass->launch_start = sev_snp_launch_start; klass->launch_finish = sev_snp_launch_finish; + klass->launch_update_data = sev_snp_launch_update_data; klass->kvm_init = sev_snp_kvm_init; x86_klass->kvm_type = sev_snp_kvm_type; From 47e76d03b155e43beca550251a6eb7ea926c059f Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 30 May 2024 06:16:42 -0500 Subject: [PATCH 39/46] i386/kvm: Add KVM_EXIT_HYPERCALL handling for KVM_HC_MAP_GPA_RANGE KVM_HC_MAP_GPA_RANGE will be used to send requests to userspace for private/shared memory attribute updates requested by the guest. Implement handling for that use-case along with some basic infrastructure for enabling specific hypercall events. Signed-off-by: Michael Roth Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-31-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini --- target/i386/kvm/kvm.c | 55 ++++++++++++++++++++++++++++++++++++ target/i386/kvm/kvm_i386.h | 1 + target/i386/kvm/trace-events | 1 + 3 files changed, 57 insertions(+) diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c index 23a003aaa7..ede3ef1225 100644 --- a/target/i386/kvm/kvm.c +++ b/target/i386/kvm/kvm.c @@ -21,6 +21,7 @@ #include #include +#include #include "standard-headers/asm-x86/kvm_para.h" #include "hw/xen/interface/arch-x86/cpuid.h" @@ -209,6 +210,13 @@ int kvm_get_vm_type(MachineState *ms) return kvm_type; } +bool kvm_enable_hypercall(uint64_t enable_mask) +{ + KVMState *s = KVM_STATE(current_accel()); + + return !kvm_vm_enable_cap(s, KVM_CAP_EXIT_HYPERCALL, 0, enable_mask); +} + bool kvm_has_smm(void) { return kvm_vm_check_extension(kvm_state, KVM_CAP_X86_SMM); @@ -5322,6 +5330,50 @@ static bool host_supports_vmx(void) return ecx & CPUID_EXT_VMX; } +/* + * Currently the handling here only supports use of KVM_HC_MAP_GPA_RANGE + * to service guest-initiated memory attribute update requests so that + * KVM_SET_MEMORY_ATTRIBUTES can update whether or not a page should be + * backed by the private memory pool provided by guest_memfd, and as such + * is only applicable to guest_memfd-backed guests (e.g. SNP/TDX). + * + * Other other use-cases for KVM_HC_MAP_GPA_RANGE, such as for SEV live + * migration, are not implemented here currently. + * + * For the guest_memfd use-case, these exits will generally be synthesized + * by KVM based on platform-specific hypercalls, like GHCB requests in the + * case of SEV-SNP, and not issued directly within the guest though the + * KVM_HC_MAP_GPA_RANGE hypercall. So in this case, KVM_HC_MAP_GPA_RANGE is + * not actually advertised to guests via the KVM CPUID feature bit, as + * opposed to SEV live migration where it would be. Since it is unlikely the + * SEV live migration use-case would be useful for guest-memfd backed guests, + * because private/shared page tracking is already provided through other + * means, these 2 use-cases should be treated as being mutually-exclusive. + */ +static int kvm_handle_hc_map_gpa_range(struct kvm_run *run) +{ + uint64_t gpa, size, attributes; + + if (!machine_require_guest_memfd(current_machine)) + return -EINVAL; + + gpa = run->hypercall.args[0]; + size = run->hypercall.args[1] * TARGET_PAGE_SIZE; + attributes = run->hypercall.args[2]; + + trace_kvm_hc_map_gpa_range(gpa, size, attributes, run->hypercall.flags); + + return kvm_convert_memory(gpa, size, attributes & KVM_MAP_GPA_RANGE_ENCRYPTED); +} + +static int kvm_handle_hypercall(struct kvm_run *run) +{ + if (run->hypercall.nr == KVM_HC_MAP_GPA_RANGE) + return kvm_handle_hc_map_gpa_range(run); + + return -EINVAL; +} + #define VMX_INVALID_GUEST_STATE 0x80000021 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) @@ -5417,6 +5469,9 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) ret = kvm_xen_handle_exit(cpu, &run->xen); break; #endif + case KVM_EXIT_HYPERCALL: + ret = kvm_handle_hypercall(run); + break; default: fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); ret = -1; diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h index 6b44844d95..34fc60774b 100644 --- a/target/i386/kvm/kvm_i386.h +++ b/target/i386/kvm/kvm_i386.h @@ -33,6 +33,7 @@ bool kvm_has_smm(void); bool kvm_enable_x2apic(void); bool kvm_hv_vpindex_settable(void); +bool kvm_enable_hypercall(uint64_t enable_mask); bool kvm_enable_sgx_provisioning(KVMState *s); bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp); diff --git a/target/i386/kvm/trace-events b/target/i386/kvm/trace-events index b365a8e8e2..74a6234ff7 100644 --- a/target/i386/kvm/trace-events +++ b/target/i386/kvm/trace-events @@ -5,6 +5,7 @@ kvm_x86_fixup_msi_error(uint32_t gsi) "VT-d failed to remap interrupt for GSI %" kvm_x86_add_msi_route(int virq) "Adding route entry for virq %d" kvm_x86_remove_msi_route(int virq) "Removing route entry for virq %d" kvm_x86_update_msi_routes(int num) "Updated %d MSI routes" +kvm_hc_map_gpa_range(uint64_t gpa, uint64_t size, uint64_t attributes, uint64_t flags) "gpa 0x%" PRIx64 " size 0x%" PRIx64 " attributes 0x%" PRIx64 " flags 0x%" PRIx64 # xen-emu.c kvm_xen_hypercall(int cpu, uint8_t cpl, uint64_t input, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t ret) "xen_hypercall: cpu %d cpl %d input %" PRIu64 " a0 0x%" PRIx64 " a1 0x%" PRIx64 " a2 0x%" PRIx64" ret 0x%" PRIx64 From e3cddff93c1f88fea3b26841e792dc0be6b6fae8 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 30 May 2024 06:16:43 -0500 Subject: [PATCH 40/46] i386/sev: Enable KVM_HC_MAP_GPA_RANGE hcall for SNP guests KVM will forward GHCB page-state change requests to userspace in the form of KVM_HC_MAP_GPA_RANGE, so make sure the hypercall handling is enabled for SNP guests. Signed-off-by: Michael Roth Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-32-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini --- target/i386/sev.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/target/i386/sev.c b/target/i386/sev.c index eaf5fc6c6b..abb63062ac 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -14,6 +14,7 @@ #include "qemu/osdep.h" #include +#include #include #include @@ -758,6 +759,10 @@ sev_snp_launch_start(SevCommonState *sev_common) trace_kvm_sev_snp_launch_start(start->policy, sev_snp_guest->guest_visible_workarounds); + if (!kvm_enable_hypercall(BIT_ULL(KVM_HC_MAP_GPA_RANGE))) { + return 1; + } + rc = sev_ioctl(sev_common->sev_fd, KVM_SEV_SNP_LAUNCH_START, start, &fw_error); if (rc < 0) { From 06cbd66cecaa3230cccb330facac241a677b29d5 Mon Sep 17 00:00:00 2001 From: Dov Murik Date: Thu, 30 May 2024 06:16:33 -0500 Subject: [PATCH 41/46] i386/sev: Extract build_kernel_loader_hashes Extract the building of the kernel hashes table out from sev_add_kernel_loader_hashes() to allow building it in other memory areas (for SNP support). No functional change intended. Signed-off-by: Dov Murik Signed-off-by: Michael Roth Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-22-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini --- target/i386/sev.c | 102 ++++++++++++++++++++++++++-------------------- 1 file changed, 58 insertions(+), 44 deletions(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index abb63062ac..73f9406715 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -1754,45 +1754,16 @@ static const QemuUUID sev_cmdline_entry_guid = { 0x4d, 0x36, 0xab, 0x2a) }; -/* - * Add the hashes of the linux kernel/initrd/cmdline to an encrypted guest page - * which is included in SEV's initial memory measurement. - */ -bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) +static bool build_kernel_loader_hashes(PaddedSevHashTable *padded_ht, + SevKernelLoaderContext *ctx, + Error **errp) { - uint8_t *data; - SevHashTableDescriptor *area; SevHashTable *ht; - PaddedSevHashTable *padded_ht; uint8_t cmdline_hash[HASH_SIZE]; uint8_t initrd_hash[HASH_SIZE]; uint8_t kernel_hash[HASH_SIZE]; uint8_t *hashp; size_t hash_len = HASH_SIZE; - hwaddr mapped_len = sizeof(*padded_ht); - MemTxAttrs attrs = { 0 }; - bool ret = true; - SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); - - /* - * Only add the kernel hashes if the sev-guest configuration explicitly - * stated kernel-hashes=on. - */ - if (!sev_common->kernel_hashes) { - return false; - } - - if (!pc_system_ovmf_table_find(SEV_HASH_TABLE_RV_GUID, &data, NULL)) { - error_setg(errp, "SEV: kernel specified but guest firmware " - "has no hashes table GUID"); - return false; - } - area = (SevHashTableDescriptor *)data; - if (!area->base || area->size < sizeof(PaddedSevHashTable)) { - error_setg(errp, "SEV: guest firmware hashes table area is invalid " - "(base=0x%x size=0x%x)", area->base, area->size); - return false; - } /* * Calculate hash of kernel command-line with the terminating null byte. If @@ -1829,16 +1800,6 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) } assert(hash_len == HASH_SIZE); - /* - * Populate the hashes table in the guest's memory at the OVMF-designated - * area for the SEV hashes table - */ - padded_ht = address_space_map(&address_space_memory, area->base, - &mapped_len, true, attrs); - if (!padded_ht || mapped_len != sizeof(*padded_ht)) { - error_setg(errp, "SEV: cannot map hashes table guest memory area"); - return false; - } ht = &padded_ht->ht; ht->guid = sev_hash_table_header_guid; @@ -1859,8 +1820,61 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) /* zero the excess data so the measurement can be reliably calculated */ memset(padded_ht->padding, 0, sizeof(padded_ht->padding)); - if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht, - sizeof(*padded_ht), errp) < 0) { + return true; +} + +/* + * Add the hashes of the linux kernel/initrd/cmdline to an encrypted guest page + * which is included in SEV's initial memory measurement. + */ +bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) +{ + uint8_t *data; + SevHashTableDescriptor *area; + PaddedSevHashTable *padded_ht; + hwaddr mapped_len = sizeof(*padded_ht); + MemTxAttrs attrs = { 0 }; + bool ret = true; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + + /* + * Only add the kernel hashes if the sev-guest configuration explicitly + * stated kernel-hashes=on. + */ + if (!sev_common->kernel_hashes) { + return false; + } + + if (!pc_system_ovmf_table_find(SEV_HASH_TABLE_RV_GUID, &data, NULL)) { + error_setg(errp, "SEV: kernel specified but guest firmware " + "has no hashes table GUID"); + return false; + } + + area = (SevHashTableDescriptor *)data; + if (!area->base || area->size < sizeof(PaddedSevHashTable)) { + error_setg(errp, "SEV: guest firmware hashes table area is invalid " + "(base=0x%x size=0x%x)", area->base, area->size); + return false; + } + + /* + * Populate the hashes table in the guest's memory at the OVMF-designated + * area for the SEV hashes table + */ + padded_ht = address_space_map(&address_space_memory, area->base, + &mapped_len, true, attrs); + if (!padded_ht || mapped_len != sizeof(*padded_ht)) { + error_setg(errp, "SEV: cannot map hashes table guest memory area"); + return false; + } + + if (build_kernel_loader_hashes(padded_ht, ctx, errp)) { + if (sev_encrypt_flash(area->base, (uint8_t *)padded_ht, + sizeof(*padded_ht), errp) < 0) { + ret = false; + } + } else { ret = false; } From cc483bf911931f405dea682c74a3d8b9b6c54369 Mon Sep 17 00:00:00 2001 From: Dov Murik Date: Thu, 30 May 2024 06:16:34 -0500 Subject: [PATCH 42/46] i386/sev: Reorder struct declarations Move the declaration of PaddedSevHashTable before SevSnpGuest so we can add a new such field to the latter. No functional change intended. Signed-off-by: Dov Murik Signed-off-by: Michael Roth Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-23-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini --- target/i386/sev.c | 84 +++++++++++++++++++++++------------------------ 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/target/i386/sev.c b/target/i386/sev.c index 73f9406715..3fce4c08eb 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -46,6 +46,48 @@ OBJECT_DECLARE_TYPE(SevCommonState, SevCommonStateClass, SEV_COMMON) OBJECT_DECLARE_TYPE(SevGuestState, SevCommonStateClass, SEV_GUEST) OBJECT_DECLARE_TYPE(SevSnpGuestState, SevCommonStateClass, SEV_SNP_GUEST) +/* hard code sha256 digest size */ +#define HASH_SIZE 32 + +typedef struct QEMU_PACKED SevHashTableEntry { + QemuUUID guid; + uint16_t len; + uint8_t hash[HASH_SIZE]; +} SevHashTableEntry; + +typedef struct QEMU_PACKED SevHashTable { + QemuUUID guid; + uint16_t len; + SevHashTableEntry cmdline; + SevHashTableEntry initrd; + SevHashTableEntry kernel; +} SevHashTable; + +/* + * Data encrypted by sev_encrypt_flash() must be padded to a multiple of + * 16 bytes. + */ +typedef struct QEMU_PACKED PaddedSevHashTable { + SevHashTable ht; + uint8_t padding[ROUND_UP(sizeof(SevHashTable), 16) - sizeof(SevHashTable)]; +} PaddedSevHashTable; + +QEMU_BUILD_BUG_ON(sizeof(PaddedSevHashTable) % 16 != 0); + +#define SEV_INFO_BLOCK_GUID "00f771de-1a7e-4fcb-890e-68c77e2fb44e" +typedef struct __attribute__((__packed__)) SevInfoBlock { + /* SEV-ES Reset Vector Address */ + uint32_t reset_addr; +} SevInfoBlock; + +#define SEV_HASH_TABLE_RV_GUID "7255371f-3a3b-4b04-927b-1da6efa8d454" +typedef struct QEMU_PACKED SevHashTableDescriptor { + /* SEV hash table area guest address */ + uint32_t base; + /* SEV hash table area size (in bytes) */ + uint32_t size; +} SevHashTableDescriptor; + struct SevCommonState { X86ConfidentialGuest parent_obj; @@ -128,48 +170,6 @@ typedef struct SevLaunchUpdateData { static QTAILQ_HEAD(, SevLaunchUpdateData) launch_update; -#define SEV_INFO_BLOCK_GUID "00f771de-1a7e-4fcb-890e-68c77e2fb44e" -typedef struct __attribute__((__packed__)) SevInfoBlock { - /* SEV-ES Reset Vector Address */ - uint32_t reset_addr; -} SevInfoBlock; - -#define SEV_HASH_TABLE_RV_GUID "7255371f-3a3b-4b04-927b-1da6efa8d454" -typedef struct QEMU_PACKED SevHashTableDescriptor { - /* SEV hash table area guest address */ - uint32_t base; - /* SEV hash table area size (in bytes) */ - uint32_t size; -} SevHashTableDescriptor; - -/* hard code sha256 digest size */ -#define HASH_SIZE 32 - -typedef struct QEMU_PACKED SevHashTableEntry { - QemuUUID guid; - uint16_t len; - uint8_t hash[HASH_SIZE]; -} SevHashTableEntry; - -typedef struct QEMU_PACKED SevHashTable { - QemuUUID guid; - uint16_t len; - SevHashTableEntry cmdline; - SevHashTableEntry initrd; - SevHashTableEntry kernel; -} SevHashTable; - -/* - * Data encrypted by sev_encrypt_flash() must be padded to a multiple of - * 16 bytes. - */ -typedef struct QEMU_PACKED PaddedSevHashTable { - SevHashTable ht; - uint8_t padding[ROUND_UP(sizeof(SevHashTable), 16) - sizeof(SevHashTable)]; -} PaddedSevHashTable; - -QEMU_BUILD_BUG_ON(sizeof(PaddedSevHashTable) % 16 != 0); - static Error *sev_mig_blocker; static const char *const sev_fw_errlist[] = { From c1996992cc882b00139f78067d6a64e2ec9cb0d8 Mon Sep 17 00:00:00 2001 From: Dov Murik Date: Thu, 30 May 2024 06:16:35 -0500 Subject: [PATCH 43/46] i386/sev: Allow measured direct kernel boot on SNP In SNP, the hashes page designated with a specific metadata entry published in AmdSev OVMF. Therefore, if the user enabled kernel hashes (for measured direct boot), QEMU should prepare the content of hashes table, and during the processing of the metadata entry it copy the content into the designated page and encrypt it. Note that in SNP (unlike SEV and SEV-ES) the measurements is done in whole 4KB pages. Therefore QEMU zeros the whole page that includes the hashes table, and fills in the kernel hashes area in that page, and then encrypts the whole page. The rest of the page is reserved for SEV launch secrets which are not usable anyway on SNP. If the user disabled kernel hashes, QEMU pre-validates the kernel hashes page as a zero page. Signed-off-by: Dov Murik Signed-off-by: Michael Roth Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-24-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini --- include/hw/i386/pc.h | 2 + target/i386/sev.c | 113 ++++++++++++++++++++++++++++++++----------- 2 files changed, 86 insertions(+), 29 deletions(-) diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index c653b8eeb2..ca7904ac2c 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -172,6 +172,8 @@ typedef enum { SEV_DESC_TYPE_SNP_SECRETS, /* The section contains address that can be used as a CPUID page */ SEV_DESC_TYPE_CPUID, + /* The section contains the region for kernel hashes for measured direct boot */ + SEV_DESC_TYPE_SNP_KERNEL_HASHES = 0x10, } ovmf_sev_metadata_desc_type; diff --git a/target/i386/sev.c b/target/i386/sev.c index 3fce4c08eb..004c667ac1 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -115,6 +115,10 @@ struct SevCommonStateClass { X86ConfidentialGuestClass parent_class; /* public */ + bool (*build_kernel_loader_hashes)(SevCommonState *sev_common, + SevHashTableDescriptor *area, + SevKernelLoaderContext *ctx, + Error **errp); int (*launch_start)(SevCommonState *sev_common); void (*launch_finish)(SevCommonState *sev_common); int (*launch_update_data)(SevCommonState *sev_common, hwaddr gpa, uint8_t *ptr, uint64_t len); @@ -154,6 +158,9 @@ struct SevSnpGuestState { struct kvm_sev_snp_launch_start kvm_start_conf; struct kvm_sev_snp_launch_finish kvm_finish_conf; + + uint32_t kernel_hashes_offset; + PaddedSevHashTable *kernel_hashes_data; }; #define DEFAULT_GUEST_POLICY 0x1 /* disable debug */ @@ -1189,6 +1196,23 @@ snp_launch_update_cpuid(uint32_t cpuid_addr, void *hva, uint32_t cpuid_len) KVM_SEV_SNP_PAGE_TYPE_CPUID); } +static int +snp_launch_update_kernel_hashes(SevSnpGuestState *sev_snp, uint32_t addr, + void *hva, uint32_t len) +{ + int type = KVM_SEV_SNP_PAGE_TYPE_ZERO; + if (sev_snp->parent_obj.kernel_hashes) { + assert(sev_snp->kernel_hashes_data); + assert((sev_snp->kernel_hashes_offset + + sizeof(*sev_snp->kernel_hashes_data)) <= len); + memset(hva, 0, len); + memcpy(hva + sev_snp->kernel_hashes_offset, sev_snp->kernel_hashes_data, + sizeof(*sev_snp->kernel_hashes_data)); + type = KVM_SEV_SNP_PAGE_TYPE_NORMAL; + } + return snp_launch_update_data(addr, hva, len, type); +} + static int snp_metadata_desc_to_page_type(int desc_type) { @@ -1225,6 +1249,9 @@ snp_populate_metadata_pages(SevSnpGuestState *sev_snp, if (type == KVM_SEV_SNP_PAGE_TYPE_CPUID) { ret = snp_launch_update_cpuid(desc->base, hva, desc->len); + } else if (desc->type == SEV_DESC_TYPE_SNP_KERNEL_HASHES) { + ret = snp_launch_update_kernel_hashes(sev_snp, desc->base, hva, + desc->len); } else { ret = snp_launch_update_data(desc->base, hva, desc->len, type); } @@ -1823,40 +1850,31 @@ static bool build_kernel_loader_hashes(PaddedSevHashTable *padded_ht, return true; } -/* - * Add the hashes of the linux kernel/initrd/cmdline to an encrypted guest page - * which is included in SEV's initial memory measurement. - */ -bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) +static bool sev_snp_build_kernel_loader_hashes(SevCommonState *sev_common, + SevHashTableDescriptor *area, + SevKernelLoaderContext *ctx, + Error **errp) +{ + /* + * SNP: Populate the hashes table in an area that later in + * snp_launch_update_kernel_hashes() will be copied to the guest memory + * and encrypted. + */ + SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(sev_common); + sev_snp_guest->kernel_hashes_offset = area->base & ~TARGET_PAGE_MASK; + sev_snp_guest->kernel_hashes_data = g_new0(PaddedSevHashTable, 1); + return build_kernel_loader_hashes(sev_snp_guest->kernel_hashes_data, ctx, errp); +} + +static bool sev_build_kernel_loader_hashes(SevCommonState *sev_common, + SevHashTableDescriptor *area, + SevKernelLoaderContext *ctx, + Error **errp) { - uint8_t *data; - SevHashTableDescriptor *area; PaddedSevHashTable *padded_ht; hwaddr mapped_len = sizeof(*padded_ht); MemTxAttrs attrs = { 0 }; bool ret = true; - SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); - - /* - * Only add the kernel hashes if the sev-guest configuration explicitly - * stated kernel-hashes=on. - */ - if (!sev_common->kernel_hashes) { - return false; - } - - if (!pc_system_ovmf_table_find(SEV_HASH_TABLE_RV_GUID, &data, NULL)) { - error_setg(errp, "SEV: kernel specified but guest firmware " - "has no hashes table GUID"); - return false; - } - - area = (SevHashTableDescriptor *)data; - if (!area->base || area->size < sizeof(PaddedSevHashTable)) { - error_setg(errp, "SEV: guest firmware hashes table area is invalid " - "(base=0x%x size=0x%x)", area->base, area->size); - return false; - } /* * Populate the hashes table in the guest's memory at the OVMF-designated @@ -1884,6 +1902,41 @@ bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) return ret; } +/* + * Add the hashes of the linux kernel/initrd/cmdline to an encrypted guest page + * which is included in SEV's initial memory measurement. + */ +bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) +{ + uint8_t *data; + SevHashTableDescriptor *area; + SevCommonState *sev_common = SEV_COMMON(MACHINE(qdev_get_machine())->cgs); + SevCommonStateClass *klass = SEV_COMMON_GET_CLASS(sev_common); + + /* + * Only add the kernel hashes if the sev-guest configuration explicitly + * stated kernel-hashes=on. + */ + if (!sev_common->kernel_hashes) { + return false; + } + + if (!pc_system_ovmf_table_find(SEV_HASH_TABLE_RV_GUID, &data, NULL)) { + error_setg(errp, "SEV: kernel specified but guest firmware " + "has no hashes table GUID"); + return false; + } + + area = (SevHashTableDescriptor *)data; + if (!area->base || area->size < sizeof(PaddedSevHashTable)) { + error_setg(errp, "SEV: guest firmware hashes table area is invalid " + "(base=0x%x size=0x%x)", area->base, area->size); + return false; + } + + return klass->build_kernel_loader_hashes(sev_common, area, ctx, errp); +} + static char * sev_common_get_sev_device(Object *obj, Error **errp) { @@ -1998,6 +2051,7 @@ sev_guest_class_init(ObjectClass *oc, void *data) SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + klass->build_kernel_loader_hashes = sev_build_kernel_loader_hashes; klass->launch_start = sev_launch_start; klass->launch_finish = sev_launch_finish; klass->launch_update_data = sev_launch_update_data; @@ -2242,6 +2296,7 @@ sev_snp_guest_class_init(ObjectClass *oc, void *data) SevCommonStateClass *klass = SEV_COMMON_CLASS(oc); X86ConfidentialGuestClass *x86_klass = X86_CONFIDENTIAL_GUEST_CLASS(oc); + klass->build_kernel_loader_hashes = sev_snp_build_kernel_loader_hashes; klass->launch_start = sev_snp_launch_start; klass->launch_finish = sev_snp_launch_finish; klass->launch_update_data = sev_snp_launch_update_data; From a0aa6db7ce72a08703774107185e639e73e7754c Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Thu, 30 May 2024 06:16:15 -0500 Subject: [PATCH 44/46] memory: Introduce memory_region_init_ram_guest_memfd() Introduce memory_region_init_ram_guest_memfd() to allocate private guset memfd on the MemoryRegion initialization. It's for the use case of TDVF, which must be private on TDX case. Signed-off-by: Xiaoyao Li Signed-off-by: Michael Roth Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-4-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini --- include/exec/memory.h | 6 ++++++ system/memory.c | 24 ++++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/include/exec/memory.h b/include/exec/memory.h index 9cdd64e9c6..1be58f694c 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -1638,6 +1638,12 @@ bool memory_region_init_ram(MemoryRegion *mr, uint64_t size, Error **errp); +bool memory_region_init_ram_guest_memfd(MemoryRegion *mr, + Object *owner, + const char *name, + uint64_t size, + Error **errp); + /** * memory_region_init_rom: Initialize a ROM memory region. * diff --git a/system/memory.c b/system/memory.c index 9540caa8a1..74cd73ebc7 100644 --- a/system/memory.c +++ b/system/memory.c @@ -3649,6 +3649,30 @@ bool memory_region_init_ram(MemoryRegion *mr, return true; } +bool memory_region_init_ram_guest_memfd(MemoryRegion *mr, + Object *owner, + const char *name, + uint64_t size, + Error **errp) +{ + DeviceState *owner_dev; + + if (!memory_region_init_ram_flags_nomigrate(mr, owner, name, size, + RAM_GUEST_MEMFD, errp)) { + return false; + } + /* This will assert if owner is neither NULL nor a DeviceState. + * We only want the owner here for the purposes of defining a + * unique name for migration. TODO: Ideally we should implement + * a naming scheme for Objects which are not DeviceStates, in + * which case we can relax this restriction. + */ + owner_dev = DEVICE(owner); + vmstate_register_ram(mr, owner_dev); + + return true; +} + bool memory_region_init_rom(MemoryRegion *mr, Object *owner, const char *name, From 413a67450750e0459efeffc3db3ba9759c3e381c Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 30 May 2024 06:16:39 -0500 Subject: [PATCH 45/46] hw/i386/sev: Use guest_memfd for legacy ROMs Current SNP guest kernels will attempt to access these regions with with C-bit set, so guest_memfd is needed to handle that. Otherwise, kvm_convert_memory() will fail when the guest kernel tries to access it and QEMU attempts to call KVM_SET_MEMORY_ATTRIBUTES to set these ranges to private. Whether guests should actually try to access ROM regions in this way (or need to deal with legacy ROM regions at all), is a separate issue to be addressed on kernel side, but current SNP guest kernels will exhibit this behavior and so this handling is needed to allow QEMU to continue running existing SNP guest kernels. Signed-off-by: Michael Roth [pankaj: Added sev_snp_enabled() check] Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-28-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini --- hw/i386/pc.c | 14 ++++++++++---- hw/i386/pc_sysfw.c | 19 +++++++++++++------ 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 7b638da7aa..0469af00a7 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -62,6 +62,7 @@ #include "hw/mem/memory-device.h" #include "e820_memory_layout.h" #include "trace.h" +#include "sev.h" #include CONFIG_DEVICES #ifdef CONFIG_XEN_EMU @@ -1022,10 +1023,15 @@ void pc_memory_init(PCMachineState *pcms, pc_system_firmware_init(pcms, rom_memory); option_rom_mr = g_malloc(sizeof(*option_rom_mr)); - memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, - &error_fatal); - if (pcmc->pci_enabled) { - memory_region_set_readonly(option_rom_mr, true); + if (machine_require_guest_memfd(machine)) { + memory_region_init_ram_guest_memfd(option_rom_mr, NULL, "pc.rom", + PC_ROM_SIZE, &error_fatal); + } else { + memory_region_init_ram(option_rom_mr, NULL, "pc.rom", PC_ROM_SIZE, + &error_fatal); + if (pcmc->pci_enabled) { + memory_region_set_readonly(option_rom_mr, true); + } } memory_region_add_subregion_overlap(rom_memory, PC_ROM_MIN_VGA, diff --git a/hw/i386/pc_sysfw.c b/hw/i386/pc_sysfw.c index 7cdbafc8d2..ef80281d28 100644 --- a/hw/i386/pc_sysfw.c +++ b/hw/i386/pc_sysfw.c @@ -40,8 +40,8 @@ #define FLASH_SECTOR_SIZE 4096 -static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory, - MemoryRegion *flash_mem) +static void pc_isa_bios_init(PCMachineState *pcms, MemoryRegion *isa_bios, + MemoryRegion *rom_memory, MemoryRegion *flash_mem) { int isa_bios_size; uint64_t flash_size; @@ -51,8 +51,13 @@ static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory, /* map the last 128KB of the BIOS in ISA space */ isa_bios_size = MIN(flash_size, 128 * KiB); - memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size, - &error_fatal); + if (machine_require_guest_memfd(MACHINE(pcms))) { + memory_region_init_ram_guest_memfd(isa_bios, NULL, "isa-bios", + isa_bios_size, &error_fatal); + } else { + memory_region_init_ram(isa_bios, NULL, "isa-bios", isa_bios_size, + &error_fatal); + } memory_region_add_subregion_overlap(rom_memory, 0x100000 - isa_bios_size, isa_bios, @@ -65,7 +70,9 @@ static void pc_isa_bios_init(MemoryRegion *isa_bios, MemoryRegion *rom_memory, ((uint8_t*)flash_ptr) + (flash_size - isa_bios_size), isa_bios_size); - memory_region_set_readonly(isa_bios, true); + if (!machine_require_guest_memfd(current_machine)) { + memory_region_set_readonly(isa_bios, true); + } } static PFlashCFI01 *pc_pflash_create(PCMachineState *pcms, @@ -191,7 +198,7 @@ static void pc_system_flash_map(PCMachineState *pcms, x86_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem, true); } else { - pc_isa_bios_init(&x86ms->isa_bios, rom_memory, flash_mem); + pc_isa_bios_init(pcms, &x86ms->isa_bios, rom_memory, flash_mem); } /* Encrypt the pflash boot ROM */ From fc7a69e177e4ba26d11fcf47b853f85115b35a11 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 30 May 2024 06:16:40 -0500 Subject: [PATCH 46/46] hw/i386: Add support for loading BIOS using guest_memfd When guest_memfd is enabled, the BIOS is generally part of the initial encrypted guest image and will be accessed as private guest memory. Add the necessary changes to set up the associated RAM region with a guest_memfd backend to allow for this. Current support centers around using -bios to load the BIOS data. Support for loading the BIOS via pflash requires additional enablement since those interfaces rely on the use of ROM memory regions which make use of the KVM_MEM_READONLY memslot flag, which is not supported for guest_memfd-backed memslots. Signed-off-by: Michael Roth Signed-off-by: Pankaj Gupta Message-ID: <20240530111643.1091816-29-pankaj.gupta@amd.com> Signed-off-by: Paolo Bonzini --- hw/i386/x86-common.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c index f41cb0a6a8..c0c66a0eb5 100644 --- a/hw/i386/x86-common.c +++ b/hw/i386/x86-common.c @@ -1001,8 +1001,13 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, (bios_size % 65536) != 0) { goto bios_error; } - memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", bios_size, - &error_fatal); + if (machine_require_guest_memfd(MACHINE(x86ms))) { + memory_region_init_ram_guest_memfd(&x86ms->bios, NULL, "pc.bios", + bios_size, &error_fatal); + } else { + memory_region_init_ram(&x86ms->bios, NULL, "pc.bios", + bios_size, &error_fatal); + } if (sev_enabled()) { /* * The concept of a "reset" simply doesn't exist for @@ -1023,9 +1028,11 @@ void x86_bios_rom_init(X86MachineState *x86ms, const char *default_firmware, } g_free(filename); - /* map the last 128KB of the BIOS in ISA space */ - x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios, - !isapc_ram_fw); + if (!machine_require_guest_memfd(MACHINE(x86ms))) { + /* map the last 128KB of the BIOS in ISA space */ + x86_isa_bios_init(&x86ms->isa_bios, rom_memory, &x86ms->bios, + !isapc_ram_fw); + } /* map all the bios at the top of memory */ memory_region_add_subregion(rom_memory,