From 782ee711be9390f3586e615be49585aefd7fcaac Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Fri, 21 Jul 2023 10:34:11 -0300 Subject: [PATCH 01/45] target/riscv/cpu.c: do not run 'host' CPU with TCG MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'host' CPU is available in a CONFIG_KVM build and it's currently available for all accels, but is a KVM only CPU. This means that in a RISC-V KVM capable host we can do things like this: $ ./build/qemu-system-riscv64 -M virt,accel=tcg -cpu host --nographic qemu-system-riscv64: H extension requires priv spec 1.12.0 This CPU does not have a priv spec because we don't filter its extensions via priv spec. We shouldn't be reaching riscv_cpu_realize_tcg() at all with the 'host' CPU. We don't have a way to filter the 'host' CPU out of the available CPU options (-cpu help) if the build includes both KVM and TCG. What we can do is to error out during riscv_cpu_realize_tcg() if the user chooses the 'host' CPU with accel=tcg: $ ./build/qemu-system-riscv64 -M virt,accel=tcg -cpu host --nographic qemu-system-riscv64: 'host' CPU is not compatible with TCG acceleration Signed-off-by: Daniel Henrique Barboza Reviewed-by: Alistair Francis Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20230721133411.474105-1-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis --- target/riscv/cpu.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index 6b93b04453..08db3d613f 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -1395,6 +1395,11 @@ static void riscv_cpu_realize_tcg(DeviceState *dev, Error **errp) CPURISCVState *env = &cpu->env; Error *local_err = NULL; + if (object_dynamic_cast(OBJECT(dev), TYPE_RISCV_CPU_HOST)) { + error_setg(errp, "'host' CPU is not compatible with TCG acceleration"); + return; + } + riscv_cpu_validate_misa_mxl(cpu, &local_err); if (local_err != NULL) { error_propagate(errp, local_err); From c255946e3df4d9660e4f468a456633c24393d468 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 21 Jul 2023 11:47:19 +0200 Subject: [PATCH 02/45] hw/char/riscv_htif: Fix printing of console characters on big endian hosts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The character that should be printed is stored in the 64 bit "payload" variable. The code currently tries to print it by taking the address of the variable and passing this pointer to qemu_chr_fe_write(). However, this only works on little endian hosts where the least significant bits are stored on the lowest address. To do this in a portable way, we have to store the value in an uint8_t variable instead. Fixes: 5033606780 ("RISC-V HTIF Console") Signed-off-by: Thomas Huth Reviewed-by: Alistair Francis Reviewed-by: Bin Meng Reviewed-by: Daniel Henrique Barboza Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20230721094720.902454-2-thuth@redhat.com> Signed-off-by: Alistair Francis --- hw/char/riscv_htif.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/char/riscv_htif.c b/hw/char/riscv_htif.c index 37d3ccc76b..f96df40124 100644 --- a/hw/char/riscv_htif.c +++ b/hw/char/riscv_htif.c @@ -232,7 +232,8 @@ static void htif_handle_tohost_write(HTIFState *s, uint64_t val_written) s->tohost = 0; /* clear to indicate we read */ return; } else if (cmd == HTIF_CONSOLE_CMD_PUTC) { - qemu_chr_fe_write(&s->chr, (uint8_t *)&payload, 1); + uint8_t ch = (uint8_t)payload; + qemu_chr_fe_write(&s->chr, &ch, 1); resp = 0x100 | (uint8_t)payload; } else { qemu_log("HTIF device %d: unknown command\n", device); From 058096f1c55ab688db7e1d6814aaefc1bcd87f7a Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Fri, 21 Jul 2023 11:47:20 +0200 Subject: [PATCH 03/45] hw/char/riscv_htif: Fix the console syscall on big endian hosts Values that have been read via cpu_physical_memory_read() from the guest's memory have to be swapped in case the host endianess differs from the guest. Fixes: a6e13e31d5 ("riscv_htif: Support console output via proxy syscall") Signed-off-by: Thomas Huth Reviewed-by: Alistair Francis Reviewed-by: Bin Meng Reviewed-by: Daniel Henrique Barboza Message-Id: <20230721094720.902454-3-thuth@redhat.com> Signed-off-by: Alistair Francis --- hw/char/riscv_htif.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/hw/char/riscv_htif.c b/hw/char/riscv_htif.c index f96df40124..40de6b8b77 100644 --- a/hw/char/riscv_htif.c +++ b/hw/char/riscv_htif.c @@ -30,6 +30,7 @@ #include "qemu/timer.h" #include "qemu/error-report.h" #include "exec/address-spaces.h" +#include "exec/tswap.h" #include "sysemu/dma.h" #define RISCV_DEBUG_HTIF 0 @@ -209,11 +210,11 @@ static void htif_handle_tohost_write(HTIFState *s, uint64_t val_written) } else { uint64_t syscall[8]; cpu_physical_memory_read(payload, syscall, sizeof(syscall)); - if (syscall[0] == PK_SYS_WRITE && - syscall[1] == HTIF_DEV_CONSOLE && - syscall[3] == HTIF_CONSOLE_CMD_PUTC) { + if (tswap64(syscall[0]) == PK_SYS_WRITE && + tswap64(syscall[1]) == HTIF_DEV_CONSOLE && + tswap64(syscall[3]) == HTIF_CONSOLE_CMD_PUTC) { uint8_t ch; - cpu_physical_memory_read(syscall[2], &ch, 1); + cpu_physical_memory_read(tswap64(syscall[2]), &ch, 1); qemu_chr_fe_write(&s->chr, &ch, 1); resp = 0x100 | (uint8_t)payload; } else { From 50f9464962fb41f04fd5f42e7ee2cb60942aba89 Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Thu, 20 Jul 2023 10:24:23 -0300 Subject: [PATCH 04/45] target/riscv/cpu.c: add zmmul isa string zmmul was promoted from experimental to ratified in commit 6d00ffad4e95. Add a riscv,isa string for it. Fixes: 6d00ffad4e95 ("target/riscv: move zmmul out of the experimental properties") Signed-off-by: Daniel Henrique Barboza Reviewed-by: Weiwei Li Reviewed-by: Alistair Francis Message-Id: <20230720132424.371132-2-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis --- target/riscv/cpu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index 08db3d613f..6d02e85102 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -88,6 +88,7 @@ static const struct isa_ext_data isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(zicsr, PRIV_VERSION_1_10_0, ext_icsr), ISA_EXT_DATA_ENTRY(zifencei, PRIV_VERSION_1_10_0, ext_ifencei), ISA_EXT_DATA_ENTRY(zihintpause, PRIV_VERSION_1_10_0, ext_zihintpause), + ISA_EXT_DATA_ENTRY(zmmul, PRIV_VERSION_1_12_0, ext_zmmul), ISA_EXT_DATA_ENTRY(zawrs, PRIV_VERSION_1_12_0, ext_zawrs), ISA_EXT_DATA_ENTRY(zfa, PRIV_VERSION_1_12_0, ext_zfa), ISA_EXT_DATA_ENTRY(zfbfmin, PRIV_VERSION_1_12_0, ext_zfbfmin), From 03d7bbfd04c2a68f6339adede99ceae10800dc91 Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Thu, 20 Jul 2023 10:24:24 -0300 Subject: [PATCH 05/45] target/riscv/cpu.c: add smepmp isa string The cpu->cfg.epmp extension is still experimental, but it already has a 'smepmp' riscv,isa string. Add it. Signed-off-by: Daniel Henrique Barboza Reviewed-by: Weiwei Li Reviewed-by: Alistair Francis Message-Id: <20230720132424.371132-3-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis --- target/riscv/cpu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index 6d02e85102..921c19e6cd 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -130,6 +130,7 @@ static const struct isa_ext_data isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(zhinx, PRIV_VERSION_1_12_0, ext_zhinx), ISA_EXT_DATA_ENTRY(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin), ISA_EXT_DATA_ENTRY(smaia, PRIV_VERSION_1_12_0, ext_smaia), + ISA_EXT_DATA_ENTRY(smepmp, PRIV_VERSION_1_12_0, epmp), ISA_EXT_DATA_ENTRY(smstateen, PRIV_VERSION_1_12_0, ext_smstateen), ISA_EXT_DATA_ENTRY(ssaia, PRIV_VERSION_1_12_0, ext_ssaia), ISA_EXT_DATA_ENTRY(sscofpmf, PRIV_VERSION_1_12_0, ext_sscofpmf), From 4cc9f284d5971ecd8055d26ef74c23ef0be8b8f5 Mon Sep 17 00:00:00 2001 From: LIU Zhiwei Date: Sat, 29 Jul 2023 11:16:18 +0800 Subject: [PATCH 06/45] target/riscv: Fix page_check_range use in fault-only-first Commit bef6f008b98(accel/tcg: Return bool from page_check_range) converts integer return value to bool type. However, it wrongly converted the use of the API in riscv fault-only-first, where page_check_range < = 0, should be converted to !page_check_range. Signed-off-by: LIU Zhiwei Reviewed-by: Richard Henderson Message-ID: <20230729031618.821-1-zhiwei_liu@linux.alibaba.com> Signed-off-by: Alistair Francis --- target/riscv/vector_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index bc9e151aa9..379f03df06 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -584,7 +584,7 @@ vext_ldff(void *vd, void *v0, target_ulong base, cpu_mmu_index(env, false)); if (host) { #ifdef CONFIG_USER_ONLY - if (page_check_range(addr, offset, PAGE_READ)) { + if (!page_check_range(addr, offset, PAGE_READ)) { vl = i; goto ProbeSuccess; } From 9ea17007c4ae4420ccd917eb300c7db49483a5b8 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 31 Jul 2023 10:40:43 +0200 Subject: [PATCH 07/45] target/riscv: Use existing lookup tables for MixColumns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The AES MixColumns and InvMixColumns operations are relatively expensive 4x4 matrix multiplications in GF(2^8), which is why C implementations usually rely on precomputed lookup tables rather than performing the calculations on demand. Given that we already carry those tables in QEMU, we can just grab the right value in the implementation of the RISC-V AES32 instructions. Note that the tables in question are permuted according to the respective Sbox, so we can omit the Sbox lookup as well in this case. Cc: Richard Henderson Cc: Philippe Mathieu-Daudé Cc: Zewen Ye Cc: Weiwei Li Cc: Junqiang Wang Signed-off-by: Ard Biesheuvel Reviewed-by: Richard Henderson Message-ID: <20230731084043.1791984-1-ardb@kernel.org> Signed-off-by: Alistair Francis --- crypto/aes.c | 4 ++-- include/crypto/aes.h | 7 +++++++ target/riscv/crypto_helper.c | 34 ++++------------------------------ 3 files changed, 13 insertions(+), 32 deletions(-) diff --git a/crypto/aes.c b/crypto/aes.c index 836d7d5c0b..df4362ac60 100644 --- a/crypto/aes.c +++ b/crypto/aes.c @@ -272,7 +272,7 @@ AES_Td3[x] = Si[x].[09, 0d, 0b, 0e]; AES_Td4[x] = Si[x].[01, 01, 01, 01]; */ -static const uint32_t AES_Te0[256] = { +const uint32_t AES_Te0[256] = { 0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU, 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U, 0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU, @@ -607,7 +607,7 @@ static const uint32_t AES_Te4[256] = { 0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U, }; -static const uint32_t AES_Td0[256] = { +const uint32_t AES_Td0[256] = { 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U, 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U, 0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U, diff --git a/include/crypto/aes.h b/include/crypto/aes.h index 709d4d226b..381f24c902 100644 --- a/include/crypto/aes.h +++ b/include/crypto/aes.h @@ -30,4 +30,11 @@ void AES_decrypt(const unsigned char *in, unsigned char *out, extern const uint8_t AES_sbox[256]; extern const uint8_t AES_isbox[256]; +/* +AES_Te0[x] = S [x].[02, 01, 01, 03]; +AES_Td0[x] = Si[x].[0e, 09, 0d, 0b]; +*/ + +extern const uint32_t AES_Te0[256], AES_Td0[256]; + #endif diff --git a/target/riscv/crypto_helper.c b/target/riscv/crypto_helper.c index 99d85a6188..4d65945429 100644 --- a/target/riscv/crypto_helper.c +++ b/target/riscv/crypto_helper.c @@ -25,29 +25,6 @@ #include "crypto/aes-round.h" #include "crypto/sm4.h" -#define AES_XTIME(a) \ - ((a << 1) ^ ((a & 0x80) ? 0x1b : 0)) - -#define AES_GFMUL(a, b) (( \ - (((b) & 0x1) ? (a) : 0) ^ \ - (((b) & 0x2) ? AES_XTIME(a) : 0) ^ \ - (((b) & 0x4) ? AES_XTIME(AES_XTIME(a)) : 0) ^ \ - (((b) & 0x8) ? AES_XTIME(AES_XTIME(AES_XTIME(a))) : 0)) & 0xFF) - -static inline uint32_t aes_mixcolumn_byte(uint8_t x, bool fwd) -{ - uint32_t u; - - if (fwd) { - u = (AES_GFMUL(x, 3) << 24) | (x << 16) | (x << 8) | - (AES_GFMUL(x, 2) << 0); - } else { - u = (AES_GFMUL(x, 0xb) << 24) | (AES_GFMUL(x, 0xd) << 16) | - (AES_GFMUL(x, 0x9) << 8) | (AES_GFMUL(x, 0xe) << 0); - } - return u; -} - #define sext32_xlen(x) (target_ulong)(int32_t)(x) static inline target_ulong aes32_operation(target_ulong shamt, @@ -55,23 +32,20 @@ static inline target_ulong aes32_operation(target_ulong shamt, bool enc, bool mix) { uint8_t si = rs2 >> shamt; - uint8_t so; uint32_t mixed; target_ulong res; if (enc) { - so = AES_sbox[si]; if (mix) { - mixed = aes_mixcolumn_byte(so, true); + mixed = be32_to_cpu(AES_Te0[si]); } else { - mixed = so; + mixed = AES_sbox[si]; } } else { - so = AES_isbox[si]; if (mix) { - mixed = aes_mixcolumn_byte(so, false); + mixed = be32_to_cpu(AES_Td0[si]); } else { - mixed = so; + mixed = AES_isbox[si]; } } mixed = rol32(mixed, shamt); From 98f40dd2edacc284abb75f9a8135513475ca95e8 Mon Sep 17 00:00:00 2001 From: Kiran Ostrolenk Date: Wed, 12 Jul 2023 00:59:00 +0800 Subject: [PATCH 08/45] target/riscv: Refactor some of the generic vector functionality Take some functions/macros out of `vector_helper` and put them in a new module called `vector_internals`. This ensures they can be used by both vector and vector-crypto helpers (latter implemented in proceeding commits). Signed-off-by: Kiran Ostrolenk Reviewed-by: Weiwei Li Signed-off-by: Max Chou Acked-by: Alistair Francis Message-ID: <20230711165917.2629866-2-max.chou@sifive.com> Signed-off-by: Alistair Francis --- target/riscv/meson.build | 1 + target/riscv/vector_helper.c | 201 +------------------------------- target/riscv/vector_internals.c | 81 +++++++++++++ target/riscv/vector_internals.h | 182 +++++++++++++++++++++++++++++ 4 files changed, 265 insertions(+), 200 deletions(-) create mode 100644 target/riscv/vector_internals.c create mode 100644 target/riscv/vector_internals.h diff --git a/target/riscv/meson.build b/target/riscv/meson.build index 7f56c5f88d..c3801ee5e0 100644 --- a/target/riscv/meson.build +++ b/target/riscv/meson.build @@ -16,6 +16,7 @@ riscv_ss.add(files( 'gdbstub.c', 'op_helper.c', 'vector_helper.c', + 'vector_internals.c', 'bitmanip_helper.c', 'translate.c', 'm128_helper.c', diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 379f03df06..1f29236a63 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -27,6 +27,7 @@ #include "fpu/softfloat.h" #include "tcg/tcg-gvec-desc.h" #include "internals.h" +#include "vector_internals.h" #include target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, @@ -73,68 +74,6 @@ target_ulong HELPER(vsetvl)(CPURISCVState *env, target_ulong s1, return vl; } -/* - * Note that vector data is stored in host-endian 64-bit chunks, - * so addressing units smaller than that needs a host-endian fixup. - */ -#if HOST_BIG_ENDIAN -#define H1(x) ((x) ^ 7) -#define H1_2(x) ((x) ^ 6) -#define H1_4(x) ((x) ^ 4) -#define H2(x) ((x) ^ 3) -#define H4(x) ((x) ^ 1) -#define H8(x) ((x)) -#else -#define H1(x) (x) -#define H1_2(x) (x) -#define H1_4(x) (x) -#define H2(x) (x) -#define H4(x) (x) -#define H8(x) (x) -#endif - -static inline uint32_t vext_nf(uint32_t desc) -{ - return FIELD_EX32(simd_data(desc), VDATA, NF); -} - -static inline uint32_t vext_vm(uint32_t desc) -{ - return FIELD_EX32(simd_data(desc), VDATA, VM); -} - -/* - * Encode LMUL to lmul as following: - * LMUL vlmul lmul - * 1 000 0 - * 2 001 1 - * 4 010 2 - * 8 011 3 - * - 100 - - * 1/8 101 -3 - * 1/4 110 -2 - * 1/2 111 -1 - */ -static inline int32_t vext_lmul(uint32_t desc) -{ - return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3); -} - -static inline uint32_t vext_vta(uint32_t desc) -{ - return FIELD_EX32(simd_data(desc), VDATA, VTA); -} - -static inline uint32_t vext_vma(uint32_t desc) -{ - return FIELD_EX32(simd_data(desc), VDATA, VMA); -} - -static inline uint32_t vext_vta_all_1s(uint32_t desc) -{ - return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S); -} - /* * Get the maximum number of elements can be operated. * @@ -153,21 +92,6 @@ static inline uint32_t vext_max_elems(uint32_t desc, uint32_t log2_esz) return scale < 0 ? vlenb >> -scale : vlenb << scale; } -/* - * Get number of total elements, including prestart, body and tail elements. - * Note that when LMUL < 1, the tail includes the elements past VLMAX that - * are held in the same vector register. - */ -static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc, - uint32_t esz) -{ - uint32_t vlenb = simd_maxsz(desc); - uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW); - int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 : - ctzl(esz) - ctzl(sew) + vext_lmul(desc); - return (vlenb << emul) / esz; -} - static inline target_ulong adjust_addr(CPURISCVState *env, target_ulong addr) { return (addr & ~env->cur_pmmask) | env->cur_pmbase; @@ -200,20 +124,6 @@ static void probe_pages(CPURISCVState *env, target_ulong addr, } } -/* set agnostic elements to 1s */ -static void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt, - uint32_t tot) -{ - if (is_agnostic == 0) { - /* policy undisturbed */ - return; - } - if (tot - cnt == 0) { - return; - } - memset(base + cnt, -1, tot - cnt); -} - static inline void vext_set_elem_mask(void *v0, int index, uint8_t value) { @@ -223,18 +133,6 @@ static inline void vext_set_elem_mask(void *v0, int index, ((uint64_t *)v0)[idx] = deposit64(old, pos, 1, value); } -/* - * Earlier designs (pre-0.9) had a varying number of bits - * per mask value (MLEN). In the 0.9 design, MLEN=1. - * (Section 4.5) - */ -static inline int vext_elem_mask(void *v0, int index) -{ - int idx = index / 64; - int pos = index % 64; - return (((uint64_t *)v0)[idx] >> pos) & 1; -} - /* elements operations for load and store */ typedef void vext_ldst_elem_fn(CPURISCVState *env, abi_ptr addr, uint32_t idx, void *vd, uintptr_t retaddr); @@ -729,18 +627,11 @@ GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b) * Vector Integer Arithmetic Instructions */ -/* expand macro args before macro */ -#define RVVCALL(macro, ...) macro(__VA_ARGS__) - /* (TD, T1, T2, TX1, TX2) */ #define OP_SSS_B int8_t, int8_t, int8_t, int8_t, int8_t #define OP_SSS_H int16_t, int16_t, int16_t, int16_t, int16_t #define OP_SSS_W int32_t, int32_t, int32_t, int32_t, int32_t #define OP_SSS_D int64_t, int64_t, int64_t, int64_t, int64_t -#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t -#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t -#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t -#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t #define OP_SUS_B int8_t, uint8_t, int8_t, uint8_t, int8_t #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t @@ -764,16 +655,6 @@ GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b) #define NOP_UUU_H uint16_t, uint16_t, uint32_t, uint16_t, uint32_t #define NOP_UUU_W uint32_t, uint32_t, uint64_t, uint32_t, uint64_t -/* operation of two vector elements */ -typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); - -#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ -static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ -{ \ - TX1 s1 = *((T1 *)vs1 + HS1(i)); \ - TX2 s2 = *((T2 *)vs2 + HS2(i)); \ - *((TD *)vd + HD(i)) = OP(s2, s1); \ -} #define DO_SUB(N, M) (N - M) #define DO_RSUB(N, M) (M - N) @@ -786,40 +667,6 @@ RVVCALL(OPIVV2, vsub_vv_h, OP_SSS_H, H2, H2, H2, DO_SUB) RVVCALL(OPIVV2, vsub_vv_w, OP_SSS_W, H4, H4, H4, DO_SUB) RVVCALL(OPIVV2, vsub_vv_d, OP_SSS_D, H8, H8, H8, DO_SUB) -static void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, - CPURISCVState *env, uint32_t desc, - opivv2_fn *fn, uint32_t esz) -{ - uint32_t vm = vext_vm(desc); - uint32_t vl = env->vl; - uint32_t total_elems = vext_get_total_elems(env, desc, esz); - uint32_t vta = vext_vta(desc); - uint32_t vma = vext_vma(desc); - uint32_t i; - - for (i = env->vstart; i < vl; i++) { - if (!vm && !vext_elem_mask(v0, i)) { - /* set masked-off elements to 1s */ - vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); - continue; - } - fn(vd, vs1, vs2, i); - } - env->vstart = 0; - /* set tail elements to 1s */ - vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); -} - -/* generate the helpers for OPIVV */ -#define GEN_VEXT_VV(NAME, ESZ) \ -void HELPER(NAME)(void *vd, void *v0, void *vs1, \ - void *vs2, CPURISCVState *env, \ - uint32_t desc) \ -{ \ - do_vext_vv(vd, v0, vs1, vs2, env, desc, \ - do_##NAME, ESZ); \ -} - GEN_VEXT_VV(vadd_vv_b, 1) GEN_VEXT_VV(vadd_vv_h, 2) GEN_VEXT_VV(vadd_vv_w, 4) @@ -829,18 +676,6 @@ GEN_VEXT_VV(vsub_vv_h, 2) GEN_VEXT_VV(vsub_vv_w, 4) GEN_VEXT_VV(vsub_vv_d, 8) -typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); - -/* - * (T1)s1 gives the real operator type. - * (TX1)(T1)s1 expands the operator type of widen or narrow operations. - */ -#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ -static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ -{ \ - TX2 s2 = *((T2 *)vs2 + HS2(i)); \ - *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ -} RVVCALL(OPIVX2, vadd_vx_b, OP_SSS_B, H1, H1, DO_ADD) RVVCALL(OPIVX2, vadd_vx_h, OP_SSS_H, H2, H2, DO_ADD) @@ -855,40 +690,6 @@ RVVCALL(OPIVX2, vrsub_vx_h, OP_SSS_H, H2, H2, DO_RSUB) RVVCALL(OPIVX2, vrsub_vx_w, OP_SSS_W, H4, H4, DO_RSUB) RVVCALL(OPIVX2, vrsub_vx_d, OP_SSS_D, H8, H8, DO_RSUB) -static void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, - CPURISCVState *env, uint32_t desc, - opivx2_fn fn, uint32_t esz) -{ - uint32_t vm = vext_vm(desc); - uint32_t vl = env->vl; - uint32_t total_elems = vext_get_total_elems(env, desc, esz); - uint32_t vta = vext_vta(desc); - uint32_t vma = vext_vma(desc); - uint32_t i; - - for (i = env->vstart; i < vl; i++) { - if (!vm && !vext_elem_mask(v0, i)) { - /* set masked-off elements to 1s */ - vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); - continue; - } - fn(vd, s1, vs2, i); - } - env->vstart = 0; - /* set tail elements to 1s */ - vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); -} - -/* generate the helpers for OPIVX */ -#define GEN_VEXT_VX(NAME, ESZ) \ -void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ - void *vs2, CPURISCVState *env, \ - uint32_t desc) \ -{ \ - do_vext_vx(vd, v0, s1, vs2, env, desc, \ - do_##NAME, ESZ); \ -} - GEN_VEXT_VX(vadd_vx_b, 1) GEN_VEXT_VX(vadd_vx_h, 2) GEN_VEXT_VX(vadd_vx_w, 4) diff --git a/target/riscv/vector_internals.c b/target/riscv/vector_internals.c new file mode 100644 index 0000000000..9cf5c17cde --- /dev/null +++ b/target/riscv/vector_internals.c @@ -0,0 +1,81 @@ +/* + * RISC-V Vector Extension Internals + * + * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#include "vector_internals.h" + +/* set agnostic elements to 1s */ +void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt, + uint32_t tot) +{ + if (is_agnostic == 0) { + /* policy undisturbed */ + return; + } + if (tot - cnt == 0) { + return ; + } + memset(base + cnt, -1, tot - cnt); +} + +void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, + CPURISCVState *env, uint32_t desc, + opivv2_fn *fn, uint32_t esz) +{ + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + uint32_t total_elems = vext_get_total_elems(env, desc, esz); + uint32_t vta = vext_vta(desc); + uint32_t vma = vext_vma(desc); + uint32_t i; + + for (i = env->vstart; i < vl; i++) { + if (!vm && !vext_elem_mask(v0, i)) { + /* set masked-off elements to 1s */ + vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); + continue; + } + fn(vd, vs1, vs2, i); + } + env->vstart = 0; + /* set tail elements to 1s */ + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); +} + +void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, + CPURISCVState *env, uint32_t desc, + opivx2_fn fn, uint32_t esz) +{ + uint32_t vm = vext_vm(desc); + uint32_t vl = env->vl; + uint32_t total_elems = vext_get_total_elems(env, desc, esz); + uint32_t vta = vext_vta(desc); + uint32_t vma = vext_vma(desc); + uint32_t i; + + for (i = env->vstart; i < vl; i++) { + if (!vm && !vext_elem_mask(v0, i)) { + /* set masked-off elements to 1s */ + vext_set_elems_1s(vd, vma, i * esz, (i + 1) * esz); + continue; + } + fn(vd, s1, vs2, i); + } + env->vstart = 0; + /* set tail elements to 1s */ + vext_set_elems_1s(vd, vta, vl * esz, total_elems * esz); +} diff --git a/target/riscv/vector_internals.h b/target/riscv/vector_internals.h new file mode 100644 index 0000000000..749d138beb --- /dev/null +++ b/target/riscv/vector_internals.h @@ -0,0 +1,182 @@ +/* + * RISC-V Vector Extension Internals + * + * Copyright (c) 2020 T-Head Semiconductor Co., Ltd. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#ifndef TARGET_RISCV_VECTOR_INTERNALS_H +#define TARGET_RISCV_VECTOR_INTERNALS_H + +#include "qemu/osdep.h" +#include "qemu/bitops.h" +#include "cpu.h" +#include "tcg/tcg-gvec-desc.h" +#include "internals.h" + +static inline uint32_t vext_nf(uint32_t desc) +{ + return FIELD_EX32(simd_data(desc), VDATA, NF); +} + +/* + * Note that vector data is stored in host-endian 64-bit chunks, + * so addressing units smaller than that needs a host-endian fixup. + */ +#if HOST_BIG_ENDIAN +#define H1(x) ((x) ^ 7) +#define H1_2(x) ((x) ^ 6) +#define H1_4(x) ((x) ^ 4) +#define H2(x) ((x) ^ 3) +#define H4(x) ((x) ^ 1) +#define H8(x) ((x)) +#else +#define H1(x) (x) +#define H1_2(x) (x) +#define H1_4(x) (x) +#define H2(x) (x) +#define H4(x) (x) +#define H8(x) (x) +#endif + +/* + * Encode LMUL to lmul as following: + * LMUL vlmul lmul + * 1 000 0 + * 2 001 1 + * 4 010 2 + * 8 011 3 + * - 100 - + * 1/8 101 -3 + * 1/4 110 -2 + * 1/2 111 -1 + */ +static inline int32_t vext_lmul(uint32_t desc) +{ + return sextract32(FIELD_EX32(simd_data(desc), VDATA, LMUL), 0, 3); +} + +static inline uint32_t vext_vm(uint32_t desc) +{ + return FIELD_EX32(simd_data(desc), VDATA, VM); +} + +static inline uint32_t vext_vma(uint32_t desc) +{ + return FIELD_EX32(simd_data(desc), VDATA, VMA); +} + +static inline uint32_t vext_vta(uint32_t desc) +{ + return FIELD_EX32(simd_data(desc), VDATA, VTA); +} + +static inline uint32_t vext_vta_all_1s(uint32_t desc) +{ + return FIELD_EX32(simd_data(desc), VDATA, VTA_ALL_1S); +} + +/* + * Earlier designs (pre-0.9) had a varying number of bits + * per mask value (MLEN). In the 0.9 design, MLEN=1. + * (Section 4.5) + */ +static inline int vext_elem_mask(void *v0, int index) +{ + int idx = index / 64; + int pos = index % 64; + return (((uint64_t *)v0)[idx] >> pos) & 1; +} + +/* + * Get number of total elements, including prestart, body and tail elements. + * Note that when LMUL < 1, the tail includes the elements past VLMAX that + * are held in the same vector register. + */ +static inline uint32_t vext_get_total_elems(CPURISCVState *env, uint32_t desc, + uint32_t esz) +{ + uint32_t vlenb = simd_maxsz(desc); + uint32_t sew = 1 << FIELD_EX64(env->vtype, VTYPE, VSEW); + int8_t emul = ctzl(esz) - ctzl(sew) + vext_lmul(desc) < 0 ? 0 : + ctzl(esz) - ctzl(sew) + vext_lmul(desc); + return (vlenb << emul) / esz; +} + +/* set agnostic elements to 1s */ +void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt, + uint32_t tot); + +/* expand macro args before macro */ +#define RVVCALL(macro, ...) macro(__VA_ARGS__) + +/* (TD, T1, T2, TX1, TX2) */ +#define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t +#define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t +#define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t +#define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t + +/* operation of two vector elements */ +typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); + +#define OPIVV2(NAME, TD, T1, T2, TX1, TX2, HD, HS1, HS2, OP) \ +static void do_##NAME(void *vd, void *vs1, void *vs2, int i) \ +{ \ + TX1 s1 = *((T1 *)vs1 + HS1(i)); \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2, s1); \ +} + +void do_vext_vv(void *vd, void *v0, void *vs1, void *vs2, + CPURISCVState *env, uint32_t desc, + opivv2_fn *fn, uint32_t esz); + +/* generate the helpers for OPIVV */ +#define GEN_VEXT_VV(NAME, ESZ) \ +void HELPER(NAME)(void *vd, void *v0, void *vs1, \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + do_vext_vv(vd, v0, vs1, vs2, env, desc, \ + do_##NAME, ESZ); \ +} + +typedef void opivx2_fn(void *vd, target_long s1, void *vs2, int i); + +/* + * (T1)s1 gives the real operator type. + * (TX1)(T1)s1 expands the operator type of widen or narrow operations. + */ +#define OPIVX2(NAME, TD, T1, T2, TX1, TX2, HD, HS2, OP) \ +static void do_##NAME(void *vd, target_long s1, void *vs2, int i) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2, (TX1)(T1)s1); \ +} + +void do_vext_vx(void *vd, void *v0, target_long s1, void *vs2, + CPURISCVState *env, uint32_t desc, + opivx2_fn fn, uint32_t esz); + +/* generate the helpers for OPIVX */ +#define GEN_VEXT_VX(NAME, ESZ) \ +void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ + void *vs2, CPURISCVState *env, \ + uint32_t desc) \ +{ \ + do_vext_vx(vd, v0, s1, vs2, env, desc, \ + do_##NAME, ESZ); \ +} + +#endif /* TARGET_RISCV_VECTOR_INTERNALS_H */ From a44f19f611ad6358189c58b3c39012f68613e6f9 Mon Sep 17 00:00:00 2001 From: Kiran Ostrolenk Date: Wed, 12 Jul 2023 00:59:01 +0800 Subject: [PATCH 09/45] target/riscv: Refactor vector-vector translation macro Refactor the non SEW-specific stuff out of `GEN_OPIVV_TRANS` into function `opivv_trans` (similar to `opivi_trans`). `opivv_trans` will be used in proceeding vector-crypto commits. Signed-off-by: Kiran Ostrolenk Reviewed-by: Richard Henderson Reviewed-by: Alistair Francis Reviewed-by: Weiwei Li Signed-off-by: Max Chou Message-ID: <20230711165917.2629866-3-max.chou@sifive.com> Signed-off-by: Alistair Francis --- target/riscv/insn_trans/trans_rvv.c.inc | 62 +++++++++++++------------ 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index 6ab63f4442..f9dcb747a6 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -1643,38 +1643,40 @@ GEN_OPIWX_WIDEN_TRANS(vwadd_wx) GEN_OPIWX_WIDEN_TRANS(vwsubu_wx) GEN_OPIWX_WIDEN_TRANS(vwsub_wx) +static bool opivv_trans(uint32_t vd, uint32_t vs1, uint32_t vs2, uint32_t vm, + gen_helper_gvec_4_ptr *fn, DisasContext *s) +{ + uint32_t data = 0; + TCGLabel *over = gen_new_label(); + tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); + + data = FIELD_DP32(data, VDATA, VM, vm); + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, VTA, s->vta); + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); + data = FIELD_DP32(data, VDATA, VMA, s->vma); + tcg_gen_gvec_4_ptr(vreg_ofs(s, vd), vreg_ofs(s, 0), vreg_ofs(s, vs1), + vreg_ofs(s, vs2), cpu_env, s->cfg_ptr->vlen / 8, + s->cfg_ptr->vlen / 8, data, fn); + mark_vs_dirty(s); + gen_set_label(over); + return true; +} + /* Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions */ /* OPIVV without GVEC IR */ -#define GEN_OPIVV_TRANS(NAME, CHECK) \ -static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ -{ \ - if (CHECK(s, a)) { \ - uint32_t data = 0; \ - static gen_helper_gvec_4_ptr * const fns[4] = { \ - gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ - gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ - }; \ - TCGLabel *over = gen_new_label(); \ - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ - tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ - \ - data = FIELD_DP32(data, VDATA, VM, a->vm); \ - data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ - data = FIELD_DP32(data, VDATA, VTA, s->vta); \ - data = \ - FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s);\ - data = FIELD_DP32(data, VDATA, VMA, s->vma); \ - tcg_gen_gvec_4_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ - vreg_ofs(s, a->rs1), \ - vreg_ofs(s, a->rs2), cpu_env, \ - s->cfg_ptr->vlen / 8, \ - s->cfg_ptr->vlen / 8, data, \ - fns[s->sew]); \ - mark_vs_dirty(s); \ - gen_set_label(over); \ - return true; \ - } \ - return false; \ +#define GEN_OPIVV_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (CHECK(s, a)) { \ + static gen_helper_gvec_4_ptr * const fns[4] = { \ + gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ + }; \ + return opivv_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s);\ + } \ + return false; \ } /* From 922f87351101379e3c2487120c394e51cf53984f Mon Sep 17 00:00:00 2001 From: Nazar Kazakov Date: Wed, 12 Jul 2023 00:59:02 +0800 Subject: [PATCH 10/45] target/riscv: Remove redundant "cpu_vl == 0" checks Remove the redundant "vl == 0" check which is already included within the vstart >= vl check, when vl == 0. Signed-off-by: Nazar Kazakov Reviewed-by: Weiwei Li Signed-off-by: Max Chou Acked-by: Alistair Francis Message-ID: <20230711165917.2629866-4-max.chou@sifive.com> Signed-off-by: Alistair Francis --- target/riscv/insn_trans/trans_rvv.c.inc | 31 +------------------------ 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index f9dcb747a6..bf7384c065 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -617,7 +617,6 @@ static bool ldst_us_trans(uint32_t vd, uint32_t rs1, uint32_t data, TCGv_i32 desc; TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); dest = tcg_temp_new_ptr(); @@ -786,7 +785,6 @@ static bool ldst_stride_trans(uint32_t vd, uint32_t rs1, uint32_t rs2, TCGv_i32 desc; TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); dest = tcg_temp_new_ptr(); @@ -893,7 +891,6 @@ static bool ldst_index_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, TCGv_i32 desc; TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); dest = tcg_temp_new_ptr(); @@ -1034,7 +1031,6 @@ static bool ldff_trans(uint32_t vd, uint32_t rs1, uint32_t data, TCGv_i32 desc; TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); dest = tcg_temp_new_ptr(); @@ -1191,7 +1187,6 @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn, return false; } - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { @@ -1241,7 +1236,6 @@ static bool opivx_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, uint32_t vm, uint32_t data = 0; TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); dest = tcg_temp_new_ptr(); @@ -1405,7 +1399,6 @@ static bool opivi_trans(uint32_t vd, uint32_t imm, uint32_t vs2, uint32_t vm, uint32_t data = 0; TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); dest = tcg_temp_new_ptr(); @@ -1492,7 +1485,6 @@ static bool do_opivv_widen(DisasContext *s, arg_rmrr *a, if (checkfn(s, a)) { uint32_t data = 0; TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); data = FIELD_DP32(data, VDATA, VM, a->vm); @@ -1575,7 +1567,6 @@ static bool do_opiwv_widen(DisasContext *s, arg_rmrr *a, if (opiwv_widen_check(s, a)) { uint32_t data = 0; TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); data = FIELD_DP32(data, VDATA, VM, a->vm); @@ -1648,7 +1639,6 @@ static bool opivv_trans(uint32_t vd, uint32_t vs1, uint32_t vs2, uint32_t vm, { uint32_t data = 0; TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); data = FIELD_DP32(data, VDATA, VM, vm); @@ -1842,7 +1832,6 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ gen_helper_##NAME##_w, \ }; \ TCGLabel *over = gen_new_label(); \ - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ @@ -2054,7 +2043,6 @@ static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a) gen_helper_vmv_v_v_w, gen_helper_vmv_v_v_d, }; TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); tcg_gen_gvec_2_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), @@ -2078,7 +2066,6 @@ static bool trans_vmv_v_x(DisasContext *s, arg_vmv_v_x *a) vext_check_ss(s, a->rd, 0, 1)) { TCGv s1; TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); s1 = get_gpr(s, a->rs1, EXT_SIGN); @@ -2140,7 +2127,6 @@ static bool trans_vmv_v_i(DisasContext *s, arg_vmv_v_i *a) gen_helper_vmv_v_x_w, gen_helper_vmv_v_x_d, }; TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); s1 = tcg_constant_i64(simm); @@ -2288,7 +2274,6 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ }; \ TCGLabel *over = gen_new_label(); \ gen_set_rm(s, RISCV_FRM_DYN); \ - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ @@ -2323,7 +2308,6 @@ static bool opfvf_trans(uint32_t vd, uint32_t rs1, uint32_t vs2, TCGv_i64 t1; TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); dest = tcg_temp_new_ptr(); @@ -2408,7 +2392,6 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ }; \ TCGLabel *over = gen_new_label(); \ gen_set_rm(s, RISCV_FRM_DYN); \ - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over);\ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ @@ -2483,7 +2466,6 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ }; \ TCGLabel *over = gen_new_label(); \ gen_set_rm(s, RISCV_FRM_DYN); \ - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ @@ -2601,7 +2583,6 @@ static bool do_opfv(DisasContext *s, arg_rmr *a, uint32_t data = 0; TCGLabel *over = gen_new_label(); gen_set_rm_chkfrm(s, rm); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); data = FIELD_DP32(data, VDATA, VM, a->vm); @@ -2713,7 +2694,6 @@ static bool trans_vfmv_v_f(DisasContext *s, arg_vfmv_v_f *a) gen_helper_vmv_v_x_d, }; TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); t1 = tcg_temp_new_i64(); @@ -2792,7 +2772,6 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ }; \ TCGLabel *over = gen_new_label(); \ gen_set_rm_chkfrm(s, FRM); \ - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ @@ -2844,7 +2823,6 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ }; \ TCGLabel *over = gen_new_label(); \ gen_set_rm(s, RISCV_FRM_DYN); \ - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ @@ -2912,7 +2890,6 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ }; \ TCGLabel *over = gen_new_label(); \ gen_set_rm_chkfrm(s, FRM); \ - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ @@ -2962,7 +2939,6 @@ static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ }; \ TCGLabel *over = gen_new_label(); \ gen_set_rm_chkfrm(s, FRM); \ - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, VM, a->vm); \ @@ -3053,7 +3029,6 @@ static bool trans_##NAME(DisasContext *s, arg_r *a) \ uint32_t data = 0; \ gen_helper_gvec_4_ptr *fn = gen_helper_##NAME; \ TCGLabel *over = gen_new_label(); \ - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); \ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ \ data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ @@ -3224,7 +3199,6 @@ static bool trans_vid_v(DisasContext *s, arg_vid_v *a) require_vm(a->vm, a->rd)) { uint32_t data = 0; TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); data = FIELD_DP32(data, VDATA, VM, a->vm); @@ -3411,7 +3385,6 @@ static bool trans_vmv_s_x(DisasContext *s, arg_vmv_s_x *a) TCGv s1; TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); t1 = tcg_temp_new_i64(); @@ -3468,8 +3441,7 @@ static bool trans_vfmv_s_f(DisasContext *s, arg_vfmv_s_f *a) TCGv_i64 t1; TCGLabel *over = gen_new_label(); - /* if vl == 0 or vstart >= vl, skip vector register write back */ - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); + /* if vstart >= vl, skip vector register write back */ tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); /* NaN-box f[rs1] */ @@ -3720,7 +3692,6 @@ static bool int_ext_op(DisasContext *s, arg_rmr *a, uint8_t seq) uint32_t data = 0; gen_helper_gvec_3_ptr *fn; TCGLabel *over = gen_new_label(); - tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_vl, 0, over); tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); static gen_helper_gvec_3_ptr * const fns[6][4] = { From e13c7d3b5bfd632b3a39217843b28e185c366fc2 Mon Sep 17 00:00:00 2001 From: Lawrence Hunter Date: Wed, 12 Jul 2023 00:59:03 +0800 Subject: [PATCH 11/45] target/riscv: Add Zvbc ISA extension support This commit adds support for the Zvbc vector-crypto extension, which consists of the following instructions: * vclmulh.[vx,vv] * vclmul.[vx,vv] Translation functions are defined in `target/riscv/insn_trans/trans_rvvk.c.inc` and helpers are defined in `target/riscv/vcrypto_helper.c`. Co-authored-by: Nazar Kazakov Co-authored-by: Max Chou Signed-off-by: Nazar Kazakov Signed-off-by: Lawrence Hunter Signed-off-by: Max Chou [max.chou@sifive.com: Exposed x-zvbc property] Message-ID: <20230711165917.2629866-5-max.chou@sifive.com> Signed-off-by: Alistair Francis --- target/riscv/cpu.c | 9 ++++ target/riscv/cpu_cfg.h | 1 + target/riscv/helper.h | 6 +++ target/riscv/insn32.decode | 6 +++ target/riscv/insn_trans/trans_rvvk.c.inc | 62 ++++++++++++++++++++++++ target/riscv/meson.build | 3 +- target/riscv/translate.c | 1 + target/riscv/vcrypto_helper.c | 59 ++++++++++++++++++++++ 8 files changed, 146 insertions(+), 1 deletion(-) create mode 100644 target/riscv/insn_trans/trans_rvvk.c.inc create mode 100644 target/riscv/vcrypto_helper.c diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index 921c19e6cd..f74e0926c2 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -120,6 +120,7 @@ static const struct isa_ext_data isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(zksed, PRIV_VERSION_1_12_0, ext_zksed), ISA_EXT_DATA_ENTRY(zksh, PRIV_VERSION_1_12_0, ext_zksh), ISA_EXT_DATA_ENTRY(zkt, PRIV_VERSION_1_12_0, ext_zkt), + ISA_EXT_DATA_ENTRY(zvbc, PRIV_VERSION_1_12_0, ext_zvbc), ISA_EXT_DATA_ENTRY(zve32f, PRIV_VERSION_1_10_0, ext_zve32f), ISA_EXT_DATA_ENTRY(zve64f, PRIV_VERSION_1_10_0, ext_zve64f), ISA_EXT_DATA_ENTRY(zve64d, PRIV_VERSION_1_10_0, ext_zve64d), @@ -1271,6 +1272,11 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) return; } + if (cpu->cfg.ext_zvbc && !cpu->cfg.ext_zve64f) { + error_setg(errp, "Zvbc extension requires V or Zve64{f,d} extensions"); + return; + } + if (cpu->cfg.ext_zk) { cpu->cfg.ext_zkn = true; cpu->cfg.ext_zkr = true; @@ -1853,6 +1859,9 @@ static Property riscv_cpu_extensions[] = { DEFINE_PROP_BOOL("x-zvfbfmin", RISCVCPU, cfg.ext_zvfbfmin, false), DEFINE_PROP_BOOL("x-zvfbfwma", RISCVCPU, cfg.ext_zvfbfwma, false), + /* Vector cryptography extensions */ + DEFINE_PROP_BOOL("x-zvbc", RISCVCPU, cfg.ext_zvbc, false), + DEFINE_PROP_END_OF_LIST(), }; diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h index 2bd9510ba3..d25b36a512 100644 --- a/target/riscv/cpu_cfg.h +++ b/target/riscv/cpu_cfg.h @@ -85,6 +85,7 @@ struct RISCVCPUConfig { bool ext_zve32f; bool ext_zve64f; bool ext_zve64d; + bool ext_zvbc; bool ext_zmmul; bool ext_zvfbfmin; bool ext_zvfbfwma; diff --git a/target/riscv/helper.h b/target/riscv/helper.h index c95adaf08a..6776777c4e 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -1182,3 +1182,9 @@ DEF_HELPER_5(vfwcvtbf16_f_f_v, void, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfwmaccbf16_vv, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vfwmaccbf16_vf, void, ptr, ptr, i64, ptr, env, i32) + +/* Vector crypto functions */ +DEF_HELPER_6(vclmul_vv, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vclmul_vx, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vclmulh_vv, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vclmulh_vx, void, ptr, ptr, tl, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index e341fa9213..dd50d5a48c 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -946,3 +946,9 @@ vfwcvtbf16_f_f_v 010010 . ..... 01101 001 ..... 1010111 @r2_vm # *** Zvfbfwma Standard Extension *** vfwmaccbf16_vv 111011 . ..... ..... 001 ..... 1010111 @r_vm vfwmaccbf16_vf 111011 . ..... ..... 101 ..... 1010111 @r_vm + +# *** Zvbc vector crypto extension *** +vclmul_vv 001100 . ..... ..... 010 ..... 1010111 @r_vm +vclmul_vx 001100 . ..... ..... 110 ..... 1010111 @r_vm +vclmulh_vv 001101 . ..... ..... 010 ..... 1010111 @r_vm +vclmulh_vx 001101 . ..... ..... 110 ..... 1010111 @r_vm diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc new file mode 100644 index 0000000000..552b08a2fd --- /dev/null +++ b/target/riscv/insn_trans/trans_rvvk.c.inc @@ -0,0 +1,62 @@ +/* + * RISC-V translation routines for the vector crypto extension. + * + * Copyright (C) 2023 SiFive, Inc. + * Written by Codethink Ltd and SiFive. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +/* + * Zvbc + */ + +#define GEN_VV_MASKED_TRANS(NAME, CHECK) \ + static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ + { \ + if (CHECK(s, a)) { \ + return opivv_trans(a->rd, a->rs1, a->rs2, a->vm, \ + gen_helper_##NAME, s); \ + } \ + return false; \ + } + +static bool vclmul_vv_check(DisasContext *s, arg_rmrr *a) +{ + return opivv_check(s, a) && + s->cfg_ptr->ext_zvbc == true && + s->sew == MO_64; +} + +GEN_VV_MASKED_TRANS(vclmul_vv, vclmul_vv_check) +GEN_VV_MASKED_TRANS(vclmulh_vv, vclmul_vv_check) + +#define GEN_VX_MASKED_TRANS(NAME, CHECK) \ + static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ + { \ + if (CHECK(s, a)) { \ + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, \ + gen_helper_##NAME, s); \ + } \ + return false; \ + } + +static bool vclmul_vx_check(DisasContext *s, arg_rmrr *a) +{ + return opivx_check(s, a) && + s->cfg_ptr->ext_zvbc == true && + s->sew == MO_64; +} + +GEN_VX_MASKED_TRANS(vclmul_vx, vclmul_vx_check) +GEN_VX_MASKED_TRANS(vclmulh_vx, vclmul_vx_check) diff --git a/target/riscv/meson.build b/target/riscv/meson.build index c3801ee5e0..660078bda1 100644 --- a/target/riscv/meson.build +++ b/target/riscv/meson.build @@ -21,7 +21,8 @@ riscv_ss.add(files( 'translate.c', 'm128_helper.c', 'crypto_helper.c', - 'zce_helper.c' + 'zce_helper.c', + 'vcrypto_helper.c' )) riscv_ss.add(when: 'CONFIG_KVM', if_true: files('kvm.c'), if_false: files('kvm-stub.c')) diff --git a/target/riscv/translate.c b/target/riscv/translate.c index 697df1be9e..7dbf173adb 100644 --- a/target/riscv/translate.c +++ b/target/riscv/translate.c @@ -1094,6 +1094,7 @@ static uint32_t opcode_at(DisasContextBase *dcbase, target_ulong pc) #include "insn_trans/trans_rvzfa.c.inc" #include "insn_trans/trans_rvzfh.c.inc" #include "insn_trans/trans_rvk.c.inc" +#include "insn_trans/trans_rvvk.c.inc" #include "insn_trans/trans_privileged.c.inc" #include "insn_trans/trans_svinval.c.inc" #include "insn_trans/trans_rvbf16.c.inc" diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c new file mode 100644 index 0000000000..8b7c63d499 --- /dev/null +++ b/target/riscv/vcrypto_helper.c @@ -0,0 +1,59 @@ +/* + * RISC-V Vector Crypto Extension Helpers for QEMU. + * + * Copyright (C) 2023 SiFive, Inc. + * Written by Codethink Ltd and SiFive. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2 or later, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#include "qemu/osdep.h" +#include "qemu/host-utils.h" +#include "qemu/bitops.h" +#include "cpu.h" +#include "exec/memop.h" +#include "exec/exec-all.h" +#include "exec/helper-proto.h" +#include "internals.h" +#include "vector_internals.h" + +static uint64_t clmul64(uint64_t y, uint64_t x) +{ + uint64_t result = 0; + for (int j = 63; j >= 0; j--) { + if ((y >> j) & 1) { + result ^= (x << j); + } + } + return result; +} + +static uint64_t clmulh64(uint64_t y, uint64_t x) +{ + uint64_t result = 0; + for (int j = 63; j >= 1; j--) { + if ((y >> j) & 1) { + result ^= (x >> (64 - j)); + } + } + return result; +} + +RVVCALL(OPIVV2, vclmul_vv, OP_UUU_D, H8, H8, H8, clmul64) +GEN_VEXT_VV(vclmul_vv, 8) +RVVCALL(OPIVX2, vclmul_vx, OP_UUU_D, H8, H8, clmul64) +GEN_VEXT_VX(vclmul_vx, 8) +RVVCALL(OPIVV2, vclmulh_vv, OP_UUU_D, H8, H8, H8, clmulh64) +GEN_VEXT_VV(vclmulh_vv, 8) +RVVCALL(OPIVX2, vclmulh_vx, OP_UUU_D, H8, H8, clmulh64) +GEN_VEXT_VX(vclmulh_vx, 8) From 1ac7a501f0630cf4293a892d884791278fdbb88b Mon Sep 17 00:00:00 2001 From: Nazar Kazakov Date: Wed, 12 Jul 2023 00:59:04 +0800 Subject: [PATCH 12/45] target/riscv: Move vector translation checks Move the checks out of `do_opiv{v,x,i}_gvec{,_shift}` functions and into the corresponding macros. This enables the functions to be reused in proceeding commits without check duplication. Signed-off-by: Nazar Kazakov Reviewed-by: Richard Henderson Reviewed-by: Weiwei Li Signed-off-by: Max Chou Message-ID: <20230711165917.2629866-6-max.chou@sifive.com> Signed-off-by: Alistair Francis --- target/riscv/insn_trans/trans_rvv.c.inc | 28 +++++++++++-------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index bf7384c065..641cf5da6f 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -1183,9 +1183,6 @@ do_opivv_gvec(DisasContext *s, arg_rmrr *a, GVecGen3Fn *gvec_fn, gen_helper_gvec_4_ptr *fn) { TCGLabel *over = gen_new_label(); - if (!opivv_check(s, a)) { - return false; - } tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); @@ -1218,6 +1215,9 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ }; \ + if (!opivv_check(s, a)) { \ + return false; \ + } \ return do_opivv_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ } @@ -1276,10 +1276,6 @@ static inline bool do_opivx_gvec(DisasContext *s, arg_rmrr *a, GVecGen2sFn *gvec_fn, gen_helper_opivx *fn) { - if (!opivx_check(s, a)) { - return false; - } - if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { TCGv_i64 src1 = tcg_temp_new_i64(); @@ -1301,6 +1297,9 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ }; \ + if (!opivx_check(s, a)) { \ + return false; \ + } \ return do_opivx_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ } @@ -1432,10 +1431,6 @@ static inline bool do_opivi_gvec(DisasContext *s, arg_rmrr *a, GVecGen2iFn *gvec_fn, gen_helper_opivx *fn, imm_mode_t imm_mode) { - if (!opivx_check(s, a)) { - return false; - } - if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { gvec_fn(s->sew, vreg_ofs(s, a->rd), vreg_ofs(s, a->rs2), extract_imm(s, a->rs1, imm_mode), MAXSZ(s), MAXSZ(s)); @@ -1453,6 +1448,9 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ gen_helper_##OPIVX##_b, gen_helper_##OPIVX##_h, \ gen_helper_##OPIVX##_w, gen_helper_##OPIVX##_d, \ }; \ + if (!opivx_check(s, a)) { \ + return false; \ + } \ return do_opivi_gvec(s, a, tcg_gen_gvec_##SUF, \ fns[s->sew], IMM_MODE); \ } @@ -1775,10 +1773,6 @@ static inline bool do_opivx_gvec_shift(DisasContext *s, arg_rmrr *a, GVecGen2sFn32 *gvec_fn, gen_helper_opivx *fn) { - if (!opivx_check(s, a)) { - return false; - } - if (a->vm && s->vl_eq_vlmax && !(s->vta && s->lmul < 0)) { TCGv_i32 src1 = tcg_temp_new_i32(); @@ -1800,7 +1794,9 @@ static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ gen_helper_##NAME##_b, gen_helper_##NAME##_h, \ gen_helper_##NAME##_w, gen_helper_##NAME##_d, \ }; \ - \ + if (!opivx_check(s, a)) { \ + return false; \ + } \ return do_opivx_gvec_shift(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ } From 62cb3e8e887bf279c7af5af4c971a44d2fd8a516 Mon Sep 17 00:00:00 2001 From: Dickon Hood Date: Wed, 12 Jul 2023 00:59:05 +0800 Subject: [PATCH 13/45] target/riscv: Refactor translation of vector-widening instruction Zvbb (implemented in later commit) has a widening instruction, which requires an extra check on the enabled extensions. Refactor GEN_OPIVX_WIDEN_TRANS() to take a check function to avoid reimplementing it. Signed-off-by: Dickon Hood Reviewed-by: Richard Henderson Reviewed-by: Weiwei Li Signed-off-by: Max Chou Message-ID: <20230711165917.2629866-7-max.chou@sifive.com> Signed-off-by: Alistair Francis --- target/riscv/insn_trans/trans_rvv.c.inc | 52 +++++++++++-------------- 1 file changed, 23 insertions(+), 29 deletions(-) diff --git a/target/riscv/insn_trans/trans_rvv.c.inc b/target/riscv/insn_trans/trans_rvv.c.inc index 641cf5da6f..63404f61fc 100644 --- a/target/riscv/insn_trans/trans_rvv.c.inc +++ b/target/riscv/insn_trans/trans_rvv.c.inc @@ -1526,30 +1526,24 @@ static bool opivx_widen_check(DisasContext *s, arg_rmrr *a) vext_check_ds(s, a->rd, a->rs2, a->vm); } -static bool do_opivx_widen(DisasContext *s, arg_rmrr *a, - gen_helper_opivx *fn) -{ - if (opivx_widen_check(s, a)) { - return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fn, s); - } - return false; +#define GEN_OPIVX_WIDEN_TRANS(NAME, CHECK) \ +static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ +{ \ + if (CHECK(s, a)) { \ + static gen_helper_opivx * const fns[3] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w \ + }; \ + return opivx_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s); \ + } \ + return false; \ } -#define GEN_OPIVX_WIDEN_TRANS(NAME) \ -static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ -{ \ - static gen_helper_opivx * const fns[3] = { \ - gen_helper_##NAME##_b, \ - gen_helper_##NAME##_h, \ - gen_helper_##NAME##_w \ - }; \ - return do_opivx_widen(s, a, fns[s->sew]); \ -} - -GEN_OPIVX_WIDEN_TRANS(vwaddu_vx) -GEN_OPIVX_WIDEN_TRANS(vwadd_vx) -GEN_OPIVX_WIDEN_TRANS(vwsubu_vx) -GEN_OPIVX_WIDEN_TRANS(vwsub_vx) +GEN_OPIVX_WIDEN_TRANS(vwaddu_vx, opivx_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwadd_vx, opivx_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwsubu_vx, opivx_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwsub_vx, opivx_widen_check) /* WIDEN OPIVV with WIDEN */ static bool opiwv_widen_check(DisasContext *s, arg_rmrr *a) @@ -1997,9 +1991,9 @@ GEN_OPIVX_TRANS(vrem_vx, opivx_check) GEN_OPIVV_WIDEN_TRANS(vwmul_vv, opivv_widen_check) GEN_OPIVV_WIDEN_TRANS(vwmulu_vv, opivv_widen_check) GEN_OPIVV_WIDEN_TRANS(vwmulsu_vv, opivv_widen_check) -GEN_OPIVX_WIDEN_TRANS(vwmul_vx) -GEN_OPIVX_WIDEN_TRANS(vwmulu_vx) -GEN_OPIVX_WIDEN_TRANS(vwmulsu_vx) +GEN_OPIVX_WIDEN_TRANS(vwmul_vx, opivx_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwmulu_vx, opivx_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwmulsu_vx, opivx_widen_check) /* Vector Single-Width Integer Multiply-Add Instructions */ GEN_OPIVV_TRANS(vmacc_vv, opivv_check) @@ -2015,10 +2009,10 @@ GEN_OPIVX_TRANS(vnmsub_vx, opivx_check) GEN_OPIVV_WIDEN_TRANS(vwmaccu_vv, opivv_widen_check) GEN_OPIVV_WIDEN_TRANS(vwmacc_vv, opivv_widen_check) GEN_OPIVV_WIDEN_TRANS(vwmaccsu_vv, opivv_widen_check) -GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx) -GEN_OPIVX_WIDEN_TRANS(vwmacc_vx) -GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx) -GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx) +GEN_OPIVX_WIDEN_TRANS(vwmaccu_vx, opivx_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwmacc_vx, opivx_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwmaccsu_vx, opivx_widen_check) +GEN_OPIVX_WIDEN_TRANS(vwmaccus_vx, opivx_widen_check) /* Vector Integer Merge and Move Instructions */ static bool trans_vmv_v_v(DisasContext *s, arg_vmv_v_v *a) From 2152e48b501de38fcd497ef0188238e46e320f5f Mon Sep 17 00:00:00 2001 From: Kiran Ostrolenk Date: Wed, 12 Jul 2023 00:59:06 +0800 Subject: [PATCH 14/45] target/riscv: Refactor some of the generic vector functionality Move some macros out of `vector_helper` and into `vector_internals`. This ensures they can be used by both vector and vector-crypto helpers (latter implemented in proceeding commits). Signed-off-by: Kiran Ostrolenk Reviewed-by: Weiwei Li Signed-off-by: Max Chou Message-ID: <20230711165917.2629866-8-max.chou@sifive.com> Signed-off-by: Alistair Francis --- target/riscv/vector_helper.c | 42 ------------------------------ target/riscv/vector_internals.h | 46 +++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 42 deletions(-) diff --git a/target/riscv/vector_helper.c b/target/riscv/vector_helper.c index 1f29236a63..3fb05cc3d6 100644 --- a/target/riscv/vector_helper.c +++ b/target/riscv/vector_helper.c @@ -636,9 +636,6 @@ GEN_VEXT_ST_WHOLE(vs8r_v, int8_t, ste_b) #define OP_SUS_H int16_t, uint16_t, int16_t, uint16_t, int16_t #define OP_SUS_W int32_t, uint32_t, int32_t, uint32_t, int32_t #define OP_SUS_D int64_t, uint64_t, int64_t, uint64_t, int64_t -#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t -#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t -#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t #define WOP_SSS_B int16_t, int8_t, int8_t, int16_t, int16_t #define WOP_SSS_H int32_t, int16_t, int16_t, int32_t, int32_t #define WOP_SSS_W int64_t, int32_t, int32_t, int64_t, int64_t @@ -3438,11 +3435,6 @@ GEN_VEXT_VF(vfwnmsac_vf_h, 4) GEN_VEXT_VF(vfwnmsac_vf_w, 8) /* Vector Floating-Point Square-Root Instruction */ -/* (TD, T2, TX2) */ -#define OP_UU_H uint16_t, uint16_t, uint16_t -#define OP_UU_W uint32_t, uint32_t, uint32_t -#define OP_UU_D uint64_t, uint64_t, uint64_t - #define OPFVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ static void do_##NAME(void *vd, void *vs2, int i, \ CPURISCVState *env) \ @@ -4139,40 +4131,6 @@ GEN_VEXT_CMP_VF(vmfge_vf_w, uint32_t, H4, vmfge32) GEN_VEXT_CMP_VF(vmfge_vf_d, uint64_t, H8, vmfge64) /* Vector Floating-Point Classify Instruction */ -#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ -static void do_##NAME(void *vd, void *vs2, int i) \ -{ \ - TX2 s2 = *((T2 *)vs2 + HS2(i)); \ - *((TD *)vd + HD(i)) = OP(s2); \ -} - -#define GEN_VEXT_V(NAME, ESZ) \ -void HELPER(NAME)(void *vd, void *v0, void *vs2, \ - CPURISCVState *env, uint32_t desc) \ -{ \ - uint32_t vm = vext_vm(desc); \ - uint32_t vl = env->vl; \ - uint32_t total_elems = \ - vext_get_total_elems(env, desc, ESZ); \ - uint32_t vta = vext_vta(desc); \ - uint32_t vma = vext_vma(desc); \ - uint32_t i; \ - \ - for (i = env->vstart; i < vl; i++) { \ - if (!vm && !vext_elem_mask(v0, i)) { \ - /* set masked-off elements to 1s */ \ - vext_set_elems_1s(vd, vma, i * ESZ, \ - (i + 1) * ESZ); \ - continue; \ - } \ - do_##NAME(vd, vs2, i); \ - } \ - env->vstart = 0; \ - /* set tail elements to 1s */ \ - vext_set_elems_1s(vd, vta, vl * ESZ, \ - total_elems * ESZ); \ -} - target_ulong fclass_h(uint64_t frs1) { float16 f = frs1; diff --git a/target/riscv/vector_internals.h b/target/riscv/vector_internals.h index 749d138beb..8133111e5f 100644 --- a/target/riscv/vector_internals.h +++ b/target/riscv/vector_internals.h @@ -121,12 +121,52 @@ void vext_set_elems_1s(void *base, uint32_t is_agnostic, uint32_t cnt, /* expand macro args before macro */ #define RVVCALL(macro, ...) macro(__VA_ARGS__) +/* (TD, T2, TX2) */ +#define OP_UU_B uint8_t, uint8_t, uint8_t +#define OP_UU_H uint16_t, uint16_t, uint16_t +#define OP_UU_W uint32_t, uint32_t, uint32_t +#define OP_UU_D uint64_t, uint64_t, uint64_t + /* (TD, T1, T2, TX1, TX2) */ #define OP_UUU_B uint8_t, uint8_t, uint8_t, uint8_t, uint8_t #define OP_UUU_H uint16_t, uint16_t, uint16_t, uint16_t, uint16_t #define OP_UUU_W uint32_t, uint32_t, uint32_t, uint32_t, uint32_t #define OP_UUU_D uint64_t, uint64_t, uint64_t, uint64_t, uint64_t +#define OPIVV1(NAME, TD, T2, TX2, HD, HS2, OP) \ +static void do_##NAME(void *vd, void *vs2, int i) \ +{ \ + TX2 s2 = *((T2 *)vs2 + HS2(i)); \ + *((TD *)vd + HD(i)) = OP(s2); \ +} + +#define GEN_VEXT_V(NAME, ESZ) \ +void HELPER(NAME)(void *vd, void *v0, void *vs2, \ + CPURISCVState *env, uint32_t desc) \ +{ \ + uint32_t vm = vext_vm(desc); \ + uint32_t vl = env->vl; \ + uint32_t total_elems = \ + vext_get_total_elems(env, desc, ESZ); \ + uint32_t vta = vext_vta(desc); \ + uint32_t vma = vext_vma(desc); \ + uint32_t i; \ + \ + for (i = env->vstart; i < vl; i++) { \ + if (!vm && !vext_elem_mask(v0, i)) { \ + /* set masked-off elements to 1s */ \ + vext_set_elems_1s(vd, vma, i * ESZ, \ + (i + 1) * ESZ); \ + continue; \ + } \ + do_##NAME(vd, vs2, i); \ + } \ + env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s(vd, vta, vl * ESZ, \ + total_elems * ESZ); \ +} + /* operation of two vector elements */ typedef void opivv2_fn(void *vd, void *vs1, void *vs2, int i); @@ -179,4 +219,10 @@ void HELPER(NAME)(void *vd, void *v0, target_ulong s1, \ do_##NAME, ESZ); \ } +/* Three of the widening shortening macros: */ +/* (TD, T1, T2, TX1, TX2) */ +#define WOP_UUU_B uint16_t, uint8_t, uint8_t, uint16_t, uint16_t +#define WOP_UUU_H uint32_t, uint16_t, uint16_t, uint32_t, uint32_t +#define WOP_UUU_W uint64_t, uint32_t, uint32_t, uint64_t, uint64_t + #endif /* TARGET_RISCV_VECTOR_INTERNALS_H */ From 0602847289feed9c5abd25ebe5604596c9d4bdbe Mon Sep 17 00:00:00 2001 From: Dickon Hood Date: Wed, 12 Jul 2023 00:59:07 +0800 Subject: [PATCH 15/45] target/riscv: Add Zvbb ISA extension support This commit adds support for the Zvbb vector-crypto extension, which consists of the following instructions: * vrol.[vv,vx] * vror.[vv,vx,vi] * vbrev8.v * vrev8.v * vandn.[vv,vx] * vbrev.v * vclz.v * vctz.v * vcpop.v * vwsll.[vv,vx,vi] Translation functions are defined in `target/riscv/insn_trans/trans_rvvk.c.inc` and helpers are defined in `target/riscv/vcrypto_helper.c`. Co-authored-by: Nazar Kazakov Co-authored-by: William Salmon Co-authored-by: Kiran Ostrolenk [max.chou@sifive.com: Fix imm mode of vror.vi] Signed-off-by: Nazar Kazakov Signed-off-by: William Salmon Signed-off-by: Kiran Ostrolenk Signed-off-by: Dickon Hood Signed-off-by: Max Chou Reviewed-by: Daniel Henrique Barboza [max.chou@sifive.com: Exposed x-zvbb property] Message-ID: <20230711165917.2629866-9-max.chou@sifive.com> Signed-off-by: Alistair Francis --- target/riscv/cpu.c | 12 ++ target/riscv/cpu_cfg.h | 1 + target/riscv/helper.h | 62 +++++++++ target/riscv/insn32.decode | 20 +++ target/riscv/insn_trans/trans_rvvk.c.inc | 164 +++++++++++++++++++++++ target/riscv/vcrypto_helper.c | 138 +++++++++++++++++++ 6 files changed, 397 insertions(+) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index f74e0926c2..ccffbad4f5 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -120,6 +120,7 @@ static const struct isa_ext_data isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(zksed, PRIV_VERSION_1_12_0, ext_zksed), ISA_EXT_DATA_ENTRY(zksh, PRIV_VERSION_1_12_0, ext_zksh), ISA_EXT_DATA_ENTRY(zkt, PRIV_VERSION_1_12_0, ext_zkt), + ISA_EXT_DATA_ENTRY(zvbb, PRIV_VERSION_1_12_0, ext_zvbb), ISA_EXT_DATA_ENTRY(zvbc, PRIV_VERSION_1_12_0, ext_zvbc), ISA_EXT_DATA_ENTRY(zve32f, PRIV_VERSION_1_10_0, ext_zve32f), ISA_EXT_DATA_ENTRY(zve64f, PRIV_VERSION_1_10_0, ext_zve64f), @@ -1272,6 +1273,16 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) return; } + /* + * In principle Zve*x would also suffice here, were they supported + * in qemu + */ + if (cpu->cfg.ext_zvbb && !cpu->cfg.ext_zve32f) { + error_setg(errp, + "Vector crypto extensions require V or Zve* extensions"); + return; + } + if (cpu->cfg.ext_zvbc && !cpu->cfg.ext_zve64f) { error_setg(errp, "Zvbc extension requires V or Zve64{f,d} extensions"); return; @@ -1860,6 +1871,7 @@ static Property riscv_cpu_extensions[] = { DEFINE_PROP_BOOL("x-zvfbfwma", RISCVCPU, cfg.ext_zvfbfwma, false), /* Vector cryptography extensions */ + DEFINE_PROP_BOOL("x-zvbb", RISCVCPU, cfg.ext_zvbb, false), DEFINE_PROP_BOOL("x-zvbc", RISCVCPU, cfg.ext_zvbc, false), DEFINE_PROP_END_OF_LIST(), diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h index d25b36a512..0e31ebeed9 100644 --- a/target/riscv/cpu_cfg.h +++ b/target/riscv/cpu_cfg.h @@ -85,6 +85,7 @@ struct RISCVCPUConfig { bool ext_zve32f; bool ext_zve64f; bool ext_zve64d; + bool ext_zvbb; bool ext_zvbc; bool ext_zmmul; bool ext_zvfbfmin; diff --git a/target/riscv/helper.h b/target/riscv/helper.h index 6776777c4e..3db25ed2a2 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -1188,3 +1188,65 @@ DEF_HELPER_6(vclmul_vv, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vclmul_vx, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vclmulh_vv, void, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_6(vclmulh_vx, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vror_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vror_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vror_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vror_vv_d, void, ptr, ptr, ptr, ptr, env, i32) + +DEF_HELPER_6(vror_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vror_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vror_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vror_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vrol_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrol_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrol_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vrol_vv_d, void, ptr, ptr, ptr, ptr, env, i32) + +DEF_HELPER_6(vrol_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrol_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrol_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vrol_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_5(vrev8_v_b, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vrev8_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vrev8_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vrev8_v_d, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vbrev8_v_b, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vbrev8_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vbrev8_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vbrev8_v_d, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vbrev_v_b, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vbrev_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vbrev_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vbrev_v_d, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_5(vclz_v_b, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vclz_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vclz_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vclz_v_d, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vctz_v_b, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vctz_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vctz_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vctz_v_d, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vcpop_v_b, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vcpop_v_h, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vcpop_v_w, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vcpop_v_d, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_6(vwsll_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsll_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsll_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vwsll_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsll_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vwsll_vx_w, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_6(vandn_vv_b, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vandn_vv_h, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vandn_vv_w, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vandn_vv_d, void, ptr, ptr, ptr, ptr, env, i32) +DEF_HELPER_6(vandn_vx_b, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vandn_vx_h, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vandn_vx_w, void, ptr, ptr, tl, ptr, env, i32) +DEF_HELPER_6(vandn_vx_d, void, ptr, ptr, tl, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index dd50d5a48c..b982a8325b 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -37,6 +37,7 @@ %imm_u 12:s20 !function=ex_shift_12 %imm_bs 30:2 !function=ex_shift_3 %imm_rnum 20:4 +%imm_z6 26:1 15:5 # Argument sets: &empty @@ -82,6 +83,7 @@ @r_vm ...... vm:1 ..... ..... ... ..... ....... &rmrr %rs2 %rs1 %rd @r_vm_1 ...... . ..... ..... ... ..... ....... &rmrr vm=1 %rs2 %rs1 %rd @r_vm_0 ...... . ..... ..... ... ..... ....... &rmrr vm=0 %rs2 %rs1 %rd +@r2_zimm6 ..... . vm:1 ..... ..... ... ..... ....... &rmrr %rs2 rs1=%imm_z6 %rd @r2_zimm11 . zimm:11 ..... ... ..... ....... %rs1 %rd @r2_zimm10 .. zimm:10 ..... ... ..... ....... %rs1 %rd @r2_s ....... ..... ..... ... ..... ....... %rs2 %rs1 @@ -952,3 +954,21 @@ vclmul_vv 001100 . ..... ..... 010 ..... 1010111 @r_vm vclmul_vx 001100 . ..... ..... 110 ..... 1010111 @r_vm vclmulh_vv 001101 . ..... ..... 010 ..... 1010111 @r_vm vclmulh_vx 001101 . ..... ..... 110 ..... 1010111 @r_vm + +# *** Zvbb vector crypto extension *** +vrol_vv 010101 . ..... ..... 000 ..... 1010111 @r_vm +vrol_vx 010101 . ..... ..... 100 ..... 1010111 @r_vm +vror_vv 010100 . ..... ..... 000 ..... 1010111 @r_vm +vror_vx 010100 . ..... ..... 100 ..... 1010111 @r_vm +vror_vi 01010. . ..... ..... 011 ..... 1010111 @r2_zimm6 +vbrev8_v 010010 . ..... 01000 010 ..... 1010111 @r2_vm +vrev8_v 010010 . ..... 01001 010 ..... 1010111 @r2_vm +vandn_vv 000001 . ..... ..... 000 ..... 1010111 @r_vm +vandn_vx 000001 . ..... ..... 100 ..... 1010111 @r_vm +vbrev_v 010010 . ..... 01010 010 ..... 1010111 @r2_vm +vclz_v 010010 . ..... 01100 010 ..... 1010111 @r2_vm +vctz_v 010010 . ..... 01101 010 ..... 1010111 @r2_vm +vcpop_v 010010 . ..... 01110 010 ..... 1010111 @r2_vm +vwsll_vv 110101 . ..... ..... 000 ..... 1010111 @r_vm +vwsll_vx 110101 . ..... ..... 100 ..... 1010111 @r_vm +vwsll_vi 110101 . ..... ..... 011 ..... 1010111 @r_vm diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc index 552b08a2fd..0e4b337613 100644 --- a/target/riscv/insn_trans/trans_rvvk.c.inc +++ b/target/riscv/insn_trans/trans_rvvk.c.inc @@ -60,3 +60,167 @@ static bool vclmul_vx_check(DisasContext *s, arg_rmrr *a) GEN_VX_MASKED_TRANS(vclmul_vx, vclmul_vx_check) GEN_VX_MASKED_TRANS(vclmulh_vx, vclmul_vx_check) + +/* + * Zvbb + */ + +#define GEN_OPIVI_GVEC_TRANS_CHECK(NAME, IMM_MODE, OPIVX, SUF, CHECK) \ + static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ + { \ + if (CHECK(s, a)) { \ + static gen_helper_opivx *const fns[4] = { \ + gen_helper_##OPIVX##_b, \ + gen_helper_##OPIVX##_h, \ + gen_helper_##OPIVX##_w, \ + gen_helper_##OPIVX##_d, \ + }; \ + return do_opivi_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew], \ + IMM_MODE); \ + } \ + return false; \ + } + +#define GEN_OPIVV_GVEC_TRANS_CHECK(NAME, SUF, CHECK) \ + static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ + { \ + if (CHECK(s, a)) { \ + static gen_helper_gvec_4_ptr *const fns[4] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + gen_helper_##NAME##_d, \ + }; \ + return do_opivv_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ + } \ + return false; \ + } + +#define GEN_OPIVX_GVEC_SHIFT_TRANS_CHECK(NAME, SUF, CHECK) \ + static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ + { \ + if (CHECK(s, a)) { \ + static gen_helper_opivx *const fns[4] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + gen_helper_##NAME##_d, \ + }; \ + return do_opivx_gvec_shift(s, a, tcg_gen_gvec_##SUF, \ + fns[s->sew]); \ + } \ + return false; \ + } + +static bool zvbb_vv_check(DisasContext *s, arg_rmrr *a) +{ + return opivv_check(s, a) && s->cfg_ptr->ext_zvbb == true; +} + +static bool zvbb_vx_check(DisasContext *s, arg_rmrr *a) +{ + return opivx_check(s, a) && s->cfg_ptr->ext_zvbb == true; +} + +/* vrol.v[vx] */ +GEN_OPIVV_GVEC_TRANS_CHECK(vrol_vv, rotlv, zvbb_vv_check) +GEN_OPIVX_GVEC_SHIFT_TRANS_CHECK(vrol_vx, rotls, zvbb_vx_check) + +/* vror.v[vxi] */ +GEN_OPIVV_GVEC_TRANS_CHECK(vror_vv, rotrv, zvbb_vv_check) +GEN_OPIVX_GVEC_SHIFT_TRANS_CHECK(vror_vx, rotrs, zvbb_vx_check) +GEN_OPIVI_GVEC_TRANS_CHECK(vror_vi, IMM_TRUNC_SEW, vror_vx, rotri, zvbb_vx_check) + +#define GEN_OPIVX_GVEC_TRANS_CHECK(NAME, SUF, CHECK) \ + static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ + { \ + if (CHECK(s, a)) { \ + static gen_helper_opivx *const fns[4] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + gen_helper_##NAME##_d, \ + }; \ + return do_opivx_gvec(s, a, tcg_gen_gvec_##SUF, fns[s->sew]); \ + } \ + return false; \ + } + +/* vandn.v[vx] */ +GEN_OPIVV_GVEC_TRANS_CHECK(vandn_vv, andc, zvbb_vv_check) +GEN_OPIVX_GVEC_TRANS_CHECK(vandn_vx, andcs, zvbb_vx_check) + +#define GEN_OPIV_TRANS(NAME, CHECK) \ + static bool trans_##NAME(DisasContext *s, arg_rmr *a) \ + { \ + if (CHECK(s, a)) { \ + uint32_t data = 0; \ + static gen_helper_gvec_3_ptr *const fns[4] = { \ + gen_helper_##NAME##_b, \ + gen_helper_##NAME##_h, \ + gen_helper_##NAME##_w, \ + gen_helper_##NAME##_d, \ + }; \ + TCGLabel *over = gen_new_label(); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ + \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); \ + data = FIELD_DP32(data, VDATA, VMA, s->vma); \ + tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, 0), \ + vreg_ofs(s, a->rs2), cpu_env, \ + s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, \ + data, fns[s->sew]); \ + mark_vs_dirty(s); \ + gen_set_label(over); \ + return true; \ + } \ + return false; \ + } + +static bool zvbb_opiv_check(DisasContext *s, arg_rmr *a) +{ + return s->cfg_ptr->ext_zvbb == true && + require_rvv(s) && + vext_check_isa_ill(s) && + vext_check_ss(s, a->rd, a->rs2, a->vm); +} + +GEN_OPIV_TRANS(vbrev8_v, zvbb_opiv_check) +GEN_OPIV_TRANS(vrev8_v, zvbb_opiv_check) +GEN_OPIV_TRANS(vbrev_v, zvbb_opiv_check) +GEN_OPIV_TRANS(vclz_v, zvbb_opiv_check) +GEN_OPIV_TRANS(vctz_v, zvbb_opiv_check) +GEN_OPIV_TRANS(vcpop_v, zvbb_opiv_check) + +static bool vwsll_vv_check(DisasContext *s, arg_rmrr *a) +{ + return s->cfg_ptr->ext_zvbb && opivv_widen_check(s, a); +} + +static bool vwsll_vx_check(DisasContext *s, arg_rmrr *a) +{ + return s->cfg_ptr->ext_zvbb && opivx_widen_check(s, a); +} + +/* OPIVI without GVEC IR */ +#define GEN_OPIVI_WIDEN_TRANS(NAME, IMM_MODE, OPIVX, CHECK) \ + static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ + { \ + if (CHECK(s, a)) { \ + static gen_helper_opivx *const fns[3] = { \ + gen_helper_##OPIVX##_b, \ + gen_helper_##OPIVX##_h, \ + gen_helper_##OPIVX##_w, \ + }; \ + return opivi_trans(a->rd, a->rs1, a->rs2, a->vm, fns[s->sew], s, \ + IMM_MODE); \ + } \ + return false; \ + } + +GEN_OPIVV_WIDEN_TRANS(vwsll_vv, vwsll_vv_check) +GEN_OPIVX_WIDEN_TRANS(vwsll_vx, vwsll_vx_check) +GEN_OPIVI_WIDEN_TRANS(vwsll_vi, IMM_ZX, vwsll_vx, vwsll_vx_check) diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c index 8b7c63d499..11239b59d6 100644 --- a/target/riscv/vcrypto_helper.c +++ b/target/riscv/vcrypto_helper.c @@ -20,6 +20,7 @@ #include "qemu/osdep.h" #include "qemu/host-utils.h" #include "qemu/bitops.h" +#include "qemu/bswap.h" #include "cpu.h" #include "exec/memop.h" #include "exec/exec-all.h" @@ -57,3 +58,140 @@ RVVCALL(OPIVV2, vclmulh_vv, OP_UUU_D, H8, H8, H8, clmulh64) GEN_VEXT_VV(vclmulh_vv, 8) RVVCALL(OPIVX2, vclmulh_vx, OP_UUU_D, H8, H8, clmulh64) GEN_VEXT_VX(vclmulh_vx, 8) + +RVVCALL(OPIVV2, vror_vv_b, OP_UUU_B, H1, H1, H1, ror8) +RVVCALL(OPIVV2, vror_vv_h, OP_UUU_H, H2, H2, H2, ror16) +RVVCALL(OPIVV2, vror_vv_w, OP_UUU_W, H4, H4, H4, ror32) +RVVCALL(OPIVV2, vror_vv_d, OP_UUU_D, H8, H8, H8, ror64) +GEN_VEXT_VV(vror_vv_b, 1) +GEN_VEXT_VV(vror_vv_h, 2) +GEN_VEXT_VV(vror_vv_w, 4) +GEN_VEXT_VV(vror_vv_d, 8) + +RVVCALL(OPIVX2, vror_vx_b, OP_UUU_B, H1, H1, ror8) +RVVCALL(OPIVX2, vror_vx_h, OP_UUU_H, H2, H2, ror16) +RVVCALL(OPIVX2, vror_vx_w, OP_UUU_W, H4, H4, ror32) +RVVCALL(OPIVX2, vror_vx_d, OP_UUU_D, H8, H8, ror64) +GEN_VEXT_VX(vror_vx_b, 1) +GEN_VEXT_VX(vror_vx_h, 2) +GEN_VEXT_VX(vror_vx_w, 4) +GEN_VEXT_VX(vror_vx_d, 8) + +RVVCALL(OPIVV2, vrol_vv_b, OP_UUU_B, H1, H1, H1, rol8) +RVVCALL(OPIVV2, vrol_vv_h, OP_UUU_H, H2, H2, H2, rol16) +RVVCALL(OPIVV2, vrol_vv_w, OP_UUU_W, H4, H4, H4, rol32) +RVVCALL(OPIVV2, vrol_vv_d, OP_UUU_D, H8, H8, H8, rol64) +GEN_VEXT_VV(vrol_vv_b, 1) +GEN_VEXT_VV(vrol_vv_h, 2) +GEN_VEXT_VV(vrol_vv_w, 4) +GEN_VEXT_VV(vrol_vv_d, 8) + +RVVCALL(OPIVX2, vrol_vx_b, OP_UUU_B, H1, H1, rol8) +RVVCALL(OPIVX2, vrol_vx_h, OP_UUU_H, H2, H2, rol16) +RVVCALL(OPIVX2, vrol_vx_w, OP_UUU_W, H4, H4, rol32) +RVVCALL(OPIVX2, vrol_vx_d, OP_UUU_D, H8, H8, rol64) +GEN_VEXT_VX(vrol_vx_b, 1) +GEN_VEXT_VX(vrol_vx_h, 2) +GEN_VEXT_VX(vrol_vx_w, 4) +GEN_VEXT_VX(vrol_vx_d, 8) + +static uint64_t brev8(uint64_t val) +{ + val = ((val & 0x5555555555555555ull) << 1) | + ((val & 0xAAAAAAAAAAAAAAAAull) >> 1); + val = ((val & 0x3333333333333333ull) << 2) | + ((val & 0xCCCCCCCCCCCCCCCCull) >> 2); + val = ((val & 0x0F0F0F0F0F0F0F0Full) << 4) | + ((val & 0xF0F0F0F0F0F0F0F0ull) >> 4); + + return val; +} + +RVVCALL(OPIVV1, vbrev8_v_b, OP_UU_B, H1, H1, brev8) +RVVCALL(OPIVV1, vbrev8_v_h, OP_UU_H, H2, H2, brev8) +RVVCALL(OPIVV1, vbrev8_v_w, OP_UU_W, H4, H4, brev8) +RVVCALL(OPIVV1, vbrev8_v_d, OP_UU_D, H8, H8, brev8) +GEN_VEXT_V(vbrev8_v_b, 1) +GEN_VEXT_V(vbrev8_v_h, 2) +GEN_VEXT_V(vbrev8_v_w, 4) +GEN_VEXT_V(vbrev8_v_d, 8) + +#define DO_IDENTITY(a) (a) +RVVCALL(OPIVV1, vrev8_v_b, OP_UU_B, H1, H1, DO_IDENTITY) +RVVCALL(OPIVV1, vrev8_v_h, OP_UU_H, H2, H2, bswap16) +RVVCALL(OPIVV1, vrev8_v_w, OP_UU_W, H4, H4, bswap32) +RVVCALL(OPIVV1, vrev8_v_d, OP_UU_D, H8, H8, bswap64) +GEN_VEXT_V(vrev8_v_b, 1) +GEN_VEXT_V(vrev8_v_h, 2) +GEN_VEXT_V(vrev8_v_w, 4) +GEN_VEXT_V(vrev8_v_d, 8) + +#define DO_ANDN(a, b) ((a) & ~(b)) +RVVCALL(OPIVV2, vandn_vv_b, OP_UUU_B, H1, H1, H1, DO_ANDN) +RVVCALL(OPIVV2, vandn_vv_h, OP_UUU_H, H2, H2, H2, DO_ANDN) +RVVCALL(OPIVV2, vandn_vv_w, OP_UUU_W, H4, H4, H4, DO_ANDN) +RVVCALL(OPIVV2, vandn_vv_d, OP_UUU_D, H8, H8, H8, DO_ANDN) +GEN_VEXT_VV(vandn_vv_b, 1) +GEN_VEXT_VV(vandn_vv_h, 2) +GEN_VEXT_VV(vandn_vv_w, 4) +GEN_VEXT_VV(vandn_vv_d, 8) + +RVVCALL(OPIVX2, vandn_vx_b, OP_UUU_B, H1, H1, DO_ANDN) +RVVCALL(OPIVX2, vandn_vx_h, OP_UUU_H, H2, H2, DO_ANDN) +RVVCALL(OPIVX2, vandn_vx_w, OP_UUU_W, H4, H4, DO_ANDN) +RVVCALL(OPIVX2, vandn_vx_d, OP_UUU_D, H8, H8, DO_ANDN) +GEN_VEXT_VX(vandn_vx_b, 1) +GEN_VEXT_VX(vandn_vx_h, 2) +GEN_VEXT_VX(vandn_vx_w, 4) +GEN_VEXT_VX(vandn_vx_d, 8) + +RVVCALL(OPIVV1, vbrev_v_b, OP_UU_B, H1, H1, revbit8) +RVVCALL(OPIVV1, vbrev_v_h, OP_UU_H, H2, H2, revbit16) +RVVCALL(OPIVV1, vbrev_v_w, OP_UU_W, H4, H4, revbit32) +RVVCALL(OPIVV1, vbrev_v_d, OP_UU_D, H8, H8, revbit64) +GEN_VEXT_V(vbrev_v_b, 1) +GEN_VEXT_V(vbrev_v_h, 2) +GEN_VEXT_V(vbrev_v_w, 4) +GEN_VEXT_V(vbrev_v_d, 8) + +RVVCALL(OPIVV1, vclz_v_b, OP_UU_B, H1, H1, clz8) +RVVCALL(OPIVV1, vclz_v_h, OP_UU_H, H2, H2, clz16) +RVVCALL(OPIVV1, vclz_v_w, OP_UU_W, H4, H4, clz32) +RVVCALL(OPIVV1, vclz_v_d, OP_UU_D, H8, H8, clz64) +GEN_VEXT_V(vclz_v_b, 1) +GEN_VEXT_V(vclz_v_h, 2) +GEN_VEXT_V(vclz_v_w, 4) +GEN_VEXT_V(vclz_v_d, 8) + +RVVCALL(OPIVV1, vctz_v_b, OP_UU_B, H1, H1, ctz8) +RVVCALL(OPIVV1, vctz_v_h, OP_UU_H, H2, H2, ctz16) +RVVCALL(OPIVV1, vctz_v_w, OP_UU_W, H4, H4, ctz32) +RVVCALL(OPIVV1, vctz_v_d, OP_UU_D, H8, H8, ctz64) +GEN_VEXT_V(vctz_v_b, 1) +GEN_VEXT_V(vctz_v_h, 2) +GEN_VEXT_V(vctz_v_w, 4) +GEN_VEXT_V(vctz_v_d, 8) + +RVVCALL(OPIVV1, vcpop_v_b, OP_UU_B, H1, H1, ctpop8) +RVVCALL(OPIVV1, vcpop_v_h, OP_UU_H, H2, H2, ctpop16) +RVVCALL(OPIVV1, vcpop_v_w, OP_UU_W, H4, H4, ctpop32) +RVVCALL(OPIVV1, vcpop_v_d, OP_UU_D, H8, H8, ctpop64) +GEN_VEXT_V(vcpop_v_b, 1) +GEN_VEXT_V(vcpop_v_h, 2) +GEN_VEXT_V(vcpop_v_w, 4) +GEN_VEXT_V(vcpop_v_d, 8) + +#define DO_SLL(N, M) (N << (M & (sizeof(N) * 8 - 1))) +RVVCALL(OPIVV2, vwsll_vv_b, WOP_UUU_B, H2, H1, H1, DO_SLL) +RVVCALL(OPIVV2, vwsll_vv_h, WOP_UUU_H, H4, H2, H2, DO_SLL) +RVVCALL(OPIVV2, vwsll_vv_w, WOP_UUU_W, H8, H4, H4, DO_SLL) +GEN_VEXT_VV(vwsll_vv_b, 2) +GEN_VEXT_VV(vwsll_vv_h, 4) +GEN_VEXT_VV(vwsll_vv_w, 8) + +RVVCALL(OPIVX2, vwsll_vx_b, WOP_UUU_B, H2, H1, DO_SLL) +RVVCALL(OPIVX2, vwsll_vx_h, WOP_UUU_H, H4, H2, DO_SLL) +RVVCALL(OPIVX2, vwsll_vx_w, WOP_UUU_W, H8, H4, DO_SLL) +GEN_VEXT_VX(vwsll_vx_b, 2) +GEN_VEXT_VX(vwsll_vx_h, 4) +GEN_VEXT_VX(vwsll_vx_w, 8) From e972bf22f6f00a1a145a2e2285095aa180beb143 Mon Sep 17 00:00:00 2001 From: Nazar Kazakov Date: Wed, 12 Jul 2023 00:59:08 +0800 Subject: [PATCH 16/45] target/riscv: Add Zvkned ISA extension support This commit adds support for the Zvkned vector-crypto extension, which consists of the following instructions: * vaesef.[vv,vs] * vaesdf.[vv,vs] * vaesdm.[vv,vs] * vaesz.vs * vaesem.[vv,vs] * vaeskf1.vi * vaeskf2.vi Translation functions are defined in `target/riscv/insn_trans/trans_rvvk.c.inc` and helpers are defined in `target/riscv/vcrypto_helper.c`. Co-authored-by: Lawrence Hunter Co-authored-by: William Salmon [max.chou@sifive.com: Replaced vstart checking by TCG op] Signed-off-by: Lawrence Hunter Signed-off-by: William Salmon Signed-off-by: Nazar Kazakov Signed-off-by: Max Chou Reviewed-by: Daniel Henrique Barboza [max.chou@sifive.com: Imported aes-round.h and exposed x-zvkned property] [max.chou@sifive.com: Fixed endian issues and replaced the vstart & vl egs checking by helper function] [max.chou@sifive.com: Replaced bswap32 calls in aes key expanding] Message-ID: <20230711165917.2629866-10-max.chou@sifive.com> Signed-off-by: Alistair Francis --- target/riscv/cpu.c | 4 +- target/riscv/cpu_cfg.h | 1 + target/riscv/helper.h | 14 ++ target/riscv/insn32.decode | 14 ++ target/riscv/insn_trans/trans_rvvk.c.inc | 147 +++++++++++++++++ target/riscv/vcrypto_helper.c | 202 +++++++++++++++++++++++ 6 files changed, 381 insertions(+), 1 deletion(-) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index ccffbad4f5..8e3ae4d7e0 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -129,6 +129,7 @@ static const struct isa_ext_data isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(zvfbfwma, PRIV_VERSION_1_12_0, ext_zvfbfwma), ISA_EXT_DATA_ENTRY(zvfh, PRIV_VERSION_1_12_0, ext_zvfh), ISA_EXT_DATA_ENTRY(zvfhmin, PRIV_VERSION_1_12_0, ext_zvfhmin), + ISA_EXT_DATA_ENTRY(zvkned, PRIV_VERSION_1_12_0, ext_zvkned), ISA_EXT_DATA_ENTRY(zhinx, PRIV_VERSION_1_12_0, ext_zhinx), ISA_EXT_DATA_ENTRY(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin), ISA_EXT_DATA_ENTRY(smaia, PRIV_VERSION_1_12_0, ext_smaia), @@ -1277,7 +1278,7 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) * In principle Zve*x would also suffice here, were they supported * in qemu */ - if (cpu->cfg.ext_zvbb && !cpu->cfg.ext_zve32f) { + if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkned) && !cpu->cfg.ext_zve32f) { error_setg(errp, "Vector crypto extensions require V or Zve* extensions"); return; @@ -1873,6 +1874,7 @@ static Property riscv_cpu_extensions[] = { /* Vector cryptography extensions */ DEFINE_PROP_BOOL("x-zvbb", RISCVCPU, cfg.ext_zvbb, false), DEFINE_PROP_BOOL("x-zvbc", RISCVCPU, cfg.ext_zvbc, false), + DEFINE_PROP_BOOL("x-zvkned", RISCVCPU, cfg.ext_zvkned, false), DEFINE_PROP_END_OF_LIST(), }; diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h index 0e31ebeed9..c7eafe27c0 100644 --- a/target/riscv/cpu_cfg.h +++ b/target/riscv/cpu_cfg.h @@ -87,6 +87,7 @@ struct RISCVCPUConfig { bool ext_zve64d; bool ext_zvbb; bool ext_zvbc; + bool ext_zvkned; bool ext_zmmul; bool ext_zvfbfmin; bool ext_zvfbfwma; diff --git a/target/riscv/helper.h b/target/riscv/helper.h index 3db25ed2a2..02e5dbe6ee 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -1250,3 +1250,17 @@ DEF_HELPER_6(vandn_vx_b, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vandn_vx_h, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vandn_vx_w, void, ptr, ptr, tl, ptr, env, i32) DEF_HELPER_6(vandn_vx_d, void, ptr, ptr, tl, ptr, env, i32) + +DEF_HELPER_2(egs_check, void, i32, env) + +DEF_HELPER_4(vaesef_vv, void, ptr, ptr, env, i32) +DEF_HELPER_4(vaesef_vs, void, ptr, ptr, env, i32) +DEF_HELPER_4(vaesdf_vv, void, ptr, ptr, env, i32) +DEF_HELPER_4(vaesdf_vs, void, ptr, ptr, env, i32) +DEF_HELPER_4(vaesem_vv, void, ptr, ptr, env, i32) +DEF_HELPER_4(vaesem_vs, void, ptr, ptr, env, i32) +DEF_HELPER_4(vaesdm_vv, void, ptr, ptr, env, i32) +DEF_HELPER_4(vaesdm_vs, void, ptr, ptr, env, i32) +DEF_HELPER_4(vaesz_vs, void, ptr, ptr, env, i32) +DEF_HELPER_5(vaeskf1_vi, void, ptr, ptr, i32, env, i32) +DEF_HELPER_5(vaeskf2_vi, void, ptr, ptr, i32, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index b982a8325b..4f3c50f10f 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -75,6 +75,7 @@ @r_rm ....... ..... ..... ... ..... ....... %rs2 %rs1 %rm %rd @r2_rm ....... ..... ..... ... ..... ....... %rs1 %rm %rd @r2 ....... ..... ..... ... ..... ....... &r2 %rs1 %rd +@r2_vm_1 ...... . ..... ..... ... ..... ....... &rmr vm=1 %rs2 %rd @r2_nfvm ... ... vm:1 ..... ..... ... ..... ....... &r2nfvm %nf %rs1 %rd @r2_vm ...... vm:1 ..... ..... ... ..... ....... &rmr %rs2 %rd @r1_vm ...... vm:1 ..... ..... ... ..... ....... %rd @@ -972,3 +973,16 @@ vcpop_v 010010 . ..... 01110 010 ..... 1010111 @r2_vm vwsll_vv 110101 . ..... ..... 000 ..... 1010111 @r_vm vwsll_vx 110101 . ..... ..... 100 ..... 1010111 @r_vm vwsll_vi 110101 . ..... ..... 011 ..... 1010111 @r_vm + +# *** Zvkned vector crypto extension *** +vaesef_vv 101000 1 ..... 00011 010 ..... 1110111 @r2_vm_1 +vaesef_vs 101001 1 ..... 00011 010 ..... 1110111 @r2_vm_1 +vaesdf_vv 101000 1 ..... 00001 010 ..... 1110111 @r2_vm_1 +vaesdf_vs 101001 1 ..... 00001 010 ..... 1110111 @r2_vm_1 +vaesem_vv 101000 1 ..... 00010 010 ..... 1110111 @r2_vm_1 +vaesem_vs 101001 1 ..... 00010 010 ..... 1110111 @r2_vm_1 +vaesdm_vv 101000 1 ..... 00000 010 ..... 1110111 @r2_vm_1 +vaesdm_vs 101001 1 ..... 00000 010 ..... 1110111 @r2_vm_1 +vaesz_vs 101001 1 ..... 00111 010 ..... 1110111 @r2_vm_1 +vaeskf1_vi 100010 1 ..... ..... 010 ..... 1110111 @r_vm_1 +vaeskf2_vi 101010 1 ..... ..... 010 ..... 1110111 @r_vm_1 diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc index 0e4b337613..817353f4d3 100644 --- a/target/riscv/insn_trans/trans_rvvk.c.inc +++ b/target/riscv/insn_trans/trans_rvvk.c.inc @@ -224,3 +224,150 @@ static bool vwsll_vx_check(DisasContext *s, arg_rmrr *a) GEN_OPIVV_WIDEN_TRANS(vwsll_vv, vwsll_vv_check) GEN_OPIVX_WIDEN_TRANS(vwsll_vx, vwsll_vx_check) GEN_OPIVI_WIDEN_TRANS(vwsll_vi, IMM_ZX, vwsll_vx, vwsll_vx_check) + +/* + * Zvkned + */ + +#define ZVKNED_EGS 4 + +#define GEN_V_UNMASKED_TRANS(NAME, CHECK, EGS) \ + static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \ + { \ + if (CHECK(s, a)) { \ + TCGv_ptr rd_v, rs2_v; \ + TCGv_i32 desc, egs; \ + uint32_t data = 0; \ + TCGLabel *over = gen_new_label(); \ + \ + if (!s->vstart_eq_zero || !s->vl_eq_vlmax) { \ + /* save opcode for unwinding in case we throw an exception */ \ + decode_save_opc(s); \ + egs = tcg_constant_i32(EGS); \ + gen_helper_egs_check(egs, cpu_env); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ + } \ + \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); \ + data = FIELD_DP32(data, VDATA, VMA, s->vma); \ + rd_v = tcg_temp_new_ptr(); \ + rs2_v = tcg_temp_new_ptr(); \ + desc = tcg_constant_i32( \ + simd_desc(s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, data)); \ + tcg_gen_addi_ptr(rd_v, cpu_env, vreg_ofs(s, a->rd)); \ + tcg_gen_addi_ptr(rs2_v, cpu_env, vreg_ofs(s, a->rs2)); \ + gen_helper_##NAME(rd_v, rs2_v, cpu_env, desc); \ + mark_vs_dirty(s); \ + gen_set_label(over); \ + return true; \ + } \ + return false; \ + } + +static bool vaes_check_vv(DisasContext *s, arg_rmr *a) +{ + int egw_bytes = ZVKNED_EGS << s->sew; + return s->cfg_ptr->ext_zvkned == true && + require_rvv(s) && + vext_check_isa_ill(s) && + MAXSZ(s) >= egw_bytes && + require_align(a->rd, s->lmul) && + require_align(a->rs2, s->lmul) && + s->sew == MO_32; +} + +static bool vaes_check_overlap(DisasContext *s, int vd, int vs2) +{ + int8_t op_size = s->lmul <= 0 ? 1 : 1 << s->lmul; + return !is_overlapped(vd, op_size, vs2, 1); +} + +static bool vaes_check_vs(DisasContext *s, arg_rmr *a) +{ + int egw_bytes = ZVKNED_EGS << s->sew; + return vaes_check_overlap(s, a->rd, a->rs2) && + MAXSZ(s) >= egw_bytes && + s->cfg_ptr->ext_zvkned == true && + require_rvv(s) && + vext_check_isa_ill(s) && + require_align(a->rd, s->lmul) && + s->sew == MO_32; +} + +GEN_V_UNMASKED_TRANS(vaesef_vv, vaes_check_vv, ZVKNED_EGS) +GEN_V_UNMASKED_TRANS(vaesef_vs, vaes_check_vs, ZVKNED_EGS) +GEN_V_UNMASKED_TRANS(vaesdf_vv, vaes_check_vv, ZVKNED_EGS) +GEN_V_UNMASKED_TRANS(vaesdf_vs, vaes_check_vs, ZVKNED_EGS) +GEN_V_UNMASKED_TRANS(vaesdm_vv, vaes_check_vv, ZVKNED_EGS) +GEN_V_UNMASKED_TRANS(vaesdm_vs, vaes_check_vs, ZVKNED_EGS) +GEN_V_UNMASKED_TRANS(vaesz_vs, vaes_check_vs, ZVKNED_EGS) +GEN_V_UNMASKED_TRANS(vaesem_vv, vaes_check_vv, ZVKNED_EGS) +GEN_V_UNMASKED_TRANS(vaesem_vs, vaes_check_vs, ZVKNED_EGS) + +#define GEN_VI_UNMASKED_TRANS(NAME, CHECK, EGS) \ + static bool trans_##NAME(DisasContext *s, arg_##NAME *a) \ + { \ + if (CHECK(s, a)) { \ + TCGv_ptr rd_v, rs2_v; \ + TCGv_i32 uimm_v, desc, egs; \ + uint32_t data = 0; \ + TCGLabel *over = gen_new_label(); \ + \ + if (!s->vstart_eq_zero || !s->vl_eq_vlmax) { \ + /* save opcode for unwinding in case we throw an exception */ \ + decode_save_opc(s); \ + egs = tcg_constant_i32(EGS); \ + gen_helper_egs_check(egs, cpu_env); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ + } \ + \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); \ + data = FIELD_DP32(data, VDATA, VMA, s->vma); \ + \ + rd_v = tcg_temp_new_ptr(); \ + rs2_v = tcg_temp_new_ptr(); \ + uimm_v = tcg_constant_i32(a->rs1); \ + desc = tcg_constant_i32( \ + simd_desc(s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, data)); \ + tcg_gen_addi_ptr(rd_v, cpu_env, vreg_ofs(s, a->rd)); \ + tcg_gen_addi_ptr(rs2_v, cpu_env, vreg_ofs(s, a->rs2)); \ + gen_helper_##NAME(rd_v, rs2_v, uimm_v, cpu_env, desc); \ + mark_vs_dirty(s); \ + gen_set_label(over); \ + return true; \ + } \ + return false; \ + } + +static bool vaeskf1_check(DisasContext *s, arg_vaeskf1_vi *a) +{ + int egw_bytes = ZVKNED_EGS << s->sew; + return s->cfg_ptr->ext_zvkned == true && + require_rvv(s) && + vext_check_isa_ill(s) && + MAXSZ(s) >= egw_bytes && + s->sew == MO_32 && + require_align(a->rd, s->lmul) && + require_align(a->rs2, s->lmul); +} + +static bool vaeskf2_check(DisasContext *s, arg_vaeskf2_vi *a) +{ + int egw_bytes = ZVKNED_EGS << s->sew; + return s->cfg_ptr->ext_zvkned == true && + require_rvv(s) && + vext_check_isa_ill(s) && + MAXSZ(s) >= egw_bytes && + s->sew == MO_32 && + require_align(a->rd, s->lmul) && + require_align(a->rs2, s->lmul); +} + +GEN_VI_UNMASKED_TRANS(vaeskf1_vi, vaeskf1_check, ZVKNED_EGS) +GEN_VI_UNMASKED_TRANS(vaeskf2_vi, vaeskf2_check, ZVKNED_EGS) diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c index 11239b59d6..cca78184e9 100644 --- a/target/riscv/vcrypto_helper.c +++ b/target/riscv/vcrypto_helper.c @@ -22,6 +22,8 @@ #include "qemu/bitops.h" #include "qemu/bswap.h" #include "cpu.h" +#include "crypto/aes.h" +#include "crypto/aes-round.h" #include "exec/memop.h" #include "exec/exec-all.h" #include "exec/helper-proto.h" @@ -195,3 +197,203 @@ RVVCALL(OPIVX2, vwsll_vx_w, WOP_UUU_W, H8, H4, DO_SLL) GEN_VEXT_VX(vwsll_vx_b, 2) GEN_VEXT_VX(vwsll_vx_h, 4) GEN_VEXT_VX(vwsll_vx_w, 8) + +void HELPER(egs_check)(uint32_t egs, CPURISCVState *env) +{ + uint32_t vl = env->vl; + uint32_t vstart = env->vstart; + + if (vl % egs != 0 || vstart % egs != 0) { + riscv_raise_exception(env, RISCV_EXCP_ILLEGAL_INST, GETPC()); + } +} + +static inline void xor_round_key(AESState *round_state, AESState *round_key) +{ + round_state->v = round_state->v ^ round_key->v; +} + +#define GEN_ZVKNED_HELPER_VV(NAME, ...) \ + void HELPER(NAME)(void *vd, void *vs2, CPURISCVState *env, \ + uint32_t desc) \ + { \ + uint32_t vl = env->vl; \ + uint32_t total_elems = vext_get_total_elems(env, desc, 4); \ + uint32_t vta = vext_vta(desc); \ + \ + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { \ + AESState round_key; \ + round_key.d[0] = *((uint64_t *)vs2 + H8(i * 2 + 0)); \ + round_key.d[1] = *((uint64_t *)vs2 + H8(i * 2 + 1)); \ + AESState round_state; \ + round_state.d[0] = *((uint64_t *)vd + H8(i * 2 + 0)); \ + round_state.d[1] = *((uint64_t *)vd + H8(i * 2 + 1)); \ + __VA_ARGS__; \ + *((uint64_t *)vd + H8(i * 2 + 0)) = round_state.d[0]; \ + *((uint64_t *)vd + H8(i * 2 + 1)) = round_state.d[1]; \ + } \ + env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4); \ + } + +#define GEN_ZVKNED_HELPER_VS(NAME, ...) \ + void HELPER(NAME)(void *vd, void *vs2, CPURISCVState *env, \ + uint32_t desc) \ + { \ + uint32_t vl = env->vl; \ + uint32_t total_elems = vext_get_total_elems(env, desc, 4); \ + uint32_t vta = vext_vta(desc); \ + \ + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { \ + AESState round_key; \ + round_key.d[0] = *((uint64_t *)vs2 + H8(0)); \ + round_key.d[1] = *((uint64_t *)vs2 + H8(1)); \ + AESState round_state; \ + round_state.d[0] = *((uint64_t *)vd + H8(i * 2 + 0)); \ + round_state.d[1] = *((uint64_t *)vd + H8(i * 2 + 1)); \ + __VA_ARGS__; \ + *((uint64_t *)vd + H8(i * 2 + 0)) = round_state.d[0]; \ + *((uint64_t *)vd + H8(i * 2 + 1)) = round_state.d[1]; \ + } \ + env->vstart = 0; \ + /* set tail elements to 1s */ \ + vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4); \ + } + +GEN_ZVKNED_HELPER_VV(vaesef_vv, aesenc_SB_SR_AK(&round_state, + &round_state, + &round_key, + false);) +GEN_ZVKNED_HELPER_VS(vaesef_vs, aesenc_SB_SR_AK(&round_state, + &round_state, + &round_key, + false);) +GEN_ZVKNED_HELPER_VV(vaesdf_vv, aesdec_ISB_ISR_AK(&round_state, + &round_state, + &round_key, + false);) +GEN_ZVKNED_HELPER_VS(vaesdf_vs, aesdec_ISB_ISR_AK(&round_state, + &round_state, + &round_key, + false);) +GEN_ZVKNED_HELPER_VV(vaesem_vv, aesenc_SB_SR_MC_AK(&round_state, + &round_state, + &round_key, + false);) +GEN_ZVKNED_HELPER_VS(vaesem_vs, aesenc_SB_SR_MC_AK(&round_state, + &round_state, + &round_key, + false);) +GEN_ZVKNED_HELPER_VV(vaesdm_vv, aesdec_ISB_ISR_AK_IMC(&round_state, + &round_state, + &round_key, + false);) +GEN_ZVKNED_HELPER_VS(vaesdm_vs, aesdec_ISB_ISR_AK_IMC(&round_state, + &round_state, + &round_key, + false);) +GEN_ZVKNED_HELPER_VS(vaesz_vs, xor_round_key(&round_state, &round_key);) + +void HELPER(vaeskf1_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm, + CPURISCVState *env, uint32_t desc) +{ + uint32_t *vd = vd_vptr; + uint32_t *vs2 = vs2_vptr; + uint32_t vl = env->vl; + uint32_t total_elems = vext_get_total_elems(env, desc, 4); + uint32_t vta = vext_vta(desc); + + uimm &= 0b1111; + if (uimm > 10 || uimm == 0) { + uimm ^= 0b1000; + } + + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { + uint32_t rk[8], tmp; + static const uint32_t rcon[] = { + 0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010, + 0x00000020, 0x00000040, 0x00000080, 0x0000001B, 0x00000036, + }; + + rk[0] = vs2[i * 4 + H4(0)]; + rk[1] = vs2[i * 4 + H4(1)]; + rk[2] = vs2[i * 4 + H4(2)]; + rk[3] = vs2[i * 4 + H4(3)]; + tmp = ror32(rk[3], 8); + + rk[4] = rk[0] ^ (((uint32_t)AES_sbox[(tmp >> 24) & 0xff] << 24) | + ((uint32_t)AES_sbox[(tmp >> 16) & 0xff] << 16) | + ((uint32_t)AES_sbox[(tmp >> 8) & 0xff] << 8) | + ((uint32_t)AES_sbox[(tmp >> 0) & 0xff] << 0)) + ^ rcon[uimm - 1]; + rk[5] = rk[1] ^ rk[4]; + rk[6] = rk[2] ^ rk[5]; + rk[7] = rk[3] ^ rk[6]; + + vd[i * 4 + H4(0)] = rk[4]; + vd[i * 4 + H4(1)] = rk[5]; + vd[i * 4 + H4(2)] = rk[6]; + vd[i * 4 + H4(3)] = rk[7]; + } + env->vstart = 0; + /* set tail elements to 1s */ + vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4); +} + +void HELPER(vaeskf2_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm, + CPURISCVState *env, uint32_t desc) +{ + uint32_t *vd = vd_vptr; + uint32_t *vs2 = vs2_vptr; + uint32_t vl = env->vl; + uint32_t total_elems = vext_get_total_elems(env, desc, 4); + uint32_t vta = vext_vta(desc); + + uimm &= 0b1111; + if (uimm > 14 || uimm < 2) { + uimm ^= 0b1000; + } + + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { + uint32_t rk[12], tmp; + static const uint32_t rcon[] = { + 0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010, + 0x00000020, 0x00000040, 0x00000080, 0x0000001B, 0x00000036, + }; + + rk[0] = vd[i * 4 + H4(0)]; + rk[1] = vd[i * 4 + H4(1)]; + rk[2] = vd[i * 4 + H4(2)]; + rk[3] = vd[i * 4 + H4(3)]; + rk[4] = vs2[i * 4 + H4(0)]; + rk[5] = vs2[i * 4 + H4(1)]; + rk[6] = vs2[i * 4 + H4(2)]; + rk[7] = vs2[i * 4 + H4(3)]; + + if (uimm % 2 == 0) { + tmp = ror32(rk[7], 8); + rk[8] = rk[0] ^ (((uint32_t)AES_sbox[(tmp >> 24) & 0xff] << 24) | + ((uint32_t)AES_sbox[(tmp >> 16) & 0xff] << 16) | + ((uint32_t)AES_sbox[(tmp >> 8) & 0xff] << 8) | + ((uint32_t)AES_sbox[(tmp >> 0) & 0xff] << 0)) + ^ rcon[(uimm - 1) / 2]; + } else { + rk[8] = rk[0] ^ (((uint32_t)AES_sbox[(rk[7] >> 24) & 0xff] << 24) | + ((uint32_t)AES_sbox[(rk[7] >> 16) & 0xff] << 16) | + ((uint32_t)AES_sbox[(rk[7] >> 8) & 0xff] << 8) | + ((uint32_t)AES_sbox[(rk[7] >> 0) & 0xff] << 0)); + } + rk[9] = rk[1] ^ rk[8]; + rk[10] = rk[2] ^ rk[9]; + rk[11] = rk[3] ^ rk[10]; + + vd[i * 4 + H4(0)] = rk[8]; + vd[i * 4 + H4(1)] = rk[9]; + vd[i * 4 + H4(2)] = rk[10]; + vd[i * 4 + H4(3)] = rk[11]; + } + env->vstart = 0; + /* set tail elements to 1s */ + vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4); +} From fcf1943376a50a26382143da5f886609c0619d44 Mon Sep 17 00:00:00 2001 From: Kiran Ostrolenk Date: Wed, 12 Jul 2023 00:59:09 +0800 Subject: [PATCH 17/45] target/riscv: Add Zvknh ISA extension support This commit adds support for the Zvknh vector-crypto extension, which consists of the following instructions: * vsha2ms.vv * vsha2c[hl].vv Translation functions are defined in `target/riscv/insn_trans/trans_rvvk.c.inc` and helpers are defined in `target/riscv/vcrypto_helper.c`. Co-authored-by: Nazar Kazakov Co-authored-by: Lawrence Hunter [max.chou@sifive.com: Replaced vstart checking by TCG op] Signed-off-by: Nazar Kazakov Signed-off-by: Lawrence Hunter Signed-off-by: Kiran Ostrolenk Signed-off-by: Max Chou Reviewed-by: Daniel Henrique Barboza [max.chou@sifive.com: Exposed x-zvknha & x-zvknhb properties] [max.chou@sifive.com: Replaced SEW selection to happened during translation] Message-ID: <20230711165917.2629866-11-max.chou@sifive.com> Signed-off-by: Alistair Francis --- target/riscv/cpu.c | 13 +- target/riscv/cpu_cfg.h | 2 + target/riscv/helper.h | 6 + target/riscv/insn32.decode | 5 + target/riscv/insn_trans/trans_rvvk.c.inc | 129 ++++++++++++ target/riscv/vcrypto_helper.c | 238 +++++++++++++++++++++++ 6 files changed, 390 insertions(+), 3 deletions(-) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index 8e3ae4d7e0..f103f536fd 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -130,6 +130,8 @@ static const struct isa_ext_data isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(zvfh, PRIV_VERSION_1_12_0, ext_zvfh), ISA_EXT_DATA_ENTRY(zvfhmin, PRIV_VERSION_1_12_0, ext_zvfhmin), ISA_EXT_DATA_ENTRY(zvkned, PRIV_VERSION_1_12_0, ext_zvkned), + ISA_EXT_DATA_ENTRY(zvknha, PRIV_VERSION_1_12_0, ext_zvknha), + ISA_EXT_DATA_ENTRY(zvknhb, PRIV_VERSION_1_12_0, ext_zvknhb), ISA_EXT_DATA_ENTRY(zhinx, PRIV_VERSION_1_12_0, ext_zhinx), ISA_EXT_DATA_ENTRY(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin), ISA_EXT_DATA_ENTRY(smaia, PRIV_VERSION_1_12_0, ext_smaia), @@ -1278,14 +1280,17 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) * In principle Zve*x would also suffice here, were they supported * in qemu */ - if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkned) && !cpu->cfg.ext_zve32f) { + if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkned || cpu->cfg.ext_zvknha) && + !cpu->cfg.ext_zve32f) { error_setg(errp, "Vector crypto extensions require V or Zve* extensions"); return; } - if (cpu->cfg.ext_zvbc && !cpu->cfg.ext_zve64f) { - error_setg(errp, "Zvbc extension requires V or Zve64{f,d} extensions"); + if ((cpu->cfg.ext_zvbc || cpu->cfg.ext_zvknhb) && !cpu->cfg.ext_zve64f) { + error_setg( + errp, + "Zvbc and Zvknhb extensions require V or Zve64{f,d} extensions"); return; } @@ -1875,6 +1880,8 @@ static Property riscv_cpu_extensions[] = { DEFINE_PROP_BOOL("x-zvbb", RISCVCPU, cfg.ext_zvbb, false), DEFINE_PROP_BOOL("x-zvbc", RISCVCPU, cfg.ext_zvbc, false), DEFINE_PROP_BOOL("x-zvkned", RISCVCPU, cfg.ext_zvkned, false), + DEFINE_PROP_BOOL("x-zvknha", RISCVCPU, cfg.ext_zvknha, false), + DEFINE_PROP_BOOL("x-zvknhb", RISCVCPU, cfg.ext_zvknhb, false), DEFINE_PROP_END_OF_LIST(), }; diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h index c7eafe27c0..800b8783c1 100644 --- a/target/riscv/cpu_cfg.h +++ b/target/riscv/cpu_cfg.h @@ -88,6 +88,8 @@ struct RISCVCPUConfig { bool ext_zvbb; bool ext_zvbc; bool ext_zvkned; + bool ext_zvknha; + bool ext_zvknhb; bool ext_zmmul; bool ext_zvfbfmin; bool ext_zvfbfwma; diff --git a/target/riscv/helper.h b/target/riscv/helper.h index 02e5dbe6ee..34329b52fe 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -1264,3 +1264,9 @@ DEF_HELPER_4(vaesdm_vs, void, ptr, ptr, env, i32) DEF_HELPER_4(vaesz_vs, void, ptr, ptr, env, i32) DEF_HELPER_5(vaeskf1_vi, void, ptr, ptr, i32, env, i32) DEF_HELPER_5(vaeskf2_vi, void, ptr, ptr, i32, env, i32) + +DEF_HELPER_5(vsha2ms_vv, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vsha2ch32_vv, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vsha2ch64_vv, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vsha2cl32_vv, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vsha2cl64_vv, void, ptr, ptr, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index 4f3c50f10f..e2b83186dc 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -986,3 +986,8 @@ vaesdm_vs 101001 1 ..... 00000 010 ..... 1110111 @r2_vm_1 vaesz_vs 101001 1 ..... 00111 010 ..... 1110111 @r2_vm_1 vaeskf1_vi 100010 1 ..... ..... 010 ..... 1110111 @r_vm_1 vaeskf2_vi 101010 1 ..... ..... 010 ..... 1110111 @r_vm_1 + +# *** Zvknh vector crypto extension *** +vsha2ms_vv 101101 1 ..... ..... 010 ..... 1110111 @r_vm_1 +vsha2ch_vv 101110 1 ..... ..... 010 ..... 1110111 @r_vm_1 +vsha2cl_vv 101111 1 ..... ..... 010 ..... 1110111 @r_vm_1 diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc index 817353f4d3..a35be11b95 100644 --- a/target/riscv/insn_trans/trans_rvvk.c.inc +++ b/target/riscv/insn_trans/trans_rvvk.c.inc @@ -371,3 +371,132 @@ static bool vaeskf2_check(DisasContext *s, arg_vaeskf2_vi *a) GEN_VI_UNMASKED_TRANS(vaeskf1_vi, vaeskf1_check, ZVKNED_EGS) GEN_VI_UNMASKED_TRANS(vaeskf2_vi, vaeskf2_check, ZVKNED_EGS) + +/* + * Zvknh + */ + +#define ZVKNH_EGS 4 + +#define GEN_VV_UNMASKED_TRANS(NAME, CHECK, EGS) \ + static bool trans_##NAME(DisasContext *s, arg_rmrr *a) \ + { \ + if (CHECK(s, a)) { \ + uint32_t data = 0; \ + TCGLabel *over = gen_new_label(); \ + TCGv_i32 egs; \ + \ + if (!s->vstart_eq_zero || !s->vl_eq_vlmax) { \ + /* save opcode for unwinding in case we throw an exception */ \ + decode_save_opc(s); \ + egs = tcg_constant_i32(EGS); \ + gen_helper_egs_check(egs, cpu_env); \ + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); \ + } \ + \ + data = FIELD_DP32(data, VDATA, VM, a->vm); \ + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); \ + data = FIELD_DP32(data, VDATA, VTA, s->vta); \ + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); \ + data = FIELD_DP32(data, VDATA, VMA, s->vma); \ + \ + tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), \ + vreg_ofs(s, a->rs2), cpu_env, \ + s->cfg_ptr->vlen / 8, s->cfg_ptr->vlen / 8, \ + data, gen_helper_##NAME); \ + \ + mark_vs_dirty(s); \ + gen_set_label(over); \ + return true; \ + } \ + return false; \ + } + +static bool vsha_check_sew(DisasContext *s) +{ + return (s->cfg_ptr->ext_zvknha == true && s->sew == MO_32) || + (s->cfg_ptr->ext_zvknhb == true && + (s->sew == MO_32 || s->sew == MO_64)); +} + +static bool vsha_check(DisasContext *s, arg_rmrr *a) +{ + int egw_bytes = ZVKNH_EGS << s->sew; + int mult = 1 << MAX(s->lmul, 0); + return opivv_check(s, a) && + vsha_check_sew(s) && + MAXSZ(s) >= egw_bytes && + !is_overlapped(a->rd, mult, a->rs1, mult) && + !is_overlapped(a->rd, mult, a->rs2, mult) && + s->lmul >= 0; +} + +GEN_VV_UNMASKED_TRANS(vsha2ms_vv, vsha_check, ZVKNH_EGS) + +static bool trans_vsha2cl_vv(DisasContext *s, arg_rmrr *a) +{ + if (vsha_check(s, a)) { + uint32_t data = 0; + TCGLabel *over = gen_new_label(); + TCGv_i32 egs; + + if (!s->vstart_eq_zero || !s->vl_eq_vlmax) { + /* save opcode for unwinding in case we throw an exception */ + decode_save_opc(s); + egs = tcg_constant_i32(ZVKNH_EGS); + gen_helper_egs_check(egs, cpu_env); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); + } + + data = FIELD_DP32(data, VDATA, VM, a->vm); + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, VTA, s->vta); + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); + data = FIELD_DP32(data, VDATA, VMA, s->vma); + + tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), + vreg_ofs(s, a->rs2), cpu_env, s->cfg_ptr->vlen / 8, + s->cfg_ptr->vlen / 8, data, + s->sew == MO_32 ? + gen_helper_vsha2cl32_vv : gen_helper_vsha2cl64_vv); + + mark_vs_dirty(s); + gen_set_label(over); + return true; + } + return false; +} + +static bool trans_vsha2ch_vv(DisasContext *s, arg_rmrr *a) +{ + if (vsha_check(s, a)) { + uint32_t data = 0; + TCGLabel *over = gen_new_label(); + TCGv_i32 egs; + + if (!s->vstart_eq_zero || !s->vl_eq_vlmax) { + /* save opcode for unwinding in case we throw an exception */ + decode_save_opc(s); + egs = tcg_constant_i32(ZVKNH_EGS); + gen_helper_egs_check(egs, cpu_env); + tcg_gen_brcond_tl(TCG_COND_GEU, cpu_vstart, cpu_vl, over); + } + + data = FIELD_DP32(data, VDATA, VM, a->vm); + data = FIELD_DP32(data, VDATA, LMUL, s->lmul); + data = FIELD_DP32(data, VDATA, VTA, s->vta); + data = FIELD_DP32(data, VDATA, VTA_ALL_1S, s->cfg_vta_all_1s); + data = FIELD_DP32(data, VDATA, VMA, s->vma); + + tcg_gen_gvec_3_ptr(vreg_ofs(s, a->rd), vreg_ofs(s, a->rs1), + vreg_ofs(s, a->rs2), cpu_env, s->cfg_ptr->vlen / 8, + s->cfg_ptr->vlen / 8, data, + s->sew == MO_32 ? + gen_helper_vsha2ch32_vv : gen_helper_vsha2ch64_vv); + + mark_vs_dirty(s); + gen_set_label(over); + return true; + } + return false; +} diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c index cca78184e9..2f2099b6fb 100644 --- a/target/riscv/vcrypto_helper.c +++ b/target/riscv/vcrypto_helper.c @@ -397,3 +397,241 @@ void HELPER(vaeskf2_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm, /* set tail elements to 1s */ vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4); } + +static inline uint32_t sig0_sha256(uint32_t x) +{ + return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3); +} + +static inline uint32_t sig1_sha256(uint32_t x) +{ + return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10); +} + +static inline uint64_t sig0_sha512(uint64_t x) +{ + return ror64(x, 1) ^ ror64(x, 8) ^ (x >> 7); +} + +static inline uint64_t sig1_sha512(uint64_t x) +{ + return ror64(x, 19) ^ ror64(x, 61) ^ (x >> 6); +} + +static inline void vsha2ms_e32(uint32_t *vd, uint32_t *vs1, uint32_t *vs2) +{ + uint32_t res[4]; + res[0] = sig1_sha256(vs1[H4(2)]) + vs2[H4(1)] + sig0_sha256(vd[H4(1)]) + + vd[H4(0)]; + res[1] = sig1_sha256(vs1[H4(3)]) + vs2[H4(2)] + sig0_sha256(vd[H4(2)]) + + vd[H4(1)]; + res[2] = + sig1_sha256(res[0]) + vs2[H4(3)] + sig0_sha256(vd[H4(3)]) + vd[H4(2)]; + res[3] = + sig1_sha256(res[1]) + vs1[H4(0)] + sig0_sha256(vs2[H4(0)]) + vd[H4(3)]; + vd[H4(3)] = res[3]; + vd[H4(2)] = res[2]; + vd[H4(1)] = res[1]; + vd[H4(0)] = res[0]; +} + +static inline void vsha2ms_e64(uint64_t *vd, uint64_t *vs1, uint64_t *vs2) +{ + uint64_t res[4]; + res[0] = sig1_sha512(vs1[2]) + vs2[1] + sig0_sha512(vd[1]) + vd[0]; + res[1] = sig1_sha512(vs1[3]) + vs2[2] + sig0_sha512(vd[2]) + vd[1]; + res[2] = sig1_sha512(res[0]) + vs2[3] + sig0_sha512(vd[3]) + vd[2]; + res[3] = sig1_sha512(res[1]) + vs1[0] + sig0_sha512(vs2[0]) + vd[3]; + vd[3] = res[3]; + vd[2] = res[2]; + vd[1] = res[1]; + vd[0] = res[0]; +} + +void HELPER(vsha2ms_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env, + uint32_t desc) +{ + uint32_t sew = FIELD_EX64(env->vtype, VTYPE, VSEW); + uint32_t esz = sew == MO_32 ? 4 : 8; + uint32_t total_elems; + uint32_t vta = vext_vta(desc); + + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { + if (sew == MO_32) { + vsha2ms_e32(((uint32_t *)vd) + i * 4, ((uint32_t *)vs1) + i * 4, + ((uint32_t *)vs2) + i * 4); + } else { + /* If not 32 then SEW should be 64 */ + vsha2ms_e64(((uint64_t *)vd) + i * 4, ((uint64_t *)vs1) + i * 4, + ((uint64_t *)vs2) + i * 4); + } + } + /* set tail elements to 1s */ + total_elems = vext_get_total_elems(env, desc, esz); + vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz); + env->vstart = 0; +} + +static inline uint64_t sum0_64(uint64_t x) +{ + return ror64(x, 28) ^ ror64(x, 34) ^ ror64(x, 39); +} + +static inline uint32_t sum0_32(uint32_t x) +{ + return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22); +} + +static inline uint64_t sum1_64(uint64_t x) +{ + return ror64(x, 14) ^ ror64(x, 18) ^ ror64(x, 41); +} + +static inline uint32_t sum1_32(uint32_t x) +{ + return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25); +} + +#define ch(x, y, z) ((x & y) ^ ((~x) & z)) + +#define maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z)) + +static void vsha2c_64(uint64_t *vs2, uint64_t *vd, uint64_t *vs1) +{ + uint64_t a = vs2[3], b = vs2[2], e = vs2[1], f = vs2[0]; + uint64_t c = vd[3], d = vd[2], g = vd[1], h = vd[0]; + uint64_t W0 = vs1[0], W1 = vs1[1]; + uint64_t T1 = h + sum1_64(e) + ch(e, f, g) + W0; + uint64_t T2 = sum0_64(a) + maj(a, b, c); + + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + + T1 = h + sum1_64(e) + ch(e, f, g) + W1; + T2 = sum0_64(a) + maj(a, b, c); + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + + vd[0] = f; + vd[1] = e; + vd[2] = b; + vd[3] = a; +} + +static void vsha2c_32(uint32_t *vs2, uint32_t *vd, uint32_t *vs1) +{ + uint32_t a = vs2[H4(3)], b = vs2[H4(2)], e = vs2[H4(1)], f = vs2[H4(0)]; + uint32_t c = vd[H4(3)], d = vd[H4(2)], g = vd[H4(1)], h = vd[H4(0)]; + uint32_t W0 = vs1[H4(0)], W1 = vs1[H4(1)]; + uint32_t T1 = h + sum1_32(e) + ch(e, f, g) + W0; + uint32_t T2 = sum0_32(a) + maj(a, b, c); + + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + + T1 = h + sum1_32(e) + ch(e, f, g) + W1; + T2 = sum0_32(a) + maj(a, b, c); + h = g; + g = f; + f = e; + e = d + T1; + d = c; + c = b; + b = a; + a = T1 + T2; + + vd[H4(0)] = f; + vd[H4(1)] = e; + vd[H4(2)] = b; + vd[H4(3)] = a; +} + +void HELPER(vsha2ch32_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env, + uint32_t desc) +{ + const uint32_t esz = 4; + uint32_t total_elems; + uint32_t vta = vext_vta(desc); + + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { + vsha2c_32(((uint32_t *)vs2) + 4 * i, ((uint32_t *)vd) + 4 * i, + ((uint32_t *)vs1) + 4 * i + 2); + } + + /* set tail elements to 1s */ + total_elems = vext_get_total_elems(env, desc, esz); + vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz); + env->vstart = 0; +} + +void HELPER(vsha2ch64_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env, + uint32_t desc) +{ + const uint32_t esz = 8; + uint32_t total_elems; + uint32_t vta = vext_vta(desc); + + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { + vsha2c_64(((uint64_t *)vs2) + 4 * i, ((uint64_t *)vd) + 4 * i, + ((uint64_t *)vs1) + 4 * i + 2); + } + + /* set tail elements to 1s */ + total_elems = vext_get_total_elems(env, desc, esz); + vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz); + env->vstart = 0; +} + +void HELPER(vsha2cl32_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env, + uint32_t desc) +{ + const uint32_t esz = 4; + uint32_t total_elems; + uint32_t vta = vext_vta(desc); + + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { + vsha2c_32(((uint32_t *)vs2) + 4 * i, ((uint32_t *)vd) + 4 * i, + (((uint32_t *)vs1) + 4 * i)); + } + + /* set tail elements to 1s */ + total_elems = vext_get_total_elems(env, desc, esz); + vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz); + env->vstart = 0; +} + +void HELPER(vsha2cl64_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env, + uint32_t desc) +{ + uint32_t esz = 8; + uint32_t total_elems; + uint32_t vta = vext_vta(desc); + + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { + vsha2c_64(((uint64_t *)vs2) + 4 * i, ((uint64_t *)vd) + 4 * i, + (((uint64_t *)vs1) + 4 * i)); + } + + /* set tail elements to 1s */ + total_elems = vext_get_total_elems(env, desc, esz); + vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz); + env->vstart = 0; +} From 2350881c44bdc7c72de6525dbfadddb93ebfd146 Mon Sep 17 00:00:00 2001 From: Lawrence Hunter Date: Wed, 12 Jul 2023 00:59:10 +0800 Subject: [PATCH 18/45] target/riscv: Add Zvksh ISA extension support This commit adds support for the Zvksh vector-crypto extension, which consists of the following instructions: * vsm3me.vv * vsm3c.vi Translation functions are defined in `target/riscv/insn_trans/trans_rvvk.c.inc` and helpers are defined in `target/riscv/vcrypto_helper.c`. Co-authored-by: Kiran Ostrolenk [max.chou@sifive.com: Replaced vstart checking by TCG op] Signed-off-by: Kiran Ostrolenk Signed-off-by: Lawrence Hunter Signed-off-by: Max Chou Reviewed-by: Daniel Henrique Barboza [max.chou@sifive.com: Exposed x-zvksh property] Message-ID: <20230711165917.2629866-12-max.chou@sifive.com> Signed-off-by: Alistair Francis --- target/riscv/cpu.c | 6 +- target/riscv/cpu_cfg.h | 1 + target/riscv/helper.h | 3 + target/riscv/insn32.decode | 4 + target/riscv/insn_trans/trans_rvvk.c.inc | 31 ++++++ target/riscv/vcrypto_helper.c | 134 +++++++++++++++++++++++ 6 files changed, 177 insertions(+), 2 deletions(-) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index f103f536fd..ce0d32eef3 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -132,6 +132,7 @@ static const struct isa_ext_data isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(zvkned, PRIV_VERSION_1_12_0, ext_zvkned), ISA_EXT_DATA_ENTRY(zvknha, PRIV_VERSION_1_12_0, ext_zvknha), ISA_EXT_DATA_ENTRY(zvknhb, PRIV_VERSION_1_12_0, ext_zvknhb), + ISA_EXT_DATA_ENTRY(zvksh, PRIV_VERSION_1_12_0, ext_zvksh), ISA_EXT_DATA_ENTRY(zhinx, PRIV_VERSION_1_12_0, ext_zhinx), ISA_EXT_DATA_ENTRY(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin), ISA_EXT_DATA_ENTRY(smaia, PRIV_VERSION_1_12_0, ext_smaia), @@ -1280,8 +1281,8 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) * In principle Zve*x would also suffice here, were they supported * in qemu */ - if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkned || cpu->cfg.ext_zvknha) && - !cpu->cfg.ext_zve32f) { + if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkned || cpu->cfg.ext_zvknha || + cpu->cfg.ext_zvksh) && !cpu->cfg.ext_zve32f) { error_setg(errp, "Vector crypto extensions require V or Zve* extensions"); return; @@ -1882,6 +1883,7 @@ static Property riscv_cpu_extensions[] = { DEFINE_PROP_BOOL("x-zvkned", RISCVCPU, cfg.ext_zvkned, false), DEFINE_PROP_BOOL("x-zvknha", RISCVCPU, cfg.ext_zvknha, false), DEFINE_PROP_BOOL("x-zvknhb", RISCVCPU, cfg.ext_zvknhb, false), + DEFINE_PROP_BOOL("x-zvksh", RISCVCPU, cfg.ext_zvksh, false), DEFINE_PROP_END_OF_LIST(), }; diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h index 800b8783c1..ab2d9294db 100644 --- a/target/riscv/cpu_cfg.h +++ b/target/riscv/cpu_cfg.h @@ -90,6 +90,7 @@ struct RISCVCPUConfig { bool ext_zvkned; bool ext_zvknha; bool ext_zvknhb; + bool ext_zvksh; bool ext_zmmul; bool ext_zvfbfmin; bool ext_zvfbfwma; diff --git a/target/riscv/helper.h b/target/riscv/helper.h index 34329b52fe..6d21347c39 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -1270,3 +1270,6 @@ DEF_HELPER_5(vsha2ch32_vv, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vsha2ch64_vv, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vsha2cl32_vv, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vsha2cl64_vv, void, ptr, ptr, ptr, env, i32) + +DEF_HELPER_5(vsm3me_vv, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_5(vsm3c_vi, void, ptr, ptr, i32, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index e2b83186dc..4050e843f7 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -991,3 +991,7 @@ vaeskf2_vi 101010 1 ..... ..... 010 ..... 1110111 @r_vm_1 vsha2ms_vv 101101 1 ..... ..... 010 ..... 1110111 @r_vm_1 vsha2ch_vv 101110 1 ..... ..... 010 ..... 1110111 @r_vm_1 vsha2cl_vv 101111 1 ..... ..... 010 ..... 1110111 @r_vm_1 + +# *** Zvksh vector crypto extension *** +vsm3me_vv 100000 1 ..... ..... 010 ..... 1110111 @r_vm_1 +vsm3c_vi 101011 1 ..... ..... 010 ..... 1110111 @r_vm_1 diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc index a35be11b95..6469dd2f02 100644 --- a/target/riscv/insn_trans/trans_rvvk.c.inc +++ b/target/riscv/insn_trans/trans_rvvk.c.inc @@ -500,3 +500,34 @@ static bool trans_vsha2ch_vv(DisasContext *s, arg_rmrr *a) } return false; } + +/* + * Zvksh + */ + +#define ZVKSH_EGS 8 + +static inline bool vsm3_check(DisasContext *s, arg_rmrr *a) +{ + int egw_bytes = ZVKSH_EGS << s->sew; + int mult = 1 << MAX(s->lmul, 0); + return s->cfg_ptr->ext_zvksh == true && + require_rvv(s) && + vext_check_isa_ill(s) && + !is_overlapped(a->rd, mult, a->rs2, mult) && + MAXSZ(s) >= egw_bytes && + s->sew == MO_32; +} + +static inline bool vsm3me_check(DisasContext *s, arg_rmrr *a) +{ + return vsm3_check(s, a) && vext_check_sss(s, a->rd, a->rs1, a->rs2, a->vm); +} + +static inline bool vsm3c_check(DisasContext *s, arg_rmrr *a) +{ + return vsm3_check(s, a) && vext_check_ss(s, a->rd, a->rs2, a->vm); +} + +GEN_VV_UNMASKED_TRANS(vsm3me_vv, vsm3me_check, ZVKSH_EGS) +GEN_VI_UNMASKED_TRANS(vsm3c_vi, vsm3c_check, ZVKSH_EGS) diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c index 2f2099b6fb..e8bbb698c1 100644 --- a/target/riscv/vcrypto_helper.c +++ b/target/riscv/vcrypto_helper.c @@ -635,3 +635,137 @@ void HELPER(vsha2cl64_vv)(void *vd, void *vs1, void *vs2, CPURISCVState *env, vext_set_elems_1s(vd, vta, env->vl * esz, total_elems * esz); env->vstart = 0; } + +static inline uint32_t p1(uint32_t x) +{ + return x ^ rol32(x, 15) ^ rol32(x, 23); +} + +static inline uint32_t zvksh_w(uint32_t m16, uint32_t m9, uint32_t m3, + uint32_t m13, uint32_t m6) +{ + return p1(m16 ^ m9 ^ rol32(m3, 15)) ^ rol32(m13, 7) ^ m6; +} + +void HELPER(vsm3me_vv)(void *vd_vptr, void *vs1_vptr, void *vs2_vptr, + CPURISCVState *env, uint32_t desc) +{ + uint32_t esz = memop_size(FIELD_EX64(env->vtype, VTYPE, VSEW)); + uint32_t total_elems = vext_get_total_elems(env, desc, esz); + uint32_t vta = vext_vta(desc); + uint32_t *vd = vd_vptr; + uint32_t *vs1 = vs1_vptr; + uint32_t *vs2 = vs2_vptr; + + for (int i = env->vstart / 8; i < env->vl / 8; i++) { + uint32_t w[24]; + for (int j = 0; j < 8; j++) { + w[j] = bswap32(vs1[H4((i * 8) + j)]); + w[j + 8] = bswap32(vs2[H4((i * 8) + j)]); + } + for (int j = 0; j < 8; j++) { + w[j + 16] = + zvksh_w(w[j], w[j + 7], w[j + 13], w[j + 3], w[j + 10]); + } + for (int j = 0; j < 8; j++) { + vd[(i * 8) + j] = bswap32(w[H4(j + 16)]); + } + } + vext_set_elems_1s(vd_vptr, vta, env->vl * esz, total_elems * esz); + env->vstart = 0; +} + +static inline uint32_t ff1(uint32_t x, uint32_t y, uint32_t z) +{ + return x ^ y ^ z; +} + +static inline uint32_t ff2(uint32_t x, uint32_t y, uint32_t z) +{ + return (x & y) | (x & z) | (y & z); +} + +static inline uint32_t ff_j(uint32_t x, uint32_t y, uint32_t z, uint32_t j) +{ + return (j <= 15) ? ff1(x, y, z) : ff2(x, y, z); +} + +static inline uint32_t gg1(uint32_t x, uint32_t y, uint32_t z) +{ + return x ^ y ^ z; +} + +static inline uint32_t gg2(uint32_t x, uint32_t y, uint32_t z) +{ + return (x & y) | (~x & z); +} + +static inline uint32_t gg_j(uint32_t x, uint32_t y, uint32_t z, uint32_t j) +{ + return (j <= 15) ? gg1(x, y, z) : gg2(x, y, z); +} + +static inline uint32_t t_j(uint32_t j) +{ + return (j <= 15) ? 0x79cc4519 : 0x7a879d8a; +} + +static inline uint32_t p_0(uint32_t x) +{ + return x ^ rol32(x, 9) ^ rol32(x, 17); +} + +static void sm3c(uint32_t *vd, uint32_t *vs1, uint32_t *vs2, uint32_t uimm) +{ + uint32_t x0, x1; + uint32_t j; + uint32_t ss1, ss2, tt1, tt2; + x0 = vs2[0] ^ vs2[4]; + x1 = vs2[1] ^ vs2[5]; + j = 2 * uimm; + ss1 = rol32(rol32(vs1[0], 12) + vs1[4] + rol32(t_j(j), j % 32), 7); + ss2 = ss1 ^ rol32(vs1[0], 12); + tt1 = ff_j(vs1[0], vs1[1], vs1[2], j) + vs1[3] + ss2 + x0; + tt2 = gg_j(vs1[4], vs1[5], vs1[6], j) + vs1[7] + ss1 + vs2[0]; + vs1[3] = vs1[2]; + vd[3] = rol32(vs1[1], 9); + vs1[1] = vs1[0]; + vd[1] = tt1; + vs1[7] = vs1[6]; + vd[7] = rol32(vs1[5], 19); + vs1[5] = vs1[4]; + vd[5] = p_0(tt2); + j = 2 * uimm + 1; + ss1 = rol32(rol32(vd[1], 12) + vd[5] + rol32(t_j(j), j % 32), 7); + ss2 = ss1 ^ rol32(vd[1], 12); + tt1 = ff_j(vd[1], vs1[1], vd[3], j) + vs1[3] + ss2 + x1; + tt2 = gg_j(vd[5], vs1[5], vd[7], j) + vs1[7] + ss1 + vs2[1]; + vd[2] = rol32(vs1[1], 9); + vd[0] = tt1; + vd[6] = rol32(vs1[5], 19); + vd[4] = p_0(tt2); +} + +void HELPER(vsm3c_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm, + CPURISCVState *env, uint32_t desc) +{ + uint32_t esz = memop_size(FIELD_EX64(env->vtype, VTYPE, VSEW)); + uint32_t total_elems = vext_get_total_elems(env, desc, esz); + uint32_t vta = vext_vta(desc); + uint32_t *vd = vd_vptr; + uint32_t *vs2 = vs2_vptr; + uint32_t v1[8], v2[8], v3[8]; + + for (int i = env->vstart / 8; i < env->vl / 8; i++) { + for (int k = 0; k < 8; k++) { + v2[k] = bswap32(vd[H4(i * 8 + k)]); + v3[k] = bswap32(vs2[H4(i * 8 + k)]); + } + sm3c(v1, v2, v3, uimm); + for (int k = 0; k < 8; k++) { + vd[i * 8 + k] = bswap32(v1[H4(k)]); + } + } + vext_set_elems_1s(vd_vptr, vta, env->vl * esz, total_elems * esz); + env->vstart = 0; +} From 767eb03548f75b1d7c446305dd5bdb074c0b6d8f Mon Sep 17 00:00:00 2001 From: Nazar Kazakov Date: Wed, 12 Jul 2023 00:59:11 +0800 Subject: [PATCH 19/45] target/riscv: Add Zvkg ISA extension support This commit adds support for the Zvkg vector-crypto extension, which consists of the following instructions: * vgmul.vv * vghsh.vv Translation functions are defined in `target/riscv/insn_trans/trans_rvvk.c.inc` and helpers are defined in `target/riscv/vcrypto_helper.c`. Co-authored-by: Lawrence Hunter [max.chou@sifive.com: Replaced vstart checking by TCG op] Signed-off-by: Lawrence Hunter Signed-off-by: Nazar Kazakov Signed-off-by: Max Chou Reviewed-by: Daniel Henrique Barboza [max.chou@sifive.com: Exposed x-zvkg property] [max.chou@sifive.com: Replaced uint by int for cross win32 build] Message-ID: <20230711165917.2629866-13-max.chou@sifive.com> Signed-off-by: Alistair Francis --- target/riscv/cpu.c | 6 +- target/riscv/cpu_cfg.h | 1 + target/riscv/helper.h | 3 + target/riscv/insn32.decode | 4 ++ target/riscv/insn_trans/trans_rvvk.c.inc | 30 ++++++++++ target/riscv/vcrypto_helper.c | 72 ++++++++++++++++++++++++ 6 files changed, 114 insertions(+), 2 deletions(-) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index ce0d32eef3..981907c033 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -129,6 +129,7 @@ static const struct isa_ext_data isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(zvfbfwma, PRIV_VERSION_1_12_0, ext_zvfbfwma), ISA_EXT_DATA_ENTRY(zvfh, PRIV_VERSION_1_12_0, ext_zvfh), ISA_EXT_DATA_ENTRY(zvfhmin, PRIV_VERSION_1_12_0, ext_zvfhmin), + ISA_EXT_DATA_ENTRY(zvkg, PRIV_VERSION_1_12_0, ext_zvkg), ISA_EXT_DATA_ENTRY(zvkned, PRIV_VERSION_1_12_0, ext_zvkned), ISA_EXT_DATA_ENTRY(zvknha, PRIV_VERSION_1_12_0, ext_zvknha), ISA_EXT_DATA_ENTRY(zvknhb, PRIV_VERSION_1_12_0, ext_zvknhb), @@ -1281,8 +1282,8 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) * In principle Zve*x would also suffice here, were they supported * in qemu */ - if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkned || cpu->cfg.ext_zvknha || - cpu->cfg.ext_zvksh) && !cpu->cfg.ext_zve32f) { + if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkg || cpu->cfg.ext_zvkned || + cpu->cfg.ext_zvknha || cpu->cfg.ext_zvksh) && !cpu->cfg.ext_zve32f) { error_setg(errp, "Vector crypto extensions require V or Zve* extensions"); return; @@ -1880,6 +1881,7 @@ static Property riscv_cpu_extensions[] = { /* Vector cryptography extensions */ DEFINE_PROP_BOOL("x-zvbb", RISCVCPU, cfg.ext_zvbb, false), DEFINE_PROP_BOOL("x-zvbc", RISCVCPU, cfg.ext_zvbc, false), + DEFINE_PROP_BOOL("x-zvkg", RISCVCPU, cfg.ext_zvkg, false), DEFINE_PROP_BOOL("x-zvkned", RISCVCPU, cfg.ext_zvkned, false), DEFINE_PROP_BOOL("x-zvknha", RISCVCPU, cfg.ext_zvknha, false), DEFINE_PROP_BOOL("x-zvknhb", RISCVCPU, cfg.ext_zvknhb, false), diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h index ab2d9294db..b754ec2344 100644 --- a/target/riscv/cpu_cfg.h +++ b/target/riscv/cpu_cfg.h @@ -87,6 +87,7 @@ struct RISCVCPUConfig { bool ext_zve64d; bool ext_zvbb; bool ext_zvbc; + bool ext_zvkg; bool ext_zvkned; bool ext_zvknha; bool ext_zvknhb; diff --git a/target/riscv/helper.h b/target/riscv/helper.h index 6d21347c39..ceec97e165 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -1273,3 +1273,6 @@ DEF_HELPER_5(vsha2cl64_vv, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vsm3me_vv, void, ptr, ptr, ptr, env, i32) DEF_HELPER_5(vsm3c_vi, void, ptr, ptr, i32, env, i32) + +DEF_HELPER_5(vghsh_vv, void, ptr, ptr, ptr, env, i32) +DEF_HELPER_4(vgmul_vv, void, ptr, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index 4050e843f7..0fae01c6bb 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -995,3 +995,7 @@ vsha2cl_vv 101111 1 ..... ..... 010 ..... 1110111 @r_vm_1 # *** Zvksh vector crypto extension *** vsm3me_vv 100000 1 ..... ..... 010 ..... 1110111 @r_vm_1 vsm3c_vi 101011 1 ..... ..... 010 ..... 1110111 @r_vm_1 + +# *** Zvkg vector crypto extension *** +vghsh_vv 101100 1 ..... ..... 010 ..... 1110111 @r_vm_1 +vgmul_vv 101000 1 ..... 10001 010 ..... 1110111 @r2_vm_1 diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc index 6469dd2f02..af7cd62e7d 100644 --- a/target/riscv/insn_trans/trans_rvvk.c.inc +++ b/target/riscv/insn_trans/trans_rvvk.c.inc @@ -531,3 +531,33 @@ static inline bool vsm3c_check(DisasContext *s, arg_rmrr *a) GEN_VV_UNMASKED_TRANS(vsm3me_vv, vsm3me_check, ZVKSH_EGS) GEN_VI_UNMASKED_TRANS(vsm3c_vi, vsm3c_check, ZVKSH_EGS) + +/* + * Zvkg + */ + +#define ZVKG_EGS 4 + +static bool vgmul_check(DisasContext *s, arg_rmr *a) +{ + int egw_bytes = ZVKG_EGS << s->sew; + return s->cfg_ptr->ext_zvkg == true && + vext_check_isa_ill(s) && + require_rvv(s) && + MAXSZ(s) >= egw_bytes && + vext_check_ss(s, a->rd, a->rs2, a->vm) && + s->sew == MO_32; +} + +GEN_V_UNMASKED_TRANS(vgmul_vv, vgmul_check, ZVKG_EGS) + +static bool vghsh_check(DisasContext *s, arg_rmrr *a) +{ + int egw_bytes = ZVKG_EGS << s->sew; + return s->cfg_ptr->ext_zvkg == true && + opivv_check(s, a) && + MAXSZ(s) >= egw_bytes && + s->sew == MO_32; +} + +GEN_VV_UNMASKED_TRANS(vghsh_vv, vghsh_check, ZVKG_EGS) diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c index e8bbb698c1..a5e2f7fbb0 100644 --- a/target/riscv/vcrypto_helper.c +++ b/target/riscv/vcrypto_helper.c @@ -769,3 +769,75 @@ void HELPER(vsm3c_vi)(void *vd_vptr, void *vs2_vptr, uint32_t uimm, vext_set_elems_1s(vd_vptr, vta, env->vl * esz, total_elems * esz); env->vstart = 0; } + +void HELPER(vghsh_vv)(void *vd_vptr, void *vs1_vptr, void *vs2_vptr, + CPURISCVState *env, uint32_t desc) +{ + uint64_t *vd = vd_vptr; + uint64_t *vs1 = vs1_vptr; + uint64_t *vs2 = vs2_vptr; + uint32_t vta = vext_vta(desc); + uint32_t total_elems = vext_get_total_elems(env, desc, 4); + + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { + uint64_t Y[2] = {vd[i * 2 + 0], vd[i * 2 + 1]}; + uint64_t H[2] = {brev8(vs2[i * 2 + 0]), brev8(vs2[i * 2 + 1])}; + uint64_t X[2] = {vs1[i * 2 + 0], vs1[i * 2 + 1]}; + uint64_t Z[2] = {0, 0}; + + uint64_t S[2] = {brev8(Y[0] ^ X[0]), brev8(Y[1] ^ X[1])}; + + for (int j = 0; j < 128; j++) { + if ((S[j / 64] >> (j % 64)) & 1) { + Z[0] ^= H[0]; + Z[1] ^= H[1]; + } + bool reduce = ((H[1] >> 63) & 1); + H[1] = H[1] << 1 | H[0] >> 63; + H[0] = H[0] << 1; + if (reduce) { + H[0] ^= 0x87; + } + } + + vd[i * 2 + 0] = brev8(Z[0]); + vd[i * 2 + 1] = brev8(Z[1]); + } + /* set tail elements to 1s */ + vext_set_elems_1s(vd, vta, env->vl * 4, total_elems * 4); + env->vstart = 0; +} + +void HELPER(vgmul_vv)(void *vd_vptr, void *vs2_vptr, CPURISCVState *env, + uint32_t desc) +{ + uint64_t *vd = vd_vptr; + uint64_t *vs2 = vs2_vptr; + uint32_t vta = vext_vta(desc); + uint32_t total_elems = vext_get_total_elems(env, desc, 4); + + for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) { + uint64_t Y[2] = {brev8(vd[i * 2 + 0]), brev8(vd[i * 2 + 1])}; + uint64_t H[2] = {brev8(vs2[i * 2 + 0]), brev8(vs2[i * 2 + 1])}; + uint64_t Z[2] = {0, 0}; + + for (int j = 0; j < 128; j++) { + if ((Y[j / 64] >> (j % 64)) & 1) { + Z[0] ^= H[0]; + Z[1] ^= H[1]; + } + bool reduce = ((H[1] >> 63) & 1); + H[1] = H[1] << 1 | H[0] >> 63; + H[0] = H[0] << 1; + if (reduce) { + H[0] ^= 0x87; + } + } + + vd[i * 2 + 0] = brev8(Z[0]); + vd[i * 2 + 1] = brev8(Z[1]); + } + /* set tail elements to 1s */ + vext_set_elems_1s(vd, vta, env->vl * 4, total_elems * 4); + env->vstart = 0; +} From f6ef550fe5c915d1d74ce9854280d16a8d71e230 Mon Sep 17 00:00:00 2001 From: Max Chou Date: Wed, 12 Jul 2023 00:59:12 +0800 Subject: [PATCH 20/45] crypto: Create sm4_subword Allows sharing of sm4_subword between different targets. Signed-off-by: Max Chou Reviewed-by: Frank Chang Reviewed-by: Richard Henderson Signed-off-by: Max Chou Message-ID: <20230711165917.2629866-14-max.chou@sifive.com> Signed-off-by: Alistair Francis --- include/crypto/sm4.h | 8 ++++++++ target/arm/tcg/crypto_helper.c | 10 ++-------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/include/crypto/sm4.h b/include/crypto/sm4.h index 9bd3ebc62e..de8245d8a7 100644 --- a/include/crypto/sm4.h +++ b/include/crypto/sm4.h @@ -3,4 +3,12 @@ extern const uint8_t sm4_sbox[256]; +static inline uint32_t sm4_subword(uint32_t word) +{ + return sm4_sbox[word & 0xff] | + sm4_sbox[(word >> 8) & 0xff] << 8 | + sm4_sbox[(word >> 16) & 0xff] << 16 | + sm4_sbox[(word >> 24) & 0xff] << 24; +} + #endif diff --git a/target/arm/tcg/crypto_helper.c b/target/arm/tcg/crypto_helper.c index fdd70abbfd..7cadd61e12 100644 --- a/target/arm/tcg/crypto_helper.c +++ b/target/arm/tcg/crypto_helper.c @@ -614,10 +614,7 @@ static void do_crypto_sm4e(uint64_t *rd, uint64_t *rn, uint64_t *rm) CR_ST_WORD(d, (i + 3) % 4) ^ CR_ST_WORD(n, i); - t = sm4_sbox[t & 0xff] | - sm4_sbox[(t >> 8) & 0xff] << 8 | - sm4_sbox[(t >> 16) & 0xff] << 16 | - sm4_sbox[(t >> 24) & 0xff] << 24; + t = sm4_subword(t); CR_ST_WORD(d, i) ^= t ^ rol32(t, 2) ^ rol32(t, 10) ^ rol32(t, 18) ^ rol32(t, 24); @@ -651,10 +648,7 @@ static void do_crypto_sm4ekey(uint64_t *rd, uint64_t *rn, uint64_t *rm) CR_ST_WORD(d, (i + 3) % 4) ^ CR_ST_WORD(m, i); - t = sm4_sbox[t & 0xff] | - sm4_sbox[(t >> 8) & 0xff] << 8 | - sm4_sbox[(t >> 16) & 0xff] << 16 | - sm4_sbox[(t >> 24) & 0xff] << 24; + t = sm4_subword(t); CR_ST_WORD(d, i) ^= t ^ rol32(t, 13) ^ rol32(t, 23); } From f5f3a9152ae1242f5d1113852aa97d13b382815b Mon Sep 17 00:00:00 2001 From: Max Chou Date: Wed, 12 Jul 2023 00:59:13 +0800 Subject: [PATCH 21/45] crypto: Add SM4 constant parameter CK Adds sm4_ck constant for use in sm4 cryptography across different targets. Signed-off-by: Max Chou Reviewed-by: Frank Chang Signed-off-by: Max Chou Message-ID: <20230711165917.2629866-15-max.chou@sifive.com> Signed-off-by: Alistair Francis --- crypto/sm4.c | 10 ++++++++++ include/crypto/sm4.h | 1 + 2 files changed, 11 insertions(+) diff --git a/crypto/sm4.c b/crypto/sm4.c index 9f0cd452c7..2987306cf7 100644 --- a/crypto/sm4.c +++ b/crypto/sm4.c @@ -47,3 +47,13 @@ uint8_t const sm4_sbox[] = { 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48, }; +uint32_t const sm4_ck[] = { + 0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269, + 0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9, + 0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249, + 0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9, + 0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229, + 0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299, + 0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209, + 0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279 +}; diff --git a/include/crypto/sm4.h b/include/crypto/sm4.h index de8245d8a7..382b26d922 100644 --- a/include/crypto/sm4.h +++ b/include/crypto/sm4.h @@ -2,6 +2,7 @@ #define QEMU_SM4_H extern const uint8_t sm4_sbox[256]; +extern const uint32_t sm4_ck[32]; static inline uint32_t sm4_subword(uint32_t word) { From 8b045ff45420d12bdbd9868e83559ffaaf059144 Mon Sep 17 00:00:00 2001 From: Max Chou Date: Wed, 12 Jul 2023 00:59:14 +0800 Subject: [PATCH 22/45] target/riscv: Add Zvksed ISA extension support This commit adds support for the Zvksed vector-crypto extension, which consists of the following instructions: * vsm4k.vi * vsm4r.[vv,vs] Translation functions are defined in `target/riscv/insn_trans/trans_rvvk.c.inc` and helpers are defined in `target/riscv/vcrypto_helper.c`. Signed-off-by: Max Chou Reviewed-by: Frank Chang [lawrence.hunter@codethink.co.uk: Moved SM4 functions from crypto_helper.c to vcrypto_helper.c] [nazar.kazakov@codethink.co.uk: Added alignment checks, refactored code to use macros, and minor style changes] Signed-off-by: Max Chou Message-ID: <20230711165917.2629866-16-max.chou@sifive.com> Signed-off-by: Alistair Francis --- target/riscv/cpu.c | 5 +- target/riscv/cpu_cfg.h | 1 + target/riscv/helper.h | 4 + target/riscv/insn32.decode | 5 + target/riscv/insn_trans/trans_rvvk.c.inc | 43 ++++++++ target/riscv/vcrypto_helper.c | 127 +++++++++++++++++++++++ 6 files changed, 184 insertions(+), 1 deletion(-) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index 981907c033..dc4b88e625 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -133,6 +133,7 @@ static const struct isa_ext_data isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(zvkned, PRIV_VERSION_1_12_0, ext_zvkned), ISA_EXT_DATA_ENTRY(zvknha, PRIV_VERSION_1_12_0, ext_zvknha), ISA_EXT_DATA_ENTRY(zvknhb, PRIV_VERSION_1_12_0, ext_zvknhb), + ISA_EXT_DATA_ENTRY(zvksed, PRIV_VERSION_1_12_0, ext_zvksed), ISA_EXT_DATA_ENTRY(zvksh, PRIV_VERSION_1_12_0, ext_zvksh), ISA_EXT_DATA_ENTRY(zhinx, PRIV_VERSION_1_12_0, ext_zhinx), ISA_EXT_DATA_ENTRY(zhinxmin, PRIV_VERSION_1_12_0, ext_zhinxmin), @@ -1283,7 +1284,8 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) * in qemu */ if ((cpu->cfg.ext_zvbb || cpu->cfg.ext_zvkg || cpu->cfg.ext_zvkned || - cpu->cfg.ext_zvknha || cpu->cfg.ext_zvksh) && !cpu->cfg.ext_zve32f) { + cpu->cfg.ext_zvknha || cpu->cfg.ext_zvksed || cpu->cfg.ext_zvksh) && + !cpu->cfg.ext_zve32f) { error_setg(errp, "Vector crypto extensions require V or Zve* extensions"); return; @@ -1885,6 +1887,7 @@ static Property riscv_cpu_extensions[] = { DEFINE_PROP_BOOL("x-zvkned", RISCVCPU, cfg.ext_zvkned, false), DEFINE_PROP_BOOL("x-zvknha", RISCVCPU, cfg.ext_zvknha, false), DEFINE_PROP_BOOL("x-zvknhb", RISCVCPU, cfg.ext_zvknhb, false), + DEFINE_PROP_BOOL("x-zvksed", RISCVCPU, cfg.ext_zvksed, false), DEFINE_PROP_BOOL("x-zvksh", RISCVCPU, cfg.ext_zvksh, false), DEFINE_PROP_END_OF_LIST(), diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h index b754ec2344..61f6238756 100644 --- a/target/riscv/cpu_cfg.h +++ b/target/riscv/cpu_cfg.h @@ -91,6 +91,7 @@ struct RISCVCPUConfig { bool ext_zvkned; bool ext_zvknha; bool ext_zvknhb; + bool ext_zvksed; bool ext_zvksh; bool ext_zmmul; bool ext_zvfbfmin; diff --git a/target/riscv/helper.h b/target/riscv/helper.h index ceec97e165..8a63523851 100644 --- a/target/riscv/helper.h +++ b/target/riscv/helper.h @@ -1276,3 +1276,7 @@ DEF_HELPER_5(vsm3c_vi, void, ptr, ptr, i32, env, i32) DEF_HELPER_5(vghsh_vv, void, ptr, ptr, ptr, env, i32) DEF_HELPER_4(vgmul_vv, void, ptr, ptr, env, i32) + +DEF_HELPER_5(vsm4k_vi, void, ptr, ptr, i32, env, i32) +DEF_HELPER_4(vsm4r_vv, void, ptr, ptr, env, i32) +DEF_HELPER_4(vsm4r_vs, void, ptr, ptr, env, i32) diff --git a/target/riscv/insn32.decode b/target/riscv/insn32.decode index 0fae01c6bb..33597fe2bb 100644 --- a/target/riscv/insn32.decode +++ b/target/riscv/insn32.decode @@ -999,3 +999,8 @@ vsm3c_vi 101011 1 ..... ..... 010 ..... 1110111 @r_vm_1 # *** Zvkg vector crypto extension *** vghsh_vv 101100 1 ..... ..... 010 ..... 1110111 @r_vm_1 vgmul_vv 101000 1 ..... 10001 010 ..... 1110111 @r2_vm_1 + +# *** Zvksed vector crypto extension *** +vsm4k_vi 100001 1 ..... ..... 010 ..... 1110111 @r_vm_1 +vsm4r_vv 101000 1 ..... 10000 010 ..... 1110111 @r2_vm_1 +vsm4r_vs 101001 1 ..... 10000 010 ..... 1110111 @r2_vm_1 diff --git a/target/riscv/insn_trans/trans_rvvk.c.inc b/target/riscv/insn_trans/trans_rvvk.c.inc index af7cd62e7d..c00c70dfc6 100644 --- a/target/riscv/insn_trans/trans_rvvk.c.inc +++ b/target/riscv/insn_trans/trans_rvvk.c.inc @@ -561,3 +561,46 @@ static bool vghsh_check(DisasContext *s, arg_rmrr *a) } GEN_VV_UNMASKED_TRANS(vghsh_vv, vghsh_check, ZVKG_EGS) + +/* + * Zvksed + */ + +#define ZVKSED_EGS 4 + +static bool zvksed_check(DisasContext *s) +{ + int egw_bytes = ZVKSED_EGS << s->sew; + return s->cfg_ptr->ext_zvksed == true && + require_rvv(s) && + vext_check_isa_ill(s) && + MAXSZ(s) >= egw_bytes && + s->sew == MO_32; +} + +static bool vsm4k_vi_check(DisasContext *s, arg_rmrr *a) +{ + return zvksed_check(s) && + require_align(a->rd, s->lmul) && + require_align(a->rs2, s->lmul); +} + +GEN_VI_UNMASKED_TRANS(vsm4k_vi, vsm4k_vi_check, ZVKSED_EGS) + +static bool vsm4r_vv_check(DisasContext *s, arg_rmr *a) +{ + return zvksed_check(s) && + require_align(a->rd, s->lmul) && + require_align(a->rs2, s->lmul); +} + +GEN_V_UNMASKED_TRANS(vsm4r_vv, vsm4r_vv_check, ZVKSED_EGS) + +static bool vsm4r_vs_check(DisasContext *s, arg_rmr *a) +{ + return zvksed_check(s) && + !is_overlapped(a->rd, 1 << MAX(s->lmul, 0), a->rs2, 1) && + require_align(a->rd, s->lmul); +} + +GEN_V_UNMASKED_TRANS(vsm4r_vs, vsm4r_vs_check, ZVKSED_EGS) diff --git a/target/riscv/vcrypto_helper.c b/target/riscv/vcrypto_helper.c index a5e2f7fbb0..e2d719b13b 100644 --- a/target/riscv/vcrypto_helper.c +++ b/target/riscv/vcrypto_helper.c @@ -24,6 +24,7 @@ #include "cpu.h" #include "crypto/aes.h" #include "crypto/aes-round.h" +#include "crypto/sm4.h" #include "exec/memop.h" #include "exec/exec-all.h" #include "exec/helper-proto.h" @@ -841,3 +842,129 @@ void HELPER(vgmul_vv)(void *vd_vptr, void *vs2_vptr, CPURISCVState *env, vext_set_elems_1s(vd, vta, env->vl * 4, total_elems * 4); env->vstart = 0; } + +void HELPER(vsm4k_vi)(void *vd, void *vs2, uint32_t uimm5, CPURISCVState *env, + uint32_t desc) +{ + const uint32_t egs = 4; + uint32_t rnd = uimm5 & 0x7; + uint32_t group_start = env->vstart / egs; + uint32_t group_end = env->vl / egs; + uint32_t esz = sizeof(uint32_t); + uint32_t total_elems = vext_get_total_elems(env, desc, esz); + + for (uint32_t i = group_start; i < group_end; ++i) { + uint32_t vstart = i * egs; + uint32_t vend = (i + 1) * egs; + uint32_t rk[4] = {0}; + uint32_t tmp[8] = {0}; + + for (uint32_t j = vstart; j < vend; ++j) { + rk[j - vstart] = *((uint32_t *)vs2 + H4(j)); + } + + for (uint32_t j = 0; j < egs; ++j) { + tmp[j] = rk[j]; + } + + for (uint32_t j = 0; j < egs; ++j) { + uint32_t b, s; + b = tmp[j + 1] ^ tmp[j + 2] ^ tmp[j + 3] ^ sm4_ck[rnd * 4 + j]; + + s = sm4_subword(b); + + tmp[j + 4] = tmp[j] ^ (s ^ rol32(s, 13) ^ rol32(s, 23)); + } + + for (uint32_t j = vstart; j < vend; ++j) { + *((uint32_t *)vd + H4(j)) = tmp[egs + (j - vstart)]; + } + } + + env->vstart = 0; + /* set tail elements to 1s */ + vext_set_elems_1s(vd, vext_vta(desc), env->vl * esz, total_elems * esz); +} + +static void do_sm4_round(uint32_t *rk, uint32_t *buf) +{ + const uint32_t egs = 4; + uint32_t s, b; + + for (uint32_t j = egs; j < egs * 2; ++j) { + b = buf[j - 3] ^ buf[j - 2] ^ buf[j - 1] ^ rk[j - 4]; + + s = sm4_subword(b); + + buf[j] = buf[j - 4] ^ (s ^ rol32(s, 2) ^ rol32(s, 10) ^ rol32(s, 18) ^ + rol32(s, 24)); + } +} + +void HELPER(vsm4r_vv)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc) +{ + const uint32_t egs = 4; + uint32_t group_start = env->vstart / egs; + uint32_t group_end = env->vl / egs; + uint32_t esz = sizeof(uint32_t); + uint32_t total_elems = vext_get_total_elems(env, desc, esz); + + for (uint32_t i = group_start; i < group_end; ++i) { + uint32_t vstart = i * egs; + uint32_t vend = (i + 1) * egs; + uint32_t rk[4] = {0}; + uint32_t tmp[8] = {0}; + + for (uint32_t j = vstart; j < vend; ++j) { + rk[j - vstart] = *((uint32_t *)vs2 + H4(j)); + } + + for (uint32_t j = vstart; j < vend; ++j) { + tmp[j - vstart] = *((uint32_t *)vd + H4(j)); + } + + do_sm4_round(rk, tmp); + + for (uint32_t j = vstart; j < vend; ++j) { + *((uint32_t *)vd + H4(j)) = tmp[egs + (j - vstart)]; + } + } + + env->vstart = 0; + /* set tail elements to 1s */ + vext_set_elems_1s(vd, vext_vta(desc), env->vl * esz, total_elems * esz); +} + +void HELPER(vsm4r_vs)(void *vd, void *vs2, CPURISCVState *env, uint32_t desc) +{ + const uint32_t egs = 4; + uint32_t group_start = env->vstart / egs; + uint32_t group_end = env->vl / egs; + uint32_t esz = sizeof(uint32_t); + uint32_t total_elems = vext_get_total_elems(env, desc, esz); + + for (uint32_t i = group_start; i < group_end; ++i) { + uint32_t vstart = i * egs; + uint32_t vend = (i + 1) * egs; + uint32_t rk[4] = {0}; + uint32_t tmp[8] = {0}; + + for (uint32_t j = 0; j < egs; ++j) { + rk[j] = *((uint32_t *)vs2 + H4(j)); + } + + for (uint32_t j = vstart; j < vend; ++j) { + tmp[j - vstart] = *((uint32_t *)vd + H4(j)); + } + + do_sm4_round(rk, tmp); + + for (uint32_t j = vstart; j < vend; ++j) { + *((uint32_t *)vd + H4(j)) = tmp[egs + (j - vstart)]; + } + } + + env->vstart = 0; + /* set tail elements to 1s */ + vext_set_elems_1s(vd, vext_vta(desc), env->vl * esz, total_elems * esz); +} From ebe16b90395c70a8edbb7a0e855a8de2d3cfb4f9 Mon Sep 17 00:00:00 2001 From: Rob Bradford Date: Wed, 2 Aug 2023 13:49:06 +0100 Subject: [PATCH 23/45] target/riscv: Implement WARL behaviour for mcountinhibit/mcounteren These are WARL fields - zero out the bits for unavailable counters and special case the TM bit in mcountinhibit which is hardwired to zero. This patch achieves this by modifying the value written so that any use of the field will see the correctly masked bits. Tested by modifying OpenSBI to write max value to these CSRs and upon subsequent read the appropriate number of bits for number of PMUs is enabled and the TM bit is zero in mcountinhibit. Signed-off-by: Rob Bradford Acked-by: Alistair Francis Reviewed-by: Atish Patra Message-ID: <20230802124906.24197-1-rbradford@rivosinc.com> Signed-off-by: Alistair Francis --- target/riscv/csr.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/target/riscv/csr.c b/target/riscv/csr.c index ca95ae1527..661744e6d4 100644 --- a/target/riscv/csr.c +++ b/target/riscv/csr.c @@ -1833,8 +1833,11 @@ static RISCVException write_mcountinhibit(CPURISCVState *env, int csrno, { int cidx; PMUCTRState *counter; + RISCVCPU *cpu = env_archcpu(env); - env->mcountinhibit = val; + /* WARL register - disable unavailable counters; TM bit is always 0 */ + env->mcountinhibit = + val & (cpu->pmu_avail_ctrs | COUNTEREN_CY | COUNTEREN_IR); /* Check if any other counter is also monitoring cycles/instructions */ for (cidx = 0; cidx < RV_MAX_MHPMCOUNTERS; cidx++) { @@ -1857,7 +1860,11 @@ static RISCVException read_mcounteren(CPURISCVState *env, int csrno, static RISCVException write_mcounteren(CPURISCVState *env, int csrno, target_ulong val) { - env->mcounteren = val; + RISCVCPU *cpu = env_archcpu(env); + + /* WARL register - disable unavailable counters */ + env->mcounteren = val & (cpu->pmu_avail_ctrs | COUNTEREN_CY | COUNTEREN_TM | + COUNTEREN_IR); return RISCV_EXCP_NONE; } From 0228aca23a96d869a889822ed4ded9ea6ea44f98 Mon Sep 17 00:00:00 2001 From: Jason Chien Date: Wed, 26 Jul 2023 07:40:46 +0000 Subject: [PATCH 24/45] target/riscv: Add Zihintntl extension ISA string to DTS RVA23 Profiles states: The RVA23 profiles are intended to be used for 64-bit application processors that will run rich OS stacks from standard binary OS distributions and with a substantial number of third-party binary user applications that will be supported over a considerable length of time in the field. The chapter 4 of the unprivileged spec introduces the Zihintntl extension and Zihintntl is a mandatory extension presented in RVA23 Profiles, whose purpose is to enable application and operating system portability across different implementations. Thus the DTS should contain the Zihintntl ISA string in order to pass to software. The unprivileged spec states: Like any HINTs, these instructions may be freely ignored. Hence, although they are described in terms of cache-based memory hierarchies, they do not mandate the provision of caches. These instructions are encoded with non-used opcode, e.g. ADD x0, x0, x2, which QEMU already supports, and QEMU does not emulate cache. Therefore these instructions can be considered as a no-op, and we only need to add a new property for the Zihintntl extension. Reviewed-by: Frank Chang Reviewed-by: Alistair Francis Signed-off-by: Jason Chien Message-ID: <20230726074049.19505-2-jason.chien@sifive.com> Signed-off-by: Alistair Francis --- target/riscv/cpu.c | 2 ++ target/riscv/cpu_cfg.h | 1 + 2 files changed, 3 insertions(+) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index dc4b88e625..fae1c92c5c 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -87,6 +87,7 @@ static const struct isa_ext_data isa_edata_arr[] = { ISA_EXT_DATA_ENTRY(zicond, PRIV_VERSION_1_12_0, ext_zicond), ISA_EXT_DATA_ENTRY(zicsr, PRIV_VERSION_1_10_0, ext_icsr), ISA_EXT_DATA_ENTRY(zifencei, PRIV_VERSION_1_10_0, ext_ifencei), + ISA_EXT_DATA_ENTRY(zihintntl, PRIV_VERSION_1_10_0, ext_zihintntl), ISA_EXT_DATA_ENTRY(zihintpause, PRIV_VERSION_1_10_0, ext_zihintpause), ISA_EXT_DATA_ENTRY(zmmul, PRIV_VERSION_1_12_0, ext_zmmul), ISA_EXT_DATA_ENTRY(zawrs, PRIV_VERSION_1_12_0, ext_zawrs), @@ -1790,6 +1791,7 @@ static Property riscv_cpu_extensions[] = { DEFINE_PROP_BOOL("sscofpmf", RISCVCPU, cfg.ext_sscofpmf, false), DEFINE_PROP_BOOL("Zifencei", RISCVCPU, cfg.ext_ifencei, true), DEFINE_PROP_BOOL("Zicsr", RISCVCPU, cfg.ext_icsr, true), + DEFINE_PROP_BOOL("Zihintntl", RISCVCPU, cfg.ext_zihintntl, true), DEFINE_PROP_BOOL("Zihintpause", RISCVCPU, cfg.ext_zihintpause, true), DEFINE_PROP_BOOL("Zawrs", RISCVCPU, cfg.ext_zawrs, true), DEFINE_PROP_BOOL("Zfa", RISCVCPU, cfg.ext_zfa, true), diff --git a/target/riscv/cpu_cfg.h b/target/riscv/cpu_cfg.h index 61f6238756..0e6a0f245c 100644 --- a/target/riscv/cpu_cfg.h +++ b/target/riscv/cpu_cfg.h @@ -66,6 +66,7 @@ struct RISCVCPUConfig { bool ext_icbom; bool ext_icboz; bool ext_zicond; + bool ext_zihintntl; bool ext_zihintpause; bool ext_smstateen; bool ext_sstc; From eda633a534f8af4abe3a88731bba6dacdb973993 Mon Sep 17 00:00:00 2001 From: LIU Zhiwei Date: Fri, 28 Jul 2023 08:39:06 +0800 Subject: [PATCH 25/45] target/riscv: Fix zfa fleq.d and fltq.d Commit a47842d ("riscv: Add support for the Zfa extension") implemented the zfa extension. However, it has some typos for fleq.d and fltq.d. Both of them misused the fltq.s helper function. Fixes: a47842d ("riscv: Add support for the Zfa extension") Signed-off-by: LIU Zhiwei Reviewed-by: Daniel Henrique Barboza Reviewed-by: Weiwei Li Message-ID: <20230728003906.768-1-zhiwei_liu@linux.alibaba.com> Signed-off-by: Alistair Francis --- target/riscv/insn_trans/trans_rvzfa.c.inc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/riscv/insn_trans/trans_rvzfa.c.inc b/target/riscv/insn_trans/trans_rvzfa.c.inc index 2c715af3e5..0fdd2698f6 100644 --- a/target/riscv/insn_trans/trans_rvzfa.c.inc +++ b/target/riscv/insn_trans/trans_rvzfa.c.inc @@ -470,7 +470,7 @@ bool trans_fleq_d(DisasContext *ctx, arg_fleq_d *a) TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); - gen_helper_fltq_s(dest, cpu_env, src1, src2); + gen_helper_fleq_d(dest, cpu_env, src1, src2); gen_set_gpr(ctx, a->rd, dest); return true; } @@ -485,7 +485,7 @@ bool trans_fltq_d(DisasContext *ctx, arg_fltq_d *a) TCGv_i64 src1 = get_fpr_hs(ctx, a->rs1); TCGv_i64 src2 = get_fpr_hs(ctx, a->rs2); - gen_helper_fltq_s(dest, cpu_env, src1, src2); + gen_helper_fltq_d(dest, cpu_env, src1, src2); gen_set_gpr(ctx, a->rd, dest); return true; } From e0922b73baf00c4c19d4ad30d09bb94f7ffea0f4 Mon Sep 17 00:00:00 2001 From: Jason Chien Date: Fri, 28 Jul 2023 08:24:38 +0000 Subject: [PATCH 26/45] hw/intc: Fix upper/lower mtime write calculation When writing the upper mtime, we should keep the original lower mtime whose value is given by cpu_riscv_read_rtc() instead of cpu_riscv_read_rtc_raw(). The same logic applies to writes to lower mtime. Signed-off-by: Jason Chien Reviewed-by: Alistair Francis Message-ID: <20230728082502.26439-1-jason.chien@sifive.com> Signed-off-by: Alistair Francis --- hw/intc/riscv_aclint.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c index b466a6abaf..bf77e29a70 100644 --- a/hw/intc/riscv_aclint.c +++ b/hw/intc/riscv_aclint.c @@ -208,11 +208,12 @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr, return; } else if (addr == mtimer->time_base || addr == mtimer->time_base + 4) { uint64_t rtc_r = cpu_riscv_read_rtc_raw(mtimer->timebase_freq); + uint64_t rtc = cpu_riscv_read_rtc(mtimer); if (addr == mtimer->time_base) { if (size == 4) { /* time_lo for RV32/RV64 */ - mtimer->time_delta = ((rtc_r & ~0xFFFFFFFFULL) | value) - rtc_r; + mtimer->time_delta = ((rtc & ~0xFFFFFFFFULL) | value) - rtc_r; } else { /* time for RV64 */ mtimer->time_delta = value - rtc_r; @@ -220,7 +221,7 @@ static void riscv_aclint_mtimer_write(void *opaque, hwaddr addr, } else { if (size == 4) { /* time_hi for RV32/RV64 */ - mtimer->time_delta = (value << 32 | (rtc_r & 0xFFFFFFFF)) - rtc_r; + mtimer->time_delta = (value << 32 | (rtc & 0xFFFFFFFF)) - rtc_r; } else { qemu_log_mask(LOG_GUEST_ERROR, "aclint-mtimer: invalid time_hi write: %08x", From 9382a9eafccad8dc6a487ea3a8d2bed03dc35db9 Mon Sep 17 00:00:00 2001 From: Jason Chien Date: Fri, 28 Jul 2023 08:24:39 +0000 Subject: [PATCH 27/45] hw/intc: Make rtc variable names consistent The variables whose values are given by cpu_riscv_read_rtc() should be named "rtc". The variables whose value are given by cpu_riscv_read_rtc_raw() should be named "rtc_r". Signed-off-by: Jason Chien Reviewed-by: Alistair Francis Message-ID: <20230728082502.26439-2-jason.chien@sifive.com> Signed-off-by: Alistair Francis --- hw/intc/riscv_aclint.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/intc/riscv_aclint.c b/hw/intc/riscv_aclint.c index bf77e29a70..25cf7a5d9d 100644 --- a/hw/intc/riscv_aclint.c +++ b/hw/intc/riscv_aclint.c @@ -64,13 +64,13 @@ static void riscv_aclint_mtimer_write_timecmp(RISCVAclintMTimerState *mtimer, uint64_t next; uint64_t diff; - uint64_t rtc_r = cpu_riscv_read_rtc(mtimer); + uint64_t rtc = cpu_riscv_read_rtc(mtimer); /* Compute the relative hartid w.r.t the socket */ hartid = hartid - mtimer->hartid_base; mtimer->timecmp[hartid] = value; - if (mtimer->timecmp[hartid] <= rtc_r) { + if (mtimer->timecmp[hartid] <= rtc) { /* * If we're setting an MTIMECMP value in the "past", * immediately raise the timer interrupt @@ -81,7 +81,7 @@ static void riscv_aclint_mtimer_write_timecmp(RISCVAclintMTimerState *mtimer, /* otherwise, set up the future timer interrupt */ qemu_irq_lower(mtimer->timer_irqs[hartid]); - diff = mtimer->timecmp[hartid] - rtc_r; + diff = mtimer->timecmp[hartid] - rtc; /* back to ns (note args switched in muldiv64) */ uint64_t ns_diff = muldiv64(diff, NANOSECONDS_PER_SECOND, timebase_freq); From ae7d4d625cab49657b9fc2be09d895afb9bcdaf0 Mon Sep 17 00:00:00 2001 From: LIU Zhiwei Date: Fri, 11 Aug 2023 13:54:38 +0800 Subject: [PATCH 28/45] linux-user/riscv: Use abi type for target_ucontext MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We should not use types dependend on host arch for target_ucontext. This bug is found when run rv32 applications. Signed-off-by: LIU Zhiwei Reviewed-by: Richard Henderson Reviewed-by: Daniel Henrique Barboza Reviewed-by: Philippe Mathieu-Daudé Message-ID: <20230811055438.1945-1-zhiwei_liu@linux.alibaba.com> Signed-off-by: Alistair Francis --- linux-user/riscv/signal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/linux-user/riscv/signal.c b/linux-user/riscv/signal.c index eaa168199a..f989f7f51f 100644 --- a/linux-user/riscv/signal.c +++ b/linux-user/riscv/signal.c @@ -38,8 +38,8 @@ struct target_sigcontext { }; /* cf. riscv-linux:arch/riscv/include/uapi/asm/ptrace.h */ struct target_ucontext { - unsigned long uc_flags; - struct target_ucontext *uc_link; + abi_ulong uc_flags; + abi_ptr uc_link; target_stack_t uc_stack; target_sigset_t uc_sigmask; uint8_t __unused[1024 / 8 - sizeof(target_sigset_t)]; From 59a07d3c619bec722e8522be2c26f892fbf0cbd4 Mon Sep 17 00:00:00 2001 From: Yong-Xuan Wang Date: Thu, 27 Jul 2023 10:24:33 +0000 Subject: [PATCH 29/45] target/riscv: support the AIA device emulation with KVM enabled In this patch, we create the APLIC and IMSIC FDT helper functions and remove M mode AIA devices when using KVM acceleration. Signed-off-by: Yong-Xuan Wang Reviewed-by: Jim Shu Reviewed-by: Daniel Henrique Barboza Reviewed-by: Andrew Jones Message-ID: <20230727102439.22554-2-yongxuan.wang@sifive.com> Signed-off-by: Alistair Francis --- hw/riscv/virt.c | 306 +++++++++++++++++++++++------------------------- 1 file changed, 145 insertions(+), 161 deletions(-) diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index a5ac3ab777..09a4030d4a 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -516,79 +516,28 @@ static uint32_t imsic_num_bits(uint32_t count) return ret; } -static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap, - uint32_t *phandle, uint32_t *intc_phandles, - uint32_t *msi_m_phandle, uint32_t *msi_s_phandle) +static void create_fdt_one_imsic(RISCVVirtState *s, hwaddr base_addr, + uint32_t *intc_phandles, uint32_t msi_phandle, + bool m_mode, uint32_t imsic_guest_bits) { int cpu, socket; char *imsic_name; MachineState *ms = MACHINE(s); int socket_count = riscv_socket_count(ms); - uint32_t imsic_max_hart_per_socket, imsic_guest_bits; + uint32_t imsic_max_hart_per_socket; uint32_t *imsic_cells, *imsic_regs, imsic_addr, imsic_size; - *msi_m_phandle = (*phandle)++; - *msi_s_phandle = (*phandle)++; imsic_cells = g_new0(uint32_t, ms->smp.cpus * 2); imsic_regs = g_new0(uint32_t, socket_count * 4); - /* M-level IMSIC node */ for (cpu = 0; cpu < ms->smp.cpus; cpu++) { imsic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); - imsic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_M_EXT); + imsic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? IRQ_M_EXT : IRQ_S_EXT); } + imsic_max_hart_per_socket = 0; for (socket = 0; socket < socket_count; socket++) { - imsic_addr = memmap[VIRT_IMSIC_M].base + - socket * VIRT_IMSIC_GROUP_MAX_SIZE; - imsic_size = IMSIC_HART_SIZE(0) * s->soc[socket].num_harts; - imsic_regs[socket * 4 + 0] = 0; - imsic_regs[socket * 4 + 1] = cpu_to_be32(imsic_addr); - imsic_regs[socket * 4 + 2] = 0; - imsic_regs[socket * 4 + 3] = cpu_to_be32(imsic_size); - if (imsic_max_hart_per_socket < s->soc[socket].num_harts) { - imsic_max_hart_per_socket = s->soc[socket].num_harts; - } - } - imsic_name = g_strdup_printf("/soc/imsics@%lx", - (unsigned long)memmap[VIRT_IMSIC_M].base); - qemu_fdt_add_subnode(ms->fdt, imsic_name); - qemu_fdt_setprop_string(ms->fdt, imsic_name, "compatible", - "riscv,imsics"); - qemu_fdt_setprop_cell(ms->fdt, imsic_name, "#interrupt-cells", - FDT_IMSIC_INT_CELLS); - qemu_fdt_setprop(ms->fdt, imsic_name, "interrupt-controller", - NULL, 0); - qemu_fdt_setprop(ms->fdt, imsic_name, "msi-controller", - NULL, 0); - qemu_fdt_setprop(ms->fdt, imsic_name, "interrupts-extended", - imsic_cells, ms->smp.cpus * sizeof(uint32_t) * 2); - qemu_fdt_setprop(ms->fdt, imsic_name, "reg", imsic_regs, - socket_count * sizeof(uint32_t) * 4); - qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,num-ids", - VIRT_IRQCHIP_NUM_MSIS); - if (socket_count > 1) { - qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,hart-index-bits", - imsic_num_bits(imsic_max_hart_per_socket)); - qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-bits", - imsic_num_bits(socket_count)); - qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-shift", - IMSIC_MMIO_GROUP_MIN_SHIFT); - } - qemu_fdt_setprop_cell(ms->fdt, imsic_name, "phandle", *msi_m_phandle); - - g_free(imsic_name); - - /* S-level IMSIC node */ - for (cpu = 0; cpu < ms->smp.cpus; cpu++) { - imsic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); - imsic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_S_EXT); - } - imsic_guest_bits = imsic_num_bits(s->aia_guests + 1); - imsic_max_hart_per_socket = 0; - for (socket = 0; socket < socket_count; socket++) { - imsic_addr = memmap[VIRT_IMSIC_S].base + - socket * VIRT_IMSIC_GROUP_MAX_SIZE; + imsic_addr = base_addr + socket * VIRT_IMSIC_GROUP_MAX_SIZE; imsic_size = IMSIC_HART_SIZE(imsic_guest_bits) * s->soc[socket].num_harts; imsic_regs[socket * 4 + 0] = 0; @@ -599,42 +548,116 @@ static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap, imsic_max_hart_per_socket = s->soc[socket].num_harts; } } - imsic_name = g_strdup_printf("/soc/imsics@%lx", - (unsigned long)memmap[VIRT_IMSIC_S].base); + + imsic_name = g_strdup_printf("/soc/imsics@%lx", (unsigned long)base_addr); qemu_fdt_add_subnode(ms->fdt, imsic_name); - qemu_fdt_setprop_string(ms->fdt, imsic_name, "compatible", - "riscv,imsics"); + qemu_fdt_setprop_string(ms->fdt, imsic_name, "compatible", "riscv,imsics"); qemu_fdt_setprop_cell(ms->fdt, imsic_name, "#interrupt-cells", - FDT_IMSIC_INT_CELLS); - qemu_fdt_setprop(ms->fdt, imsic_name, "interrupt-controller", - NULL, 0); - qemu_fdt_setprop(ms->fdt, imsic_name, "msi-controller", - NULL, 0); + FDT_IMSIC_INT_CELLS); + qemu_fdt_setprop(ms->fdt, imsic_name, "interrupt-controller", NULL, 0); + qemu_fdt_setprop(ms->fdt, imsic_name, "msi-controller", NULL, 0); qemu_fdt_setprop(ms->fdt, imsic_name, "interrupts-extended", - imsic_cells, ms->smp.cpus * sizeof(uint32_t) * 2); + imsic_cells, ms->smp.cpus * sizeof(uint32_t) * 2); qemu_fdt_setprop(ms->fdt, imsic_name, "reg", imsic_regs, - socket_count * sizeof(uint32_t) * 4); + socket_count * sizeof(uint32_t) * 4); qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,num-ids", - VIRT_IRQCHIP_NUM_MSIS); + VIRT_IRQCHIP_NUM_MSIS); + if (imsic_guest_bits) { qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,guest-index-bits", - imsic_guest_bits); + imsic_guest_bits); } + if (socket_count > 1) { qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,hart-index-bits", - imsic_num_bits(imsic_max_hart_per_socket)); + imsic_num_bits(imsic_max_hart_per_socket)); qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-bits", - imsic_num_bits(socket_count)); + imsic_num_bits(socket_count)); qemu_fdt_setprop_cell(ms->fdt, imsic_name, "riscv,group-index-shift", - IMSIC_MMIO_GROUP_MIN_SHIFT); + IMSIC_MMIO_GROUP_MIN_SHIFT); } - qemu_fdt_setprop_cell(ms->fdt, imsic_name, "phandle", *msi_s_phandle); - g_free(imsic_name); + qemu_fdt_setprop_cell(ms->fdt, imsic_name, "phandle", msi_phandle); + g_free(imsic_name); g_free(imsic_regs); g_free(imsic_cells); } +static void create_fdt_imsic(RISCVVirtState *s, const MemMapEntry *memmap, + uint32_t *phandle, uint32_t *intc_phandles, + uint32_t *msi_m_phandle, uint32_t *msi_s_phandle) +{ + *msi_m_phandle = (*phandle)++; + *msi_s_phandle = (*phandle)++; + + if (!kvm_enabled()) { + /* M-level IMSIC node */ + create_fdt_one_imsic(s, memmap[VIRT_IMSIC_M].base, intc_phandles, + *msi_m_phandle, true, 0); + } + + /* S-level IMSIC node */ + create_fdt_one_imsic(s, memmap[VIRT_IMSIC_S].base, intc_phandles, + *msi_s_phandle, false, + imsic_num_bits(s->aia_guests + 1)); + +} + +static void create_fdt_one_aplic(RISCVVirtState *s, int socket, + unsigned long aplic_addr, uint32_t aplic_size, + uint32_t msi_phandle, + uint32_t *intc_phandles, + uint32_t aplic_phandle, + uint32_t aplic_child_phandle, + bool m_mode) +{ + int cpu; + char *aplic_name; + uint32_t *aplic_cells; + MachineState *ms = MACHINE(s); + + aplic_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2); + + for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) { + aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); + aplic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? IRQ_M_EXT : IRQ_S_EXT); + } + + aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr); + qemu_fdt_add_subnode(ms->fdt, aplic_name); + qemu_fdt_setprop_string(ms->fdt, aplic_name, "compatible", "riscv,aplic"); + qemu_fdt_setprop_cell(ms->fdt, aplic_name, + "#interrupt-cells", FDT_APLIC_INT_CELLS); + qemu_fdt_setprop(ms->fdt, aplic_name, "interrupt-controller", NULL, 0); + + if (s->aia_type == VIRT_AIA_TYPE_APLIC) { + qemu_fdt_setprop(ms->fdt, aplic_name, "interrupts-extended", + aplic_cells, + s->soc[socket].num_harts * sizeof(uint32_t) * 2); + } else { + qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent", msi_phandle); + } + + qemu_fdt_setprop_cells(ms->fdt, aplic_name, "reg", + 0x0, aplic_addr, 0x0, aplic_size); + qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,num-sources", + VIRT_IRQCHIP_NUM_SOURCES); + + if (aplic_child_phandle) { + qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,children", + aplic_child_phandle); + qemu_fdt_setprop_cells(ms->fdt, aplic_name, "riscv,delegate", + aplic_child_phandle, 0x1, + VIRT_IRQCHIP_NUM_SOURCES); + } + + riscv_socket_fdt_write_id(ms, aplic_name, socket); + qemu_fdt_setprop_cell(ms->fdt, aplic_name, "phandle", aplic_phandle); + + g_free(aplic_name); + g_free(aplic_cells); +} + static void create_fdt_socket_aplic(RISCVVirtState *s, const MemMapEntry *memmap, int socket, uint32_t msi_m_phandle, @@ -643,75 +666,33 @@ static void create_fdt_socket_aplic(RISCVVirtState *s, uint32_t *intc_phandles, uint32_t *aplic_phandles) { - int cpu; char *aplic_name; - uint32_t *aplic_cells; unsigned long aplic_addr; MachineState *ms = MACHINE(s); uint32_t aplic_m_phandle, aplic_s_phandle; aplic_m_phandle = (*phandle)++; aplic_s_phandle = (*phandle)++; - aplic_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2); - /* M-level APLIC node */ - for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) { - aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); - aplic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_M_EXT); + if (!kvm_enabled()) { + /* M-level APLIC node */ + aplic_addr = memmap[VIRT_APLIC_M].base + + (memmap[VIRT_APLIC_M].size * socket); + create_fdt_one_aplic(s, socket, aplic_addr, memmap[VIRT_APLIC_M].size, + msi_m_phandle, intc_phandles, + aplic_m_phandle, aplic_s_phandle, + true); } - aplic_addr = memmap[VIRT_APLIC_M].base + - (memmap[VIRT_APLIC_M].size * socket); - aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr); - qemu_fdt_add_subnode(ms->fdt, aplic_name); - qemu_fdt_setprop_string(ms->fdt, aplic_name, "compatible", "riscv,aplic"); - qemu_fdt_setprop_cell(ms->fdt, aplic_name, - "#interrupt-cells", FDT_APLIC_INT_CELLS); - qemu_fdt_setprop(ms->fdt, aplic_name, "interrupt-controller", NULL, 0); - if (s->aia_type == VIRT_AIA_TYPE_APLIC) { - qemu_fdt_setprop(ms->fdt, aplic_name, "interrupts-extended", - aplic_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 2); - } else { - qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent", - msi_m_phandle); - } - qemu_fdt_setprop_cells(ms->fdt, aplic_name, "reg", - 0x0, aplic_addr, 0x0, memmap[VIRT_APLIC_M].size); - qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,num-sources", - VIRT_IRQCHIP_NUM_SOURCES); - qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,children", - aplic_s_phandle); - qemu_fdt_setprop_cells(ms->fdt, aplic_name, "riscv,delegate", - aplic_s_phandle, 0x1, VIRT_IRQCHIP_NUM_SOURCES); - riscv_socket_fdt_write_id(ms, aplic_name, socket); - qemu_fdt_setprop_cell(ms->fdt, aplic_name, "phandle", aplic_m_phandle); - g_free(aplic_name); /* S-level APLIC node */ - for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) { - aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); - aplic_cells[cpu * 2 + 1] = cpu_to_be32(IRQ_S_EXT); - } aplic_addr = memmap[VIRT_APLIC_S].base + (memmap[VIRT_APLIC_S].size * socket); + create_fdt_one_aplic(s, socket, aplic_addr, memmap[VIRT_APLIC_S].size, + msi_s_phandle, intc_phandles, + aplic_s_phandle, 0, + false); + aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr); - qemu_fdt_add_subnode(ms->fdt, aplic_name); - qemu_fdt_setprop_string(ms->fdt, aplic_name, "compatible", "riscv,aplic"); - qemu_fdt_setprop_cell(ms->fdt, aplic_name, - "#interrupt-cells", FDT_APLIC_INT_CELLS); - qemu_fdt_setprop(ms->fdt, aplic_name, "interrupt-controller", NULL, 0); - if (s->aia_type == VIRT_AIA_TYPE_APLIC) { - qemu_fdt_setprop(ms->fdt, aplic_name, "interrupts-extended", - aplic_cells, s->soc[socket].num_harts * sizeof(uint32_t) * 2); - } else { - qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent", - msi_s_phandle); - } - qemu_fdt_setprop_cells(ms->fdt, aplic_name, "reg", - 0x0, aplic_addr, 0x0, memmap[VIRT_APLIC_S].size); - qemu_fdt_setprop_cell(ms->fdt, aplic_name, "riscv,num-sources", - VIRT_IRQCHIP_NUM_SOURCES); - riscv_socket_fdt_write_id(ms, aplic_name, socket); - qemu_fdt_setprop_cell(ms->fdt, aplic_name, "phandle", aplic_s_phandle); if (!socket) { platform_bus_add_all_fdt_nodes(ms->fdt, aplic_name, @@ -722,7 +703,6 @@ static void create_fdt_socket_aplic(RISCVVirtState *s, g_free(aplic_name); - g_free(aplic_cells); aplic_phandles[socket] = aplic_s_phandle; } @@ -1163,16 +1143,20 @@ static DeviceState *virt_create_aia(RISCVVirtAIAType aia_type, int aia_guests, int i; hwaddr addr; uint32_t guest_bits; - DeviceState *aplic_m; - bool msimode = (aia_type == VIRT_AIA_TYPE_APLIC_IMSIC) ? true : false; + DeviceState *aplic_s = NULL; + DeviceState *aplic_m = NULL; + bool msimode = aia_type == VIRT_AIA_TYPE_APLIC_IMSIC; if (msimode) { - /* Per-socket M-level IMSICs */ - addr = memmap[VIRT_IMSIC_M].base + socket * VIRT_IMSIC_GROUP_MAX_SIZE; - for (i = 0; i < hart_count; i++) { - riscv_imsic_create(addr + i * IMSIC_HART_SIZE(0), - base_hartid + i, true, 1, - VIRT_IRQCHIP_NUM_MSIS); + if (!kvm_enabled()) { + /* Per-socket M-level IMSICs */ + addr = memmap[VIRT_IMSIC_M].base + + socket * VIRT_IMSIC_GROUP_MAX_SIZE; + for (i = 0; i < hart_count; i++) { + riscv_imsic_create(addr + i * IMSIC_HART_SIZE(0), + base_hartid + i, true, 1, + VIRT_IRQCHIP_NUM_MSIS); + } } /* Per-socket S-level IMSICs */ @@ -1185,29 +1169,29 @@ static DeviceState *virt_create_aia(RISCVVirtAIAType aia_type, int aia_guests, } } - /* Per-socket M-level APLIC */ - aplic_m = riscv_aplic_create( - memmap[VIRT_APLIC_M].base + socket * memmap[VIRT_APLIC_M].size, - memmap[VIRT_APLIC_M].size, - (msimode) ? 0 : base_hartid, - (msimode) ? 0 : hart_count, - VIRT_IRQCHIP_NUM_SOURCES, - VIRT_IRQCHIP_NUM_PRIO_BITS, - msimode, true, NULL); - - if (aplic_m) { - /* Per-socket S-level APLIC */ - riscv_aplic_create( - memmap[VIRT_APLIC_S].base + socket * memmap[VIRT_APLIC_S].size, - memmap[VIRT_APLIC_S].size, - (msimode) ? 0 : base_hartid, - (msimode) ? 0 : hart_count, - VIRT_IRQCHIP_NUM_SOURCES, - VIRT_IRQCHIP_NUM_PRIO_BITS, - msimode, false, aplic_m); + if (!kvm_enabled()) { + /* Per-socket M-level APLIC */ + aplic_m = riscv_aplic_create(memmap[VIRT_APLIC_M].base + + socket * memmap[VIRT_APLIC_M].size, + memmap[VIRT_APLIC_M].size, + (msimode) ? 0 : base_hartid, + (msimode) ? 0 : hart_count, + VIRT_IRQCHIP_NUM_SOURCES, + VIRT_IRQCHIP_NUM_PRIO_BITS, + msimode, true, NULL); } - return aplic_m; + /* Per-socket S-level APLIC */ + aplic_s = riscv_aplic_create(memmap[VIRT_APLIC_S].base + + socket * memmap[VIRT_APLIC_S].size, + memmap[VIRT_APLIC_S].size, + (msimode) ? 0 : base_hartid, + (msimode) ? 0 : hart_count, + VIRT_IRQCHIP_NUM_SOURCES, + VIRT_IRQCHIP_NUM_PRIO_BITS, + msimode, false, aplic_m); + + return kvm_enabled() ? aplic_s : aplic_m; } static void create_platform_bus(RISCVVirtState *s, DeviceState *irqchip) From 97b9f5ef144151e2e8917ea64aca74a5751745ef Mon Sep 17 00:00:00 2001 From: Yong-Xuan Wang Date: Thu, 27 Jul 2023 10:24:34 +0000 Subject: [PATCH 30/45] target/riscv: check the in-kernel irqchip support We check the in-kernel irqchip support when using KVM acceleration. Signed-off-by: Yong-Xuan Wang Reviewed-by: Jim Shu Reviewed-by: Daniel Henrique Barboza Reviewed-by: Andrew Jones Message-ID: <20230727102439.22554-3-yongxuan.wang@sifive.com> Signed-off-by: Alistair Francis --- target/riscv/kvm.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c index dbcf26f27d..2953547cb6 100644 --- a/target/riscv/kvm.c +++ b/target/riscv/kvm.c @@ -926,7 +926,15 @@ int kvm_arch_init(MachineState *ms, KVMState *s) int kvm_arch_irqchip_create(KVMState *s) { - return 0; + if (kvm_kernel_irqchip_split()) { + error_report("-machine kernel_irqchip=split is not supported on RISC-V."); + exit(1); + } + + /* + * We can create the VAIA using the newer device control API. + */ + return kvm_check_extension(s, KVM_CAP_DEVICE_CTRL); } int kvm_arch_process_async_events(CPUState *cs) From 9634ef7eda5f5b57f03924351a213b776f6b8a23 Mon Sep 17 00:00:00 2001 From: Yong-Xuan Wang Date: Thu, 27 Jul 2023 10:24:35 +0000 Subject: [PATCH 31/45] target/riscv: Create an KVM AIA irqchip We create a vAIA chip by using the KVM_DEV_TYPE_RISCV_AIA and then set up the chip with the KVM_DEV_RISCV_AIA_GRP_* APIs. We also extend KVM accelerator to specify the KVM AIA mode. The "riscv-aia" parameter is passed along with --accel in QEMU command-line. 1) "riscv-aia=emul": IMSIC is emulated by hypervisor 2) "riscv-aia=hwaccel": use hardware guest IMSIC 3) "riscv-aia=auto": use the hardware guest IMSICs whenever available otherwise we fallback to software emulation. Signed-off-by: Yong-Xuan Wang Reviewed-by: Jim Shu Reviewed-by: Daniel Henrique Barboza Reviewed-by: Andrew Jones Message-ID: <20230727102439.22554-4-yongxuan.wang@sifive.com> Signed-off-by: Alistair Francis --- target/riscv/kvm.c | 186 +++++++++++++++++++++++++++++++++++++++ target/riscv/kvm_riscv.h | 4 + 2 files changed, 190 insertions(+) diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c index 2953547cb6..6e909d0fdd 100644 --- a/target/riscv/kvm.c +++ b/target/riscv/kvm.c @@ -36,6 +36,7 @@ #include "exec/address-spaces.h" #include "hw/boards.h" #include "hw/irq.h" +#include "hw/intc/riscv_imsic.h" #include "qemu/log.h" #include "hw/loader.h" #include "kvm_riscv.h" @@ -43,6 +44,7 @@ #include "chardev/char-fe.h" #include "migration/migration.h" #include "sysemu/runstate.h" +#include "hw/riscv/numa.h" static uint64_t kvm_riscv_reg_id(CPURISCVState *env, uint64_t type, uint64_t idx) @@ -1035,6 +1037,190 @@ bool kvm_arch_cpu_check_are_resettable(void) return true; } +static int aia_mode; + +static const char *kvm_aia_mode_str(uint64_t mode) +{ + switch (mode) { + case KVM_DEV_RISCV_AIA_MODE_EMUL: + return "emul"; + case KVM_DEV_RISCV_AIA_MODE_HWACCEL: + return "hwaccel"; + case KVM_DEV_RISCV_AIA_MODE_AUTO: + default: + return "auto"; + }; +} + +static char *riscv_get_kvm_aia(Object *obj, Error **errp) +{ + return g_strdup(kvm_aia_mode_str(aia_mode)); +} + +static void riscv_set_kvm_aia(Object *obj, const char *val, Error **errp) +{ + if (!strcmp(val, "emul")) { + aia_mode = KVM_DEV_RISCV_AIA_MODE_EMUL; + } else if (!strcmp(val, "hwaccel")) { + aia_mode = KVM_DEV_RISCV_AIA_MODE_HWACCEL; + } else if (!strcmp(val, "auto")) { + aia_mode = KVM_DEV_RISCV_AIA_MODE_AUTO; + } else { + error_setg(errp, "Invalid KVM AIA mode"); + error_append_hint(errp, "Valid values are emul, hwaccel, and auto.\n"); + } +} + void kvm_arch_accel_class_init(ObjectClass *oc) { + object_class_property_add_str(oc, "riscv-aia", riscv_get_kvm_aia, + riscv_set_kvm_aia); + object_class_property_set_description(oc, "riscv-aia", + "Set KVM AIA mode. Valid values are " + "emul, hwaccel, and auto. Default " + "is auto."); + object_property_set_default_str(object_class_property_find(oc, "riscv-aia"), + "auto"); +} + +void kvm_riscv_aia_create(MachineState *machine, uint64_t group_shift, + uint64_t aia_irq_num, uint64_t aia_msi_num, + uint64_t aplic_base, uint64_t imsic_base, + uint64_t guest_num) +{ + int ret, i; + int aia_fd = -1; + uint64_t default_aia_mode; + uint64_t socket_count = riscv_socket_count(machine); + uint64_t max_hart_per_socket = 0; + uint64_t socket, base_hart, hart_count, socket_imsic_base, imsic_addr; + uint64_t socket_bits, hart_bits, guest_bits; + + aia_fd = kvm_create_device(kvm_state, KVM_DEV_TYPE_RISCV_AIA, false); + + if (aia_fd < 0) { + error_report("Unable to create in-kernel irqchip"); + exit(1); + } + + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG, + KVM_DEV_RISCV_AIA_CONFIG_MODE, + &default_aia_mode, false, NULL); + if (ret < 0) { + error_report("KVM AIA: failed to get current KVM AIA mode"); + exit(1); + } + qemu_log("KVM AIA: default mode is %s\n", + kvm_aia_mode_str(default_aia_mode)); + + if (default_aia_mode != aia_mode) { + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG, + KVM_DEV_RISCV_AIA_CONFIG_MODE, + &aia_mode, true, NULL); + if (ret < 0) + warn_report("KVM AIA: failed to set KVM AIA mode"); + else + qemu_log("KVM AIA: set current mode to %s\n", + kvm_aia_mode_str(aia_mode)); + } + + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG, + KVM_DEV_RISCV_AIA_CONFIG_SRCS, + &aia_irq_num, true, NULL); + if (ret < 0) { + error_report("KVM AIA: failed to set number of input irq lines"); + exit(1); + } + + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG, + KVM_DEV_RISCV_AIA_CONFIG_IDS, + &aia_msi_num, true, NULL); + if (ret < 0) { + error_report("KVM AIA: failed to set number of msi"); + exit(1); + } + + socket_bits = find_last_bit(&socket_count, BITS_PER_LONG) + 1; + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG, + KVM_DEV_RISCV_AIA_CONFIG_GROUP_BITS, + &socket_bits, true, NULL); + if (ret < 0) { + error_report("KVM AIA: failed to set group_bits"); + exit(1); + } + + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG, + KVM_DEV_RISCV_AIA_CONFIG_GROUP_SHIFT, + &group_shift, true, NULL); + if (ret < 0) { + error_report("KVM AIA: failed to set group_shift"); + exit(1); + } + + guest_bits = guest_num == 0 ? 0 : + find_last_bit(&guest_num, BITS_PER_LONG) + 1; + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG, + KVM_DEV_RISCV_AIA_CONFIG_GUEST_BITS, + &guest_bits, true, NULL); + if (ret < 0) { + error_report("KVM AIA: failed to set guest_bits"); + exit(1); + } + + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_ADDR, + KVM_DEV_RISCV_AIA_ADDR_APLIC, + &aplic_base, true, NULL); + if (ret < 0) { + error_report("KVM AIA: failed to set the base address of APLIC"); + exit(1); + } + + for (socket = 0; socket < socket_count; socket++) { + socket_imsic_base = imsic_base + socket * (1U << group_shift); + hart_count = riscv_socket_hart_count(machine, socket); + base_hart = riscv_socket_first_hartid(machine, socket); + + if (max_hart_per_socket < hart_count) { + max_hart_per_socket = hart_count; + } + + for (i = 0; i < hart_count; i++) { + imsic_addr = socket_imsic_base + i * IMSIC_HART_SIZE(guest_bits); + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_ADDR, + KVM_DEV_RISCV_AIA_ADDR_IMSIC(i + base_hart), + &imsic_addr, true, NULL); + if (ret < 0) { + error_report("KVM AIA: failed to set the IMSIC address for hart %d", i); + exit(1); + } + } + } + + hart_bits = find_last_bit(&max_hart_per_socket, BITS_PER_LONG) + 1; + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CONFIG, + KVM_DEV_RISCV_AIA_CONFIG_HART_BITS, + &hart_bits, true, NULL); + if (ret < 0) { + error_report("KVM AIA: failed to set hart_bits"); + exit(1); + } + + if (kvm_has_gsi_routing()) { + for (uint64_t idx = 0; idx < aia_irq_num + 1; ++idx) { + /* KVM AIA only has one APLIC instance */ + kvm_irqchip_add_irq_route(kvm_state, idx, 0, idx); + } + kvm_gsi_routing_allowed = true; + kvm_irqchip_commit_routes(kvm_state); + } + + ret = kvm_device_access(aia_fd, KVM_DEV_RISCV_AIA_GRP_CTRL, + KVM_DEV_RISCV_AIA_CTRL_INIT, + NULL, true, NULL); + if (ret < 0) { + error_report("KVM AIA: initialized fail"); + exit(1); + } + + kvm_msi_via_irqfd_allowed = kvm_irqfds_enabled(); } diff --git a/target/riscv/kvm_riscv.h b/target/riscv/kvm_riscv.h index e3ba935808..7d4b7c60e2 100644 --- a/target/riscv/kvm_riscv.h +++ b/target/riscv/kvm_riscv.h @@ -22,5 +22,9 @@ void kvm_riscv_init_user_properties(Object *cpu_obj); void kvm_riscv_reset_vcpu(RISCVCPU *cpu); void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level); +void kvm_riscv_aia_create(MachineState *machine, uint64_t group_shift, + uint64_t aia_irq_num, uint64_t aia_msi_num, + uint64_t aplic_base, uint64_t imsic_base, + uint64_t guest_num); #endif From 95a97b3fd25ee1c73a6bbfe0d47ac31864a95a4c Mon Sep 17 00:00:00 2001 From: Yong-Xuan Wang Date: Thu, 27 Jul 2023 10:24:36 +0000 Subject: [PATCH 32/45] target/riscv: update APLIC and IMSIC to support KVM AIA KVM AIA can't emulate APLIC only. When "aia=aplic" parameter is passed, APLIC devices is emulated by QEMU. For "aia=aplic-imsic", remove the mmio operations of APLIC when using KVM AIA and send wired interrupt signal via KVM_IRQ_LINE API. After KVM AIA enabled, MSI messages are delivered by KVM_SIGNAL_MSI API when the IMSICs receive mmio write requests. Signed-off-by: Yong-Xuan Wang Reviewed-by: Jim Shu Reviewed-by: Daniel Henrique Barboza Reviewed-by: Andrew Jones Message-ID: <20230727102439.22554-5-yongxuan.wang@sifive.com> Signed-off-by: Alistair Francis --- hw/intc/riscv_aplic.c | 56 ++++++++++++++++++++++++++++++------------- hw/intc/riscv_imsic.c | 25 +++++++++++++++---- 2 files changed, 61 insertions(+), 20 deletions(-) diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c index 4bdc6a5d1a..592c3ce768 100644 --- a/hw/intc/riscv_aplic.c +++ b/hw/intc/riscv_aplic.c @@ -31,6 +31,7 @@ #include "hw/irq.h" #include "target/riscv/cpu.h" #include "sysemu/sysemu.h" +#include "sysemu/kvm.h" #include "migration/vmstate.h" #define APLIC_MAX_IDC (1UL << 14) @@ -148,6 +149,15 @@ #define APLIC_IDC_CLAIMI 0x1c +/* + * KVM AIA only supports APLIC MSI, fallback to QEMU emulation if we want to use + * APLIC Wired. + */ +static bool is_kvm_aia(bool msimode) +{ + return kvm_irqchip_in_kernel() && msimode; +} + static uint32_t riscv_aplic_read_input_word(RISCVAPLICState *aplic, uint32_t word) { @@ -471,6 +481,11 @@ static uint32_t riscv_aplic_idc_claimi(RISCVAPLICState *aplic, uint32_t idc) return topi; } +static void riscv_kvm_aplic_request(void *opaque, int irq, int level) +{ + kvm_set_irq(kvm_state, irq, !!level); +} + static void riscv_aplic_request(void *opaque, int irq, int level) { bool update = false; @@ -801,29 +816,35 @@ static void riscv_aplic_realize(DeviceState *dev, Error **errp) uint32_t i; RISCVAPLICState *aplic = RISCV_APLIC(dev); - aplic->bitfield_words = (aplic->num_irqs + 31) >> 5; - aplic->sourcecfg = g_new0(uint32_t, aplic->num_irqs); - aplic->state = g_new0(uint32_t, aplic->num_irqs); - aplic->target = g_new0(uint32_t, aplic->num_irqs); - if (!aplic->msimode) { - for (i = 0; i < aplic->num_irqs; i++) { - aplic->target[i] = 1; + if (!is_kvm_aia(aplic->msimode)) { + aplic->bitfield_words = (aplic->num_irqs + 31) >> 5; + aplic->sourcecfg = g_new0(uint32_t, aplic->num_irqs); + aplic->state = g_new0(uint32_t, aplic->num_irqs); + aplic->target = g_new0(uint32_t, aplic->num_irqs); + if (!aplic->msimode) { + for (i = 0; i < aplic->num_irqs; i++) { + aplic->target[i] = 1; + } } - } - aplic->idelivery = g_new0(uint32_t, aplic->num_harts); - aplic->iforce = g_new0(uint32_t, aplic->num_harts); - aplic->ithreshold = g_new0(uint32_t, aplic->num_harts); + aplic->idelivery = g_new0(uint32_t, aplic->num_harts); + aplic->iforce = g_new0(uint32_t, aplic->num_harts); + aplic->ithreshold = g_new0(uint32_t, aplic->num_harts); - memory_region_init_io(&aplic->mmio, OBJECT(dev), &riscv_aplic_ops, aplic, - TYPE_RISCV_APLIC, aplic->aperture_size); - sysbus_init_mmio(SYS_BUS_DEVICE(dev), &aplic->mmio); + memory_region_init_io(&aplic->mmio, OBJECT(dev), &riscv_aplic_ops, + aplic, TYPE_RISCV_APLIC, aplic->aperture_size); + sysbus_init_mmio(SYS_BUS_DEVICE(dev), &aplic->mmio); + } /* * Only root APLICs have hardware IRQ lines. All non-root APLICs * have IRQ lines delegated by their parent APLIC. */ if (!aplic->parent) { - qdev_init_gpio_in(dev, riscv_aplic_request, aplic->num_irqs); + if (is_kvm_aia(aplic->msimode)) { + qdev_init_gpio_in(dev, riscv_kvm_aplic_request, aplic->num_irqs); + } else { + qdev_init_gpio_in(dev, riscv_aplic_request, aplic->num_irqs); + } } /* Create output IRQ lines for non-MSI mode */ @@ -958,7 +979,10 @@ DeviceState *riscv_aplic_create(hwaddr addr, hwaddr size, qdev_prop_set_bit(dev, "mmode", mmode); sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr); + + if (!is_kvm_aia(msimode)) { + sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, addr); + } if (parent) { riscv_aplic_add_child(parent, dev); diff --git a/hw/intc/riscv_imsic.c b/hw/intc/riscv_imsic.c index fea3385b51..760dbddcf7 100644 --- a/hw/intc/riscv_imsic.c +++ b/hw/intc/riscv_imsic.c @@ -32,6 +32,7 @@ #include "target/riscv/cpu.h" #include "target/riscv/cpu_bits.h" #include "sysemu/sysemu.h" +#include "sysemu/kvm.h" #include "migration/vmstate.h" #define IMSIC_MMIO_PAGE_LE 0x00 @@ -283,6 +284,20 @@ static void riscv_imsic_write(void *opaque, hwaddr addr, uint64_t value, goto err; } +#if defined(CONFIG_KVM) + if (kvm_irqchip_in_kernel()) { + struct kvm_msi msi; + + msi.address_lo = extract64(imsic->mmio.addr + addr, 0, 32); + msi.address_hi = extract64(imsic->mmio.addr + addr, 32, 32); + msi.data = le32_to_cpu(value); + + kvm_vm_ioctl(kvm_state, KVM_SIGNAL_MSI, &msi); + + return; + } +#endif + /* Writes only supported for MSI little-endian registers */ page = addr >> IMSIC_MMIO_PAGE_SHIFT; if ((addr & (IMSIC_MMIO_PAGE_SZ - 1)) == IMSIC_MMIO_PAGE_LE) { @@ -320,10 +335,12 @@ static void riscv_imsic_realize(DeviceState *dev, Error **errp) CPUState *cpu = cpu_by_arch_id(imsic->hartid); CPURISCVState *env = cpu ? cpu->env_ptr : NULL; - imsic->num_eistate = imsic->num_pages * imsic->num_irqs; - imsic->eidelivery = g_new0(uint32_t, imsic->num_pages); - imsic->eithreshold = g_new0(uint32_t, imsic->num_pages); - imsic->eistate = g_new0(uint32_t, imsic->num_eistate); + if (!kvm_irqchip_in_kernel()) { + imsic->num_eistate = imsic->num_pages * imsic->num_irqs; + imsic->eidelivery = g_new0(uint32_t, imsic->num_pages); + imsic->eithreshold = g_new0(uint32_t, imsic->num_pages); + imsic->eistate = g_new0(uint32_t, imsic->num_eistate); + } memory_region_init_io(&imsic->mmio, OBJECT(dev), &riscv_imsic_ops, imsic, TYPE_RISCV_IMSIC, From 48c2c33c52cd019f6e3c3c916b9777002905a4ef Mon Sep 17 00:00:00 2001 From: Yong-Xuan Wang Date: Thu, 27 Jul 2023 10:24:37 +0000 Subject: [PATCH 33/45] target/riscv: select KVM AIA in riscv virt machine Select KVM AIA when the host kernel has in-kernel AIA chip support. Since KVM AIA only has one APLIC instance, we map the QEMU APLIC devices to KVM APLIC. Signed-off-by: Yong-Xuan Wang Reviewed-by: Jim Shu Reviewed-by: Daniel Henrique Barboza Reviewed-by: Andrew Jones Message-ID: <20230727102439.22554-6-yongxuan.wang@sifive.com> Signed-off-by: Alistair Francis --- hw/riscv/virt.c | 92 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 62 insertions(+), 30 deletions(-) diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index 09a4030d4a..0353a6de56 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -35,6 +35,7 @@ #include "hw/riscv/virt.h" #include "hw/riscv/boot.h" #include "hw/riscv/numa.h" +#include "kvm_riscv.h" #include "hw/intc/riscv_aclint.h" #include "hw/intc/riscv_aplic.h" #include "hw/intc/riscv_imsic.h" @@ -75,6 +76,12 @@ #error "Can't accommodate all IMSIC groups in address space" #endif +/* KVM AIA only supports APLIC MSI. APLIC Wired is always emulated by QEMU. */ +static bool virt_use_kvm_aia(RISCVVirtState *s) +{ + return kvm_irqchip_in_kernel() && s->aia_type == VIRT_AIA_TYPE_APLIC_IMSIC; +} + static const MemMapEntry virt_memmap[] = { [VIRT_DEBUG] = { 0x0, 0x100 }, [VIRT_MROM] = { 0x1000, 0xf000 }, @@ -609,16 +616,16 @@ static void create_fdt_one_aplic(RISCVVirtState *s, int socket, uint32_t *intc_phandles, uint32_t aplic_phandle, uint32_t aplic_child_phandle, - bool m_mode) + bool m_mode, int num_harts) { int cpu; char *aplic_name; uint32_t *aplic_cells; MachineState *ms = MACHINE(s); - aplic_cells = g_new0(uint32_t, s->soc[socket].num_harts * 2); + aplic_cells = g_new0(uint32_t, num_harts * 2); - for (cpu = 0; cpu < s->soc[socket].num_harts; cpu++) { + for (cpu = 0; cpu < num_harts; cpu++) { aplic_cells[cpu * 2 + 0] = cpu_to_be32(intc_phandles[cpu]); aplic_cells[cpu * 2 + 1] = cpu_to_be32(m_mode ? IRQ_M_EXT : IRQ_S_EXT); } @@ -632,8 +639,7 @@ static void create_fdt_one_aplic(RISCVVirtState *s, int socket, if (s->aia_type == VIRT_AIA_TYPE_APLIC) { qemu_fdt_setprop(ms->fdt, aplic_name, "interrupts-extended", - aplic_cells, - s->soc[socket].num_harts * sizeof(uint32_t) * 2); + aplic_cells, num_harts * sizeof(uint32_t) * 2); } else { qemu_fdt_setprop_cell(ms->fdt, aplic_name, "msi-parent", msi_phandle); } @@ -664,7 +670,8 @@ static void create_fdt_socket_aplic(RISCVVirtState *s, uint32_t msi_s_phandle, uint32_t *phandle, uint32_t *intc_phandles, - uint32_t *aplic_phandles) + uint32_t *aplic_phandles, + int num_harts) { char *aplic_name; unsigned long aplic_addr; @@ -681,7 +688,7 @@ static void create_fdt_socket_aplic(RISCVVirtState *s, create_fdt_one_aplic(s, socket, aplic_addr, memmap[VIRT_APLIC_M].size, msi_m_phandle, intc_phandles, aplic_m_phandle, aplic_s_phandle, - true); + true, num_harts); } /* S-level APLIC node */ @@ -690,7 +697,7 @@ static void create_fdt_socket_aplic(RISCVVirtState *s, create_fdt_one_aplic(s, socket, aplic_addr, memmap[VIRT_APLIC_S].size, msi_s_phandle, intc_phandles, aplic_s_phandle, 0, - false); + false, num_harts); aplic_name = g_strdup_printf("/soc/aplic@%lx", aplic_addr); @@ -774,34 +781,51 @@ static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap, *msi_pcie_phandle = msi_s_phandle; } - phandle_pos = ms->smp.cpus; - for (socket = (socket_count - 1); socket >= 0; socket--) { - phandle_pos -= s->soc[socket].num_harts; + /* KVM AIA only has one APLIC instance */ + if (virt_use_kvm_aia(s)) { + create_fdt_socket_aplic(s, memmap, 0, + msi_m_phandle, msi_s_phandle, phandle, + &intc_phandles[0], xplic_phandles, + ms->smp.cpus); + } else { + phandle_pos = ms->smp.cpus; + for (socket = (socket_count - 1); socket >= 0; socket--) { + phandle_pos -= s->soc[socket].num_harts; - if (s->aia_type == VIRT_AIA_TYPE_NONE) { - create_fdt_socket_plic(s, memmap, socket, phandle, - &intc_phandles[phandle_pos], xplic_phandles); - } else { - create_fdt_socket_aplic(s, memmap, socket, - msi_m_phandle, msi_s_phandle, phandle, - &intc_phandles[phandle_pos], xplic_phandles); + if (s->aia_type == VIRT_AIA_TYPE_NONE) { + create_fdt_socket_plic(s, memmap, socket, phandle, + &intc_phandles[phandle_pos], + xplic_phandles); + } else { + create_fdt_socket_aplic(s, memmap, socket, + msi_m_phandle, msi_s_phandle, phandle, + &intc_phandles[phandle_pos], + xplic_phandles, + s->soc[socket].num_harts); + } } } g_free(intc_phandles); - for (socket = 0; socket < socket_count; socket++) { - if (socket == 0) { - *irq_mmio_phandle = xplic_phandles[socket]; - *irq_virtio_phandle = xplic_phandles[socket]; - *irq_pcie_phandle = xplic_phandles[socket]; - } - if (socket == 1) { - *irq_virtio_phandle = xplic_phandles[socket]; - *irq_pcie_phandle = xplic_phandles[socket]; - } - if (socket == 2) { - *irq_pcie_phandle = xplic_phandles[socket]; + if (virt_use_kvm_aia(s)) { + *irq_mmio_phandle = xplic_phandles[0]; + *irq_virtio_phandle = xplic_phandles[0]; + *irq_pcie_phandle = xplic_phandles[0]; + } else { + for (socket = 0; socket < socket_count; socket++) { + if (socket == 0) { + *irq_mmio_phandle = xplic_phandles[socket]; + *irq_virtio_phandle = xplic_phandles[socket]; + *irq_pcie_phandle = xplic_phandles[socket]; + } + if (socket == 1) { + *irq_virtio_phandle = xplic_phandles[socket]; + *irq_pcie_phandle = xplic_phandles[socket]; + } + if (socket == 2) { + *irq_pcie_phandle = xplic_phandles[socket]; + } } } @@ -1437,6 +1461,14 @@ static void virt_machine_init(MachineState *machine) } } + if (virt_use_kvm_aia(s)) { + kvm_riscv_aia_create(machine, IMSIC_MMIO_GROUP_MIN_SHIFT, + VIRT_IRQCHIP_NUM_SOURCES, VIRT_IRQCHIP_NUM_MSIS, + memmap[VIRT_APLIC_S].base, + memmap[VIRT_IMSIC_S].base, + s->aia_guests); + } + if (riscv_is_32bit(&s->soc[0])) { #if HOST_LONG_BITS == 64 /* limit RAM size in a 32-bit system */ From 9ff31406312500053ecb5f92df01dd9ce52e635d Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Thu, 27 Jul 2023 15:24:17 +0100 Subject: [PATCH 34/45] hw/riscv: virt: Fix riscv,pmu DT node path On a dtb dumped from the virt machine, dt-validate complains: soc: pmu: {'riscv,event-to-mhpmcounters': [[1, 1, 524281], [2, 2, 524284], [65561, 65561, 524280], [65563, 65563, 524280], [65569, 65569, 524280]], 'compatible': ['riscv,pmu']} should not be valid under {'type': 'object'} from schema $id: http://devicetree.org/schemas/simple-bus.yaml# That's pretty cryptic, but running the dtb back through dtc produces something a lot more reasonable: Warning (simple_bus_reg): /soc/pmu: missing or empty reg/ranges property Moving the riscv,pmu node out of the soc bus solves the problem. Signed-off-by: Conor Dooley Acked-by: Alistair Francis Reviewed-by: Daniel Henrique Barboza Message-ID: <20230727-groom-decline-2c57ce42841c@spud> Signed-off-by: Alistair Francis --- hw/riscv/virt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index 0353a6de56..496c17c644 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -719,7 +719,7 @@ static void create_fdt_pmu(RISCVVirtState *s) MachineState *ms = MACHINE(s); RISCVCPU hart = s->soc[0].harts[0]; - pmu_name = g_strdup_printf("/soc/pmu"); + pmu_name = g_strdup_printf("/pmu"); qemu_fdt_add_subnode(ms->fdt, pmu_name); qemu_fdt_setprop_string(ms->fdt, pmu_name, "compatible", "riscv,pmu"); riscv_pmu_generate_fdt_node(ms->fdt, hart.cfg.pmu_num, pmu_name); From ed67d63798f2ff31a7541ed97ac671c8e1f79297 Mon Sep 17 00:00:00 2001 From: Weiwei Li Date: Wed, 16 Aug 2023 22:19:16 +0800 Subject: [PATCH 35/45] target/riscv: Update CSR bits name for svadu extension The Svadu specification updated the name of the *envcfg bit from HADE to ADUE. Signed-off-by: Weiwei Li Signed-off-by: Junqiang Wang Reviewed-by: Daniel Henrique Barboza Message-ID: <20230816141916.66898-1-liweiwei@iscas.ac.cn> Signed-off-by: Alistair Francis --- target/riscv/cpu.c | 4 ++-- target/riscv/cpu_bits.h | 8 ++++---- target/riscv/cpu_helper.c | 6 +++--- target/riscv/csr.c | 12 ++++++------ 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index fae1c92c5c..8071f05f15 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -886,9 +886,9 @@ static void riscv_cpu_reset_hold(Object *obj) env->two_stage_lookup = false; env->menvcfg = (cpu->cfg.ext_svpbmt ? MENVCFG_PBMTE : 0) | - (cpu->cfg.ext_svadu ? MENVCFG_HADE : 0); + (cpu->cfg.ext_svadu ? MENVCFG_ADUE : 0); env->henvcfg = (cpu->cfg.ext_svpbmt ? HENVCFG_PBMTE : 0) | - (cpu->cfg.ext_svadu ? HENVCFG_HADE : 0); + (cpu->cfg.ext_svadu ? HENVCFG_ADUE : 0); /* Initialized default priorities of local interrupts. */ for (i = 0; i < ARRAY_SIZE(env->miprio); i++) { diff --git a/target/riscv/cpu_bits.h b/target/riscv/cpu_bits.h index 31a8d80990..3d6ffaabc7 100644 --- a/target/riscv/cpu_bits.h +++ b/target/riscv/cpu_bits.h @@ -745,12 +745,12 @@ typedef enum RISCVException { #define MENVCFG_CBIE (3UL << 4) #define MENVCFG_CBCFE BIT(6) #define MENVCFG_CBZE BIT(7) -#define MENVCFG_HADE (1ULL << 61) +#define MENVCFG_ADUE (1ULL << 61) #define MENVCFG_PBMTE (1ULL << 62) #define MENVCFG_STCE (1ULL << 63) /* For RV32 */ -#define MENVCFGH_HADE BIT(29) +#define MENVCFGH_ADUE BIT(29) #define MENVCFGH_PBMTE BIT(30) #define MENVCFGH_STCE BIT(31) @@ -763,12 +763,12 @@ typedef enum RISCVException { #define HENVCFG_CBIE MENVCFG_CBIE #define HENVCFG_CBCFE MENVCFG_CBCFE #define HENVCFG_CBZE MENVCFG_CBZE -#define HENVCFG_HADE MENVCFG_HADE +#define HENVCFG_ADUE MENVCFG_ADUE #define HENVCFG_PBMTE MENVCFG_PBMTE #define HENVCFG_STCE MENVCFG_STCE /* For RV32 */ -#define HENVCFGH_HADE MENVCFGH_HADE +#define HENVCFGH_ADUE MENVCFGH_ADUE #define HENVCFGH_PBMTE MENVCFGH_PBMTE #define HENVCFGH_STCE MENVCFGH_STCE diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c index 9f611d89bb..3a02079290 100644 --- a/target/riscv/cpu_helper.c +++ b/target/riscv/cpu_helper.c @@ -861,11 +861,11 @@ static int get_physical_address(CPURISCVState *env, hwaddr *physical, } bool pbmte = env->menvcfg & MENVCFG_PBMTE; - bool hade = env->menvcfg & MENVCFG_HADE; + bool adue = env->menvcfg & MENVCFG_ADUE; if (first_stage && two_stage && env->virt_enabled) { pbmte = pbmte && (env->henvcfg & HENVCFG_PBMTE); - hade = hade && (env->henvcfg & HENVCFG_HADE); + adue = adue && (env->henvcfg & HENVCFG_ADUE); } int ptshift = (levels - 1) * ptidxbits; @@ -1026,7 +1026,7 @@ restart: /* Page table updates need to be atomic with MTTCG enabled */ if (updated_pte != pte && !is_debug) { - if (!hade) { + if (!adue) { return TRANSLATE_FAIL; } diff --git a/target/riscv/csr.c b/target/riscv/csr.c index 661744e6d4..63c3b0d9fc 100644 --- a/target/riscv/csr.c +++ b/target/riscv/csr.c @@ -1957,7 +1957,7 @@ static RISCVException write_menvcfg(CPURISCVState *env, int csrno, if (riscv_cpu_mxl(env) == MXL_RV64) { mask |= (cfg->ext_svpbmt ? MENVCFG_PBMTE : 0) | (cfg->ext_sstc ? MENVCFG_STCE : 0) | - (cfg->ext_svadu ? MENVCFG_HADE : 0); + (cfg->ext_svadu ? MENVCFG_ADUE : 0); } env->menvcfg = (env->menvcfg & ~mask) | (val & mask); @@ -1977,7 +1977,7 @@ static RISCVException write_menvcfgh(CPURISCVState *env, int csrno, const RISCVCPUConfig *cfg = riscv_cpu_cfg(env); uint64_t mask = (cfg->ext_svpbmt ? MENVCFG_PBMTE : 0) | (cfg->ext_sstc ? MENVCFG_STCE : 0) | - (cfg->ext_svadu ? MENVCFG_HADE : 0); + (cfg->ext_svadu ? MENVCFG_ADUE : 0); uint64_t valh = (uint64_t)val << 32; env->menvcfg = (env->menvcfg & ~mask) | (valh & mask); @@ -2029,7 +2029,7 @@ static RISCVException read_henvcfg(CPURISCVState *env, int csrno, * henvcfg.stce is read_only 0 when menvcfg.stce = 0 * henvcfg.hade is read_only 0 when menvcfg.hade = 0 */ - *val = env->henvcfg & (~(HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_HADE) | + *val = env->henvcfg & (~(HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_ADUE) | env->menvcfg); return RISCV_EXCP_NONE; } @@ -2046,7 +2046,7 @@ static RISCVException write_henvcfg(CPURISCVState *env, int csrno, } if (riscv_cpu_mxl(env) == MXL_RV64) { - mask |= env->menvcfg & (HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_HADE); + mask |= env->menvcfg & (HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_ADUE); } env->henvcfg = (env->henvcfg & ~mask) | (val & mask); @@ -2064,7 +2064,7 @@ static RISCVException read_henvcfgh(CPURISCVState *env, int csrno, return ret; } - *val = (env->henvcfg & (~(HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_HADE) | + *val = (env->henvcfg & (~(HENVCFG_PBMTE | HENVCFG_STCE | HENVCFG_ADUE) | env->menvcfg)) >> 32; return RISCV_EXCP_NONE; } @@ -2073,7 +2073,7 @@ static RISCVException write_henvcfgh(CPURISCVState *env, int csrno, target_ulong val) { uint64_t mask = env->menvcfg & (HENVCFG_PBMTE | HENVCFG_STCE | - HENVCFG_HADE); + HENVCFG_ADUE); uint64_t valh = (uint64_t)val << 32; RISCVException ret; From 3a2fc23563885c219c73c8f24318921daf02f3f2 Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Thu, 17 Aug 2023 12:29:03 -0300 Subject: [PATCH 36/45] target/riscv: fix satp_mode_finalize() when satp_mode.supported = 0 In the same emulated RISC-V host, the 'host' KVM CPU takes 4 times longer to boot than the 'rv64' KVM CPU. The reason is an unintended behavior of riscv_cpu_satp_mode_finalize() when satp_mode.supported = 0, i.e. when cpu_init() does not set satp_mode_max_supported(). satp_mode_max_from_map(map) does: 31 - __builtin_clz(map) This means that, if satp_mode.supported = 0, satp_mode_supported_max wil be '31 - 32'. But this is C, so satp_mode_supported_max will gladly set it to UINT_MAX (4294967295). After that, if the user didn't set a satp_mode, set_satp_mode_default_map(cpu) will make cfg.satp_mode.map = cfg.satp_mode.supported So satp_mode.map = 0. And then satp_mode_map_max will be set to satp_mode_max_from_map(cpu->cfg.satp_mode.map), i.e. also UINT_MAX. The guard "satp_mode_map_max > satp_mode_supported_max" doesn't protect us here since both are UINT_MAX. And finally we have 2 loops: for (int i = satp_mode_map_max - 1; i >= 0; --i) { Which are, in fact, 2 loops from UINT_MAX -1 to -1. This is where the extra delay when booting the 'host' CPU is coming from. Commit 43d1de32f8 already set a precedence for satp_mode.supported = 0 in a different manner. We're doing the same here. If supported == 0, interpret as 'the CPU wants the OS to handle satp mode alone' and skip satp_mode_finalize(). We'll also put a guard in satp_mode_max_from_map() to assert out if map is 0 since the function is not ready to deal with it. Cc: Alexandre Ghiti Fixes: 6f23aaeb9b ("riscv: Allow user to set the satp mode") Signed-off-by: Daniel Henrique Barboza Reviewed-by: Andrew Jones Message-ID: <20230817152903.694926-1-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis --- target/riscv/cpu.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index 8071f05f15..34ac26e3ae 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -309,6 +309,17 @@ static uint8_t satp_mode_from_str(const char *satp_mode_str) uint8_t satp_mode_max_from_map(uint32_t map) { + /* + * 'map = 0' will make us return (31 - 32), which C will + * happily overflow to UINT_MAX. There's no good result to + * return if 'map = 0' (e.g. returning 0 will be ambiguous + * with the result for 'map = 1'). + * + * Assert out if map = 0. Callers will have to deal with + * it outside of this function. + */ + g_assert(map > 0); + /* map here has at least one bit set, so no problem with clz */ return 31 - __builtin_clz(map); } @@ -1333,9 +1344,15 @@ void riscv_cpu_validate_set_extensions(RISCVCPU *cpu, Error **errp) static void riscv_cpu_satp_mode_finalize(RISCVCPU *cpu, Error **errp) { bool rv32 = riscv_cpu_mxl(&cpu->env) == MXL_RV32; - uint8_t satp_mode_map_max; - uint8_t satp_mode_supported_max = - satp_mode_max_from_map(cpu->cfg.satp_mode.supported); + uint8_t satp_mode_map_max, satp_mode_supported_max; + + /* The CPU wants the OS to decide which satp mode to use */ + if (cpu->cfg.satp_mode.supported == 0) { + return; + } + + satp_mode_supported_max = + satp_mode_max_from_map(cpu->cfg.satp_mode.supported); if (cpu->cfg.satp_mode.map == 0) { if (cpu->cfg.satp_mode.init == 0) { From c3443f8323638f566a503f67e3b38d55d1f692c5 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 8 Aug 2023 11:17:14 -0700 Subject: [PATCH 37/45] riscv: zicond: make non-experimental zicond is now codegen supported in both llvm and gcc. This change allows seamless enabling/testing of zicond in downstream projects. e.g. currently riscv-gnu-toolchain parses elf attributes to create a cmdline for qemu but fails short of enabling it because of the "x-" prefix. Signed-off-by: Vineet Gupta Message-ID: <20230808181715.436395-1-vineetg@rivosinc.com> Reviewed-by: Alistair Francis Signed-off-by: Alistair Francis --- target/riscv/cpu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index 34ac26e3ae..bf0912014e 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -1869,6 +1869,7 @@ static Property riscv_cpu_extensions[] = { DEFINE_PROP_BOOL("zcf", RISCVCPU, cfg.ext_zcf, false), DEFINE_PROP_BOOL("zcmp", RISCVCPU, cfg.ext_zcmp, false), DEFINE_PROP_BOOL("zcmt", RISCVCPU, cfg.ext_zcmt, false), + DEFINE_PROP_BOOL("zicond", RISCVCPU, cfg.ext_zicond, false), /* Vendor-specific custom extensions */ DEFINE_PROP_BOOL("xtheadba", RISCVCPU, cfg.ext_xtheadba, false), @@ -1885,7 +1886,6 @@ static Property riscv_cpu_extensions[] = { DEFINE_PROP_BOOL("xventanacondops", RISCVCPU, cfg.ext_XVentanaCondOps, false), /* These are experimental so mark with 'x-' */ - DEFINE_PROP_BOOL("x-zicond", RISCVCPU, cfg.ext_zicond, false), /* ePMP 0.9.3 */ DEFINE_PROP_BOOL("x-epmp", RISCVCPU, cfg.epmp, false), From a51d46102844348b36d583705bdb631879df6fe3 Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Wed, 30 Aug 2023 10:35:02 -0300 Subject: [PATCH 38/45] hw/riscv/virt.c: fix non-KVM --enable-debug build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A build with --enable-debug and without KVM will fail as follows: /usr/bin/ld: libqemu-riscv64-softmmu.fa.p/hw_riscv_virt.c.o: in function `virt_machine_init': ./qemu/build/../hw/riscv/virt.c:1465: undefined reference to `kvm_riscv_aia_create' This happens because the code block with "if virt_use_kvm_aia(s)" isn't being ignored by the debug build, resulting in an undefined reference to a KVM only function. Add a 'kvm_enabled()' conditional together with virt_use_kvm_aia() will make the compiler crop the kvm_riscv_aia_create() call entirely from a non-KVM build. Note that adding the 'kvm_enabled()' conditional inside virt_use_kvm_aia() won't fix the build because this function would need to be inlined multiple times to make the compiler zero out the entire block. While we're at it, use kvm_enabled() in all instances where virt_use_kvm_aia() is checked to allow the compiler to elide these other kvm-only instances as well. Suggested-by: Richard Henderson Fixes: dbdb99948e ("target/riscv: select KVM AIA in riscv virt machine") Signed-off-by: Daniel Henrique Barboza Reviewed-by: Andrew Jones Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-ID: <20230830133503.711138-2-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis --- hw/riscv/virt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c index 496c17c644..5edc1d98d2 100644 --- a/hw/riscv/virt.c +++ b/hw/riscv/virt.c @@ -782,7 +782,7 @@ static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap, } /* KVM AIA only has one APLIC instance */ - if (virt_use_kvm_aia(s)) { + if (kvm_enabled() && virt_use_kvm_aia(s)) { create_fdt_socket_aplic(s, memmap, 0, msi_m_phandle, msi_s_phandle, phandle, &intc_phandles[0], xplic_phandles, @@ -808,7 +808,7 @@ static void create_fdt_sockets(RISCVVirtState *s, const MemMapEntry *memmap, g_free(intc_phandles); - if (virt_use_kvm_aia(s)) { + if (kvm_enabled() && virt_use_kvm_aia(s)) { *irq_mmio_phandle = xplic_phandles[0]; *irq_virtio_phandle = xplic_phandles[0]; *irq_pcie_phandle = xplic_phandles[0]; @@ -1461,7 +1461,7 @@ static void virt_machine_init(MachineState *machine) } } - if (virt_use_kvm_aia(s)) { + if (kvm_enabled() && virt_use_kvm_aia(s)) { kvm_riscv_aia_create(machine, IMSIC_MMIO_GROUP_MIN_SHIFT, VIRT_IRQCHIP_NUM_SOURCES, VIRT_IRQCHIP_NUM_MSIS, memmap[VIRT_APLIC_S].base, From b815664091cced7ba89fce4e5b6544d16fed3d98 Mon Sep 17 00:00:00 2001 From: Daniel Henrique Barboza Date: Wed, 30 Aug 2023 10:35:03 -0300 Subject: [PATCH 39/45] hw/intc/riscv_aplic.c fix non-KVM --enable-debug build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 6df0b37e2ab breaks a --enable-debug build in a non-KVM environment with the following error: /usr/bin/ld: libqemu-riscv64-softmmu.fa.p/hw_intc_riscv_aplic.c.o: in function `riscv_kvm_aplic_request': ./qemu/build/../hw/intc/riscv_aplic.c:486: undefined reference to `kvm_set_irq' collect2: error: ld returned 1 exit status This happens because the debug build will poke into the 'if (is_kvm_aia(aplic->msimode))' block and fail to find a reference to the KVM only function riscv_kvm_aplic_request(). There are multiple solutions to fix this. We'll go with the same solution from the previous patch, i.e. add a kvm_enabled() conditional to filter out the block. But there's a catch: riscv_kvm_aplic_request() is a local function that would end up being used if the compiler crops the block, and this won't work. Quoting Richard Henderson's explanation in [1]: "(...) the compiler won't eliminate entire unused functions with -O0" We'll solve it by moving riscv_kvm_aplic_request() to kvm.c and add its declaration in kvm_riscv.h, where all other KVM specific public functions are already declared. Other archs handles KVM specific code in this manner and we expect to do the same from now on. [1] https://lore.kernel.org/qemu-riscv/d2f1ad02-eb03-138f-9d08-db676deeed05@linaro.org/ Signed-off-by: Daniel Henrique Barboza Reviewed-by: Andrew Jones Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-ID: <20230830133503.711138-3-dbarboza@ventanamicro.com> Signed-off-by: Alistair Francis --- hw/intc/riscv_aplic.c | 8 ++------ target/riscv/kvm.c | 5 +++++ target/riscv/kvm_riscv.h | 1 + 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/hw/intc/riscv_aplic.c b/hw/intc/riscv_aplic.c index 592c3ce768..99aae8ccbe 100644 --- a/hw/intc/riscv_aplic.c +++ b/hw/intc/riscv_aplic.c @@ -32,6 +32,7 @@ #include "target/riscv/cpu.h" #include "sysemu/sysemu.h" #include "sysemu/kvm.h" +#include "kvm_riscv.h" #include "migration/vmstate.h" #define APLIC_MAX_IDC (1UL << 14) @@ -481,11 +482,6 @@ static uint32_t riscv_aplic_idc_claimi(RISCVAPLICState *aplic, uint32_t idc) return topi; } -static void riscv_kvm_aplic_request(void *opaque, int irq, int level) -{ - kvm_set_irq(kvm_state, irq, !!level); -} - static void riscv_aplic_request(void *opaque, int irq, int level) { bool update = false; @@ -840,7 +836,7 @@ static void riscv_aplic_realize(DeviceState *dev, Error **errp) * have IRQ lines delegated by their parent APLIC. */ if (!aplic->parent) { - if (is_kvm_aia(aplic->msimode)) { + if (kvm_enabled() && is_kvm_aia(aplic->msimode)) { qdev_init_gpio_in(dev, riscv_kvm_aplic_request, aplic->num_irqs); } else { qdev_init_gpio_in(dev, riscv_aplic_request, aplic->num_irqs); diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c index 6e909d0fdd..c01cfb03f4 100644 --- a/target/riscv/kvm.c +++ b/target/riscv/kvm.c @@ -46,6 +46,11 @@ #include "sysemu/runstate.h" #include "hw/riscv/numa.h" +void riscv_kvm_aplic_request(void *opaque, int irq, int level) +{ + kvm_set_irq(kvm_state, irq, !!level); +} + static uint64_t kvm_riscv_reg_id(CPURISCVState *env, uint64_t type, uint64_t idx) { diff --git a/target/riscv/kvm_riscv.h b/target/riscv/kvm_riscv.h index 7d4b7c60e2..de8c209ebc 100644 --- a/target/riscv/kvm_riscv.h +++ b/target/riscv/kvm_riscv.h @@ -26,5 +26,6 @@ void kvm_riscv_aia_create(MachineState *machine, uint64_t group_shift, uint64_t aia_irq_num, uint64_t aia_msi_num, uint64_t aplic_base, uint64_t imsic_base, uint64_t guest_num); +void riscv_kvm_aplic_request(void *opaque, int irq, int level); #endif From bb0a45e931967488949d293faa5ff4f4561fbdd4 Mon Sep 17 00:00:00 2001 From: Robbin Ehn Date: Mon, 28 Aug 2023 16:56:59 +0200 Subject: [PATCH 40/45] linux-user/riscv: Add new extensions to hwprobe This patch adds the new extensions in linux 6.5 to the hwprobe syscall. And fixes RVC check to OR with correct value. The previous variable contains 0 therefore it did work. Signed-off-by: Robbin Ehn Acked-by: Richard Henderson Acked-by: Alistair Francis Message-ID: Signed-off-by: Alistair Francis --- linux-user/syscall.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/linux-user/syscall.c b/linux-user/syscall.c index dac0641bab..3521a2d70b 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -8793,6 +8793,10 @@ static int do_getdents64(abi_long dirfd, abi_long arg2, abi_long count) #define RISCV_HWPROBE_KEY_IMA_EXT_0 4 #define RISCV_HWPROBE_IMA_FD (1 << 0) #define RISCV_HWPROBE_IMA_C (1 << 1) +#define RISCV_HWPROBE_IMA_V (1 << 2) +#define RISCV_HWPROBE_EXT_ZBA (1 << 3) +#define RISCV_HWPROBE_EXT_ZBB (1 << 4) +#define RISCV_HWPROBE_EXT_ZBS (1 << 5) #define RISCV_HWPROBE_KEY_CPUPERF_0 5 #define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0) @@ -8840,7 +8844,15 @@ static void risc_hwprobe_fill_pairs(CPURISCVState *env, riscv_has_ext(env, RVD) ? RISCV_HWPROBE_IMA_FD : 0; value |= riscv_has_ext(env, RVC) ? - RISCV_HWPROBE_IMA_C : pair->value; + RISCV_HWPROBE_IMA_C : 0; + value |= riscv_has_ext(env, RVV) ? + RISCV_HWPROBE_IMA_V : 0; + value |= cfg->ext_zba ? + RISCV_HWPROBE_EXT_ZBA : 0; + value |= cfg->ext_zbb ? + RISCV_HWPROBE_EXT_ZBB : 0; + value |= cfg->ext_zbs ? + RISCV_HWPROBE_EXT_ZBS : 0; __put_user(value, &pair->value); break; case RISCV_HWPROBE_KEY_CPUPERF_0: From 7d496bb50233d861032fb22b4fae050b246c9197 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 31 Aug 2023 17:41:18 +0200 Subject: [PATCH 41/45] target/riscv: Use accelerated helper for AES64KS1I MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the accelerated SubBytes/ShiftRows/AddRoundKey AES helper to implement the first half of the key schedule derivation. This does not actually involve shifting rows, so clone the same value into all four columns of the AES vector to counter that operation. Cc: Richard Henderson Cc: Philippe Mathieu-Daudé Cc: Palmer Dabbelt Cc: Alistair Francis Signed-off-by: Ard Biesheuvel Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Richard Henderson Message-ID: <20230831154118.138727-1-ardb@kernel.org> Signed-off-by: Alistair Francis --- target/riscv/crypto_helper.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/target/riscv/crypto_helper.c b/target/riscv/crypto_helper.c index 4d65945429..bb084e00ef 100644 --- a/target/riscv/crypto_helper.c +++ b/target/riscv/crypto_helper.c @@ -148,24 +148,17 @@ target_ulong HELPER(aes64ks1i)(target_ulong rs1, target_ulong rnum) uint8_t enc_rnum = rnum; uint32_t temp = (RS1 >> 32) & 0xFFFFFFFF; - uint8_t rcon_ = 0; - target_ulong result; + AESState t, rc = {}; if (enc_rnum != 0xA) { temp = ror32(temp, 8); /* Rotate right by 8 */ - rcon_ = round_consts[enc_rnum]; + rc.w[0] = rc.w[1] = round_consts[enc_rnum]; } - temp = ((uint32_t)AES_sbox[(temp >> 24) & 0xFF] << 24) | - ((uint32_t)AES_sbox[(temp >> 16) & 0xFF] << 16) | - ((uint32_t)AES_sbox[(temp >> 8) & 0xFF] << 8) | - ((uint32_t)AES_sbox[(temp >> 0) & 0xFF] << 0); + t.w[0] = t.w[1] = t.w[2] = t.w[3] = temp; + aesenc_SB_SR_AK(&t, &t, &rc, false); - temp ^= rcon_; - - result = ((uint64_t)temp << 32) | temp; - - return result; + return t.d[0]; } target_ulong HELPER(aes64im)(target_ulong rs1) From a7c272df82af11c568ea83921b04334791dccd5e Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Fri, 18 Aug 2023 12:40:58 +0900 Subject: [PATCH 42/45] target/riscv: Allocate itrigger timers only once MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit riscv_trigger_init() had been called on reset events that can happen several times for a CPU and it allocated timers for itrigger. If old timers were present, they were simply overwritten by the new timers, resulting in a memory leak. Divide riscv_trigger_init() into two functions, namely riscv_trigger_realize() and riscv_trigger_reset() and call them in appropriate timing. The timer allocation will happen only once for a CPU in riscv_trigger_realize(). Fixes: 5a4ae64cac ("target/riscv: Add itrigger support when icount is enabled") Signed-off-by: Akihiko Odaki Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: LIU Zhiwei Reviewed-by: Alistair Francis Message-ID: <20230818034059.9146-1-akihiko.odaki@daynix.com> Signed-off-by: Alistair Francis --- target/riscv/cpu.c | 8 +++++++- target/riscv/debug.c | 15 ++++++++++++--- target/riscv/debug.h | 3 ++- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c index bf0912014e..f227c7664e 100644 --- a/target/riscv/cpu.c +++ b/target/riscv/cpu.c @@ -926,7 +926,7 @@ static void riscv_cpu_reset_hold(Object *obj) #ifndef CONFIG_USER_ONLY if (cpu->cfg.debug) { - riscv_trigger_init(env); + riscv_trigger_reset_hold(env); } if (kvm_enabled()) { @@ -1525,6 +1525,12 @@ static void riscv_cpu_realize(DeviceState *dev, Error **errp) riscv_cpu_register_gdb_regs_for_features(cs); +#ifndef CONFIG_USER_ONLY + if (cpu->cfg.debug) { + riscv_trigger_realize(&cpu->env); + } +#endif + qemu_init_vcpu(cs); cpu_reset(cs); diff --git a/target/riscv/debug.c b/target/riscv/debug.c index 211f5921b6..4945d1a1f2 100644 --- a/target/riscv/debug.c +++ b/target/riscv/debug.c @@ -903,7 +903,17 @@ bool riscv_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp) return false; } -void riscv_trigger_init(CPURISCVState *env) +void riscv_trigger_realize(CPURISCVState *env) +{ + int i; + + for (i = 0; i < RV_MAX_TRIGGERS; i++) { + env->itrigger_timer[i] = timer_new_ns(QEMU_CLOCK_VIRTUAL, + riscv_itrigger_timer_cb, env); + } +} + +void riscv_trigger_reset_hold(CPURISCVState *env) { target_ulong tdata1 = build_tdata1(env, TRIGGER_TYPE_AD_MATCH, 0, 0); int i; @@ -928,7 +938,6 @@ void riscv_trigger_init(CPURISCVState *env) env->tdata3[i] = 0; env->cpu_breakpoint[i] = NULL; env->cpu_watchpoint[i] = NULL; - env->itrigger_timer[i] = timer_new_ns(QEMU_CLOCK_VIRTUAL, - riscv_itrigger_timer_cb, env); + timer_del(env->itrigger_timer[i]); } } diff --git a/target/riscv/debug.h b/target/riscv/debug.h index c471748d5a..5794aa6ee5 100644 --- a/target/riscv/debug.h +++ b/target/riscv/debug.h @@ -143,7 +143,8 @@ void riscv_cpu_debug_excp_handler(CPUState *cs); bool riscv_cpu_debug_check_breakpoint(CPUState *cs); bool riscv_cpu_debug_check_watchpoint(CPUState *cs, CPUWatchpoint *wp); -void riscv_trigger_init(CPURISCVState *env); +void riscv_trigger_realize(CPURISCVState *env); +void riscv_trigger_reset_hold(CPURISCVState *env); bool riscv_itrigger_enabled(CPURISCVState *env); void riscv_itrigger_update_priv(CPURISCVState *env); From 4e3adce1244e1ca30ec05874c3eca14911dc0825 Mon Sep 17 00:00:00 2001 From: Leon Schuermann Date: Tue, 29 Aug 2023 17:50:46 -0400 Subject: [PATCH 43/45] target/riscv/pmp.c: respect mseccfg.RLB for pmpaddrX changes When the rule-lock bypass (RLB) bit is set in the mseccfg CSR, the PMP configuration lock bits must not apply. While this behavior is implemented for the pmpcfgX CSRs, this bit is not respected for changes to the pmpaddrX CSRs. This patch ensures that pmpaddrX CSR writes work even on locked regions when the global rule-lock bypass is enabled. Signed-off-by: Leon Schuermann Reviewed-by: Mayuresh Chitale Reviewed-by: Alistair Francis Message-ID: <20230829215046.1430463-1-leon@is.currently.online> Signed-off-by: Alistair Francis --- target/riscv/pmp.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/target/riscv/pmp.c b/target/riscv/pmp.c index 9d8db493e6..5e60c26031 100644 --- a/target/riscv/pmp.c +++ b/target/riscv/pmp.c @@ -44,6 +44,10 @@ static inline uint8_t pmp_get_a_field(uint8_t cfg) */ static inline int pmp_is_locked(CPURISCVState *env, uint32_t pmp_index) { + /* mseccfg.RLB is set */ + if (MSECCFG_RLB_ISSET(env)) { + return 0; + } if (env->pmp_state.pmp[pmp_index].cfg_reg & PMP_LOCK) { return 1; From 4df282335b3b13db30123fbcca050e4bf690a9d9 Mon Sep 17 00:00:00 2001 From: Tommy Wu Date: Tue, 15 Aug 2023 23:16:43 -0700 Subject: [PATCH 44/45] target/riscv: Align the AIA model to v1.0 ratified spec According to the new spec, when vsiselect has a reserved value, attempts from M-mode or HS-mode to access vsireg, or from VS-mode to access sireg, should preferably raise an illegal instruction exception. Signed-off-by: Tommy Wu Reviewed-by: Frank Chang Message-ID: <20230816061647.600672-1-tommy.wu@sifive.com> Signed-off-by: Alistair Francis --- target/riscv/csr.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/target/riscv/csr.c b/target/riscv/csr.c index 63c3b0d9fc..68eecc3c96 100644 --- a/target/riscv/csr.c +++ b/target/riscv/csr.c @@ -1684,7 +1684,7 @@ static int rmw_iprio(target_ulong xlen, static int rmw_xireg(CPURISCVState *env, int csrno, target_ulong *val, target_ulong new_val, target_ulong wr_mask) { - bool virt; + bool virt, isel_reserved; uint8_t *iprio; int ret = -EINVAL; target_ulong priv, isel, vgein; @@ -1694,6 +1694,7 @@ static int rmw_xireg(CPURISCVState *env, int csrno, target_ulong *val, /* Decode register details from CSR number */ virt = false; + isel_reserved = false; switch (csrno) { case CSR_MIREG: iprio = env->miprio; @@ -1738,11 +1739,13 @@ static int rmw_xireg(CPURISCVState *env, int csrno, target_ulong *val, riscv_cpu_mxl_bits(env)), val, new_val, wr_mask); } + } else { + isel_reserved = true; } done: if (ret) { - return (env->virt_enabled && virt) ? + return (env->virt_enabled && virt && !isel_reserved) ? RISCV_EXCP_VIRT_INSTRUCTION_FAULT : RISCV_EXCP_ILLEGAL_INST; } return RISCV_EXCP_NONE; From e7a03409f29e2da59297d55afbaec98c96e43e3a Mon Sep 17 00:00:00 2001 From: Nikita Shubin Date: Tue, 8 Aug 2023 12:09:14 +0300 Subject: [PATCH 45/45] target/riscv: don't read CSR in riscv_csrrw_do64 As per ISA: "For CSRRWI, if rd=x0, then the instruction shall not read the CSR and shall not cause any of the side effects that might occur on a CSR read." trans_csrrwi() and trans_csrrw() call do_csrw() if rd=x0, do_csrw() calls riscv_csrrw_do64(), via helper_csrw() passing NULL as *ret_value. Signed-off-by: Nikita Shubin Reviewed-by: Alistair Francis Message-ID: <20230808090914.17634-1-nikita.shubin@maquefel.me> Signed-off-by: Alistair Francis --- target/riscv/csr.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/target/riscv/csr.c b/target/riscv/csr.c index 68eecc3c96..85a31dc420 100644 --- a/target/riscv/csr.c +++ b/target/riscv/csr.c @@ -3917,21 +3917,27 @@ static RISCVException riscv_csrrw_do64(CPURISCVState *env, int csrno, target_ulong write_mask) { RISCVException ret; - target_ulong old_value; + target_ulong old_value = 0; /* execute combined read/write operation if it exists */ if (csr_ops[csrno].op) { return csr_ops[csrno].op(env, csrno, ret_value, new_value, write_mask); } - /* if no accessor exists then return failure */ - if (!csr_ops[csrno].read) { - return RISCV_EXCP_ILLEGAL_INST; - } - /* read old value */ - ret = csr_ops[csrno].read(env, csrno, &old_value); - if (ret != RISCV_EXCP_NONE) { - return ret; + /* + * ret_value == NULL means that rd=x0 and we're coming from helper_csrw() + * and we can't throw side effects caused by CSR reads. + */ + if (ret_value) { + /* if no accessor exists then return failure */ + if (!csr_ops[csrno].read) { + return RISCV_EXCP_ILLEGAL_INST; + } + /* read old value */ + ret = csr_ops[csrno].read(env, csrno, &old_value); + if (ret != RISCV_EXCP_NONE) { + return ret; + } } /* write value if writable and write mask set, otherwise drop writes */