From d721cc94b25a100980c767beca9daae6c2cac6b0 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 6 Aug 2021 07:08:54 -1000 Subject: [PATCH 01/23] tcg/mips: Move TCG_AREG0 to S8 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No functional change; just moving the saved reserved regs to the end. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c.inc | 4 ++-- tcg/mips/tcg-target.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index ef146b193c..ee6c2eb872 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -2183,7 +2183,7 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) } static const int tcg_target_callee_save_regs[] = { - TCG_REG_S0, /* used for the global env (TCG_AREG0) */ + TCG_REG_S0, TCG_REG_S1, TCG_REG_S2, TCG_REG_S3, @@ -2191,7 +2191,7 @@ static const int tcg_target_callee_save_regs[] = { TCG_REG_S5, TCG_REG_S6, TCG_REG_S7, - TCG_REG_S8, + TCG_REG_S8, /* used for the global env (TCG_AREG0) */ TCG_REG_RA, /* should be last for ABI compliance */ }; diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index 7277a117ef..46b63e59cc 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -76,7 +76,7 @@ typedef enum { TCG_REG_RA, TCG_REG_CALL_STACK = TCG_REG_SP, - TCG_AREG0 = TCG_REG_S0, + TCG_AREG0 = TCG_REG_S8, } TCGReg; /* used for function call generation */ From 43b4cd97131f3e99e6bf397ed3874855fac9cdff Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 6 Aug 2021 07:10:57 -1000 Subject: [PATCH 02/23] tcg/mips: Move TCG_GUEST_BASE_REG to S7 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No functional change; just moving the saved reserved regs to the end. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c.inc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index ee6c2eb872..f322891797 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -86,7 +86,7 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { #define TCG_TMP3 TCG_REG_T7 #ifndef CONFIG_SOFTMMU -#define TCG_GUEST_BASE_REG TCG_REG_S1 +#define TCG_GUEST_BASE_REG TCG_REG_S7 #endif /* check if we really need so many registers :P */ @@ -2190,7 +2190,7 @@ static const int tcg_target_callee_save_regs[] = { TCG_REG_S4, TCG_REG_S5, TCG_REG_S6, - TCG_REG_S7, + TCG_REG_S7, /* used for guest_base */ TCG_REG_S8, /* used for the global env (TCG_AREG0) */ TCG_REG_RA, /* should be last for ABI compliance */ }; From f63eb2e59f45a2307f2e3f41e82e76f7abcd03f0 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 6 Aug 2021 07:13:46 -1000 Subject: [PATCH 03/23] tcg/mips: Unify TCG_GUEST_BASE_REG tests In tcg_out_qemu_ld/st, we already check for guest_base matching int16_t. Mirror that when setting up TCG_GUEST_BASE_REG in the prologue. Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index f322891797..ccb3a1cd9a 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -2312,7 +2312,7 @@ static void tcg_target_qemu_prologue(TCGContext *s) } #ifndef CONFIG_SOFTMMU - if (guest_base) { + if (guest_base != (int16_t)guest_base) { tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base); tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); } From 53c4fa2726629b2f34a23082e6f985bd36c3e1f7 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 6 Aug 2021 09:28:31 -1000 Subject: [PATCH 04/23] tcg/mips: Create and use TCG_REG_TB This vastly reduces the size of code generated for 64-bit addresses. The code for exit_tb, for instance, where we load a (tagged) pointer to the current TB, goes from 0x400aa9725c: li v0,64 0x400aa97260: dsll v0,v0,0x10 0x400aa97264: ori v0,v0,0xaa9 0x400aa97268: dsll v0,v0,0x10 0x400aa9726c: j 0x400aa9703c 0x400aa97270: ori v0,v0,0x7083 to 0x400aa97240: j 0x400aa97040 0x400aa97244: daddiu v0,s6,-189 Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c.inc | 69 +++++++++++++++++++++++++++++++++------ 1 file changed, 59 insertions(+), 10 deletions(-) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index ccb3a1cd9a..6f03b44ac0 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -88,6 +88,11 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { #ifndef CONFIG_SOFTMMU #define TCG_GUEST_BASE_REG TCG_REG_S7 #endif +#if TCG_TARGET_REG_BITS == 64 +#define TCG_REG_TB TCG_REG_S6 +#else +#define TCG_REG_TB (qemu_build_not_reached(), TCG_REG_ZERO) +#endif /* check if we really need so many registers :P */ static const int tcg_target_reg_alloc_order[] = { @@ -1547,27 +1552,61 @@ static void tcg_out_clz(TCGContext *s, MIPSInsn opcv2, MIPSInsn opcv6, static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) { - TCGReg b0 = TCG_REG_ZERO; + TCGReg base = TCG_REG_ZERO; + int16_t lo = 0; - if (a0 & ~0xffff) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_V0, a0 & ~0xffff); - b0 = TCG_REG_V0; + if (a0) { + intptr_t ofs; + if (TCG_TARGET_REG_BITS == 64) { + ofs = tcg_tbrel_diff(s, (void *)a0); + lo = ofs; + if (ofs == lo) { + base = TCG_REG_TB; + } else { + base = TCG_REG_V0; + tcg_out_movi(s, TCG_TYPE_PTR, base, ofs - lo); + tcg_out_opc_reg(s, ALIAS_PADD, base, base, TCG_REG_TB); + } + } else { + ofs = a0; + lo = ofs; + base = TCG_REG_V0; + tcg_out_movi(s, TCG_TYPE_PTR, base, ofs - lo); + } } if (!tcg_out_opc_jmp(s, OPC_J, tb_ret_addr)) { tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)tb_ret_addr); tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0); } - tcg_out_opc_imm(s, OPC_ORI, TCG_REG_V0, b0, a0 & 0xffff); + /* delay slot */ + tcg_out_opc_imm(s, ALIAS_PADDI, TCG_REG_V0, base, lo); } static void tcg_out_goto_tb(TCGContext *s, int which) { + intptr_t ofs = get_jmp_target_addr(s, which); + TCGReg base, dest; + /* indirect jump method */ - tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_REG_ZERO, - get_jmp_target_addr(s, which)); - tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0); + if (TCG_TARGET_REG_BITS == 64) { + dest = TCG_REG_TB; + base = TCG_REG_TB; + ofs = tcg_tbrel_diff(s, (void *)ofs); + } else { + dest = TCG_TMP0; + base = TCG_REG_ZERO; + } + tcg_out_ld(s, TCG_TYPE_PTR, dest, base, ofs); + tcg_out_opc_reg(s, OPC_JR, 0, dest, 0); + /* delay slot */ tcg_out_nop(s); + set_jmp_reset_offset(s, which); + if (TCG_TARGET_REG_BITS == 64) { + /* For the unlinked case, need to reset TCG_REG_TB. */ + tcg_out_ldst(s, ALIAS_PADDI, TCG_REG_TB, TCG_REG_TB, + -tcg_current_code_size(s)); + } } void tb_target_set_jmp_target(const TranslationBlock *tb, int n, @@ -1598,7 +1637,11 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_goto_ptr: /* jmp to the given host address (could be epilogue) */ tcg_out_opc_reg(s, OPC_JR, 0, a0, 0); - tcg_out_nop(s); + if (TCG_TARGET_REG_BITS == 64) { + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, a0); + } else { + tcg_out_nop(s); + } break; case INDEX_op_br: tcg_out_brcond(s, TCG_COND_EQ, TCG_REG_ZERO, TCG_REG_ZERO, @@ -2189,7 +2232,7 @@ static const int tcg_target_callee_save_regs[] = { TCG_REG_S3, TCG_REG_S4, TCG_REG_S5, - TCG_REG_S6, + TCG_REG_S6, /* used for the tb base (TCG_REG_TB) */ TCG_REG_S7, /* used for guest_base */ TCG_REG_S8, /* used for the global env (TCG_AREG0) */ TCG_REG_RA, /* should be last for ABI compliance */ @@ -2317,6 +2360,9 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); } #endif + if (TCG_TARGET_REG_BITS == 64) { + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, tcg_target_call_iarg_regs[1]); + } /* Call generated code */ tcg_out_opc_reg(s, OPC_JR, 0, tcg_target_call_iarg_regs[1], 0); @@ -2498,6 +2544,9 @@ static void tcg_target_init(TCGContext *s) tcg_regset_set_reg(s->reserved_regs, TCG_REG_RA); /* return address */ tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP); /* stack pointer */ tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP); /* global pointer */ + if (TCG_TARGET_REG_BITS == 64) { + tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB); /* tc->tc_ptr */ + } } typedef struct { From 47a572865ab8fc7772113ed15fb6e618b8d5763a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 6 Aug 2021 12:02:59 -1000 Subject: [PATCH 05/23] tcg/mips: Split out tcg_out_movi_one MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Emit all constants that can be loaded in exactly one insn. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c.inc | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 6f03b44ac0..bbff510c46 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -510,20 +510,34 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) return true; } +static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg) +{ + if (arg == (int16_t)arg) { + tcg_out_opc_imm(s, OPC_ADDIU, ret, TCG_REG_ZERO, arg); + return true; + } + if (arg == (uint16_t)arg) { + tcg_out_opc_imm(s, OPC_ORI, ret, TCG_REG_ZERO, arg); + return true; + } + if (arg == (int32_t)arg && (arg & 0xffff) == 0) { + tcg_out_opc_imm(s, OPC_LUI, ret, TCG_REG_ZERO, arg >> 16); + return true; + } + return false; +} + static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, tcg_target_long arg) { if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) { arg = (int32_t)arg; } - if (arg == (int16_t)arg) { - tcg_out_opc_imm(s, OPC_ADDIU, ret, TCG_REG_ZERO, arg); - return; - } - if (arg == (uint16_t)arg) { - tcg_out_opc_imm(s, OPC_ORI, ret, TCG_REG_ZERO, arg); + + if (tcg_out_movi_one(s, ret, arg)) { return; } + if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) { tcg_out_opc_imm(s, OPC_LUI, ret, TCG_REG_ZERO, arg >> 16); } else { From 1d9c5b30846930fa29cd8cfe3b4f66396cc181a0 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 6 Aug 2021 12:14:53 -1000 Subject: [PATCH 06/23] tcg/mips: Split out tcg_out_movi_two Emit all 32-bit signed constants, which can be loaded in two insns. Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c.inc | 35 ++++++++++++++++++++++++----------- 1 file changed, 24 insertions(+), 11 deletions(-) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index bbff510c46..7a19f8db1d 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -527,6 +527,22 @@ static bool tcg_out_movi_one(TCGContext *s, TCGReg ret, tcg_target_long arg) return false; } +static bool tcg_out_movi_two(TCGContext *s, TCGReg ret, tcg_target_long arg) +{ + /* + * All signed 32-bit constants are loadable with two immediates, + * and everything else requires more work. + */ + if (arg == (int32_t)arg) { + if (!tcg_out_movi_one(s, ret, arg)) { + tcg_out_opc_imm(s, OPC_LUI, ret, TCG_REG_ZERO, arg >> 16); + tcg_out_opc_imm(s, OPC_ORI, ret, ret, arg & 0xffff); + } + return true; + } + return false; +} + static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, tcg_target_long arg) { @@ -534,21 +550,18 @@ static void tcg_out_movi(TCGContext *s, TCGType type, arg = (int32_t)arg; } - if (tcg_out_movi_one(s, ret, arg)) { + /* Load all 32-bit constants. */ + if (tcg_out_movi_two(s, ret, arg)) { return; } - if (TCG_TARGET_REG_BITS == 32 || arg == (int32_t)arg) { - tcg_out_opc_imm(s, OPC_LUI, ret, TCG_REG_ZERO, arg >> 16); + tcg_out_movi(s, TCG_TYPE_I32, ret, arg >> 31 >> 1); + if (arg & 0xffff0000ull) { + tcg_out_dsll(s, ret, ret, 16); + tcg_out_opc_imm(s, OPC_ORI, ret, ret, arg >> 16); + tcg_out_dsll(s, ret, ret, 16); } else { - tcg_out_movi(s, TCG_TYPE_I32, ret, arg >> 31 >> 1); - if (arg & 0xffff0000ull) { - tcg_out_dsll(s, ret, ret, 16); - tcg_out_opc_imm(s, OPC_ORI, ret, ret, arg >> 16); - tcg_out_dsll(s, ret, ret, 16); - } else { - tcg_out_dsll(s, ret, ret, 32); - } + tcg_out_dsll(s, ret, ret, 32); } if (arg & 0xffff) { tcg_out_opc_imm(s, OPC_ORI, ret, ret, arg & 0xffff); From 48c12ba748bae9d8c5d20748226115f91fb88ad9 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 6 Aug 2021 13:07:41 -1000 Subject: [PATCH 07/23] tcg/mips: Use the constant pool for 64-bit constants During normal processing, the constant pool is accessible via TCG_REG_TB. During the prologue, it is accessible via TCG_REG_T9. Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c.inc | 65 +++++++++++++++++++++++++++++---------- tcg/mips/tcg-target.h | 1 + 2 files changed, 49 insertions(+), 17 deletions(-) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 7a19f8db1d..3b840ecc4c 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -25,6 +25,7 @@ */ #include "../tcg-ldst.c.inc" +#include "../tcg-pool.c.inc" #if HOST_BIG_ENDIAN # define MIPS_BE 1 @@ -168,9 +169,18 @@ static bool reloc_pc16(tcg_insn_unit *src_rw, const tcg_insn_unit *target) static bool patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) { - tcg_debug_assert(type == R_MIPS_PC16); - tcg_debug_assert(addend == 0); - return reloc_pc16(code_ptr, (const tcg_insn_unit *)value); + value += addend; + switch (type) { + case R_MIPS_PC16: + return reloc_pc16(code_ptr, (const tcg_insn_unit *)value); + case R_MIPS_16: + if (value != (int16_t)value) { + return false; + } + *code_ptr = deposit32(*code_ptr, 0, 16, value); + return true; + } + g_assert_not_reached(); } #define TCG_CT_CONST_ZERO 0x100 @@ -486,6 +496,11 @@ static void tcg_out_nop(TCGContext *s) tcg_out32(s, 0); } +static void tcg_out_nop_fill(tcg_insn_unit *p, int count) +{ + memset(p, 0, count * sizeof(tcg_insn_unit)); +} + static void tcg_out_dsll(TCGContext *s, TCGReg rd, TCGReg rt, TCGArg sa) { tcg_out_opc_sa64(s, OPC_DSLL, OPC_DSLL32, rd, rt, sa); @@ -543,8 +558,15 @@ static bool tcg_out_movi_two(TCGContext *s, TCGReg ret, tcg_target_long arg) return false; } -static void tcg_out_movi(TCGContext *s, TCGType type, - TCGReg ret, tcg_target_long arg) +static void tcg_out_movi_pool(TCGContext *s, TCGReg ret, + tcg_target_long arg, TCGReg tbreg) +{ + new_pool_label(s, arg, R_MIPS_16, s->code_ptr, tcg_tbrel_diff(s, NULL)); + tcg_out_opc_imm(s, OPC_LD, ret, tbreg, 0); +} + +static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, + tcg_target_long arg, TCGReg tbreg) { if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) { arg = (int32_t)arg; @@ -554,18 +576,17 @@ static void tcg_out_movi(TCGContext *s, TCGType type, if (tcg_out_movi_two(s, ret, arg)) { return; } + assert(TCG_TARGET_REG_BITS == 64); - tcg_out_movi(s, TCG_TYPE_I32, ret, arg >> 31 >> 1); - if (arg & 0xffff0000ull) { - tcg_out_dsll(s, ret, ret, 16); - tcg_out_opc_imm(s, OPC_ORI, ret, ret, arg >> 16); - tcg_out_dsll(s, ret, ret, 16); - } else { - tcg_out_dsll(s, ret, ret, 32); - } - if (arg & 0xffff) { - tcg_out_opc_imm(s, OPC_ORI, ret, ret, arg & 0xffff); - } + /* Otherwise, put 64-bit constants into the constant pool. */ + tcg_out_movi_pool(s, ret, arg, tbreg); +} + +static void tcg_out_movi(TCGContext *s, TCGType type, + TCGReg ret, tcg_target_long arg) +{ + TCGReg tbreg = TCG_TARGET_REG_BITS == 64 ? TCG_REG_TB : 0; + tcg_out_movi_int(s, type, ret, arg, tbreg); } static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg rd, TCGReg rs) @@ -2383,10 +2404,20 @@ static void tcg_target_qemu_prologue(TCGContext *s) #ifndef CONFIG_SOFTMMU if (guest_base != (int16_t)guest_base) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base); + /* + * The function call abi for n32 and n64 will have loaded $25 (t9) + * with the address of the prologue, so we can use that instead + * of TCG_REG_TB. + */ +#if TCG_TARGET_REG_BITS == 64 && !defined(__mips_abicalls) +# error "Unknown mips abi" +#endif + tcg_out_movi_int(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, guest_base, + TCG_TARGET_REG_BITS == 64 ? TCG_REG_T9 : 0); tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG); } #endif + if (TCG_TARGET_REG_BITS == 64) { tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_TB, tcg_target_call_iarg_regs[1]); } diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index 46b63e59cc..8fbb6c6507 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -208,5 +208,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_DEFAULT_MO 0 #define TCG_TARGET_NEED_LDST_LABELS +#define TCG_TARGET_NEED_POOL_LABELS #endif From 4316de32e71ae2626bbf8483a0265c61d3e3a05f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 6 Aug 2021 13:17:20 -1000 Subject: [PATCH 08/23] tcg/mips: Aggressively use the constant pool for n64 calls Repeated calls to a single helper are common -- especially the ones for softmmu memory access. Prefer the constant pool to longer sequences to increase sharing. Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c.inc | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 3b840ecc4c..068deab8c9 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1101,9 +1101,19 @@ static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail) { - /* Note that the ABI requires the called function's address to be - loaded into T9, even if a direct branch is in range. */ - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T9, (uintptr_t)arg); + /* + * Note that __mips_abicalls requires the called function's address + * to be loaded into $25 (t9), even if a direct branch is in range. + * + * For n64, always drop the pointer into the constant pool. + * We can re-use helper addresses often and do not want any + * of the longer sequences tcg_out_movi may try. + */ + if (sizeof(uintptr_t) == 8) { + tcg_out_movi_pool(s, TCG_REG_T9, (uintptr_t)arg, TCG_REG_TB); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T9, (uintptr_t)arg); + } /* But do try a direct branch, allowing the cpu better insn prefetch. */ if (tail) { From 1d159e64cca6497565cdcbb0a8383fc8568b4983 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 6 Aug 2021 11:30:35 -1000 Subject: [PATCH 09/23] tcg/mips: Try tb-relative addresses in tcg_out_movi These addresses are often loaded by the qemu_ld/st slow path, for loading the retaddr value. Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c.inc | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 068deab8c9..9fab424ecc 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -568,6 +568,8 @@ static void tcg_out_movi_pool(TCGContext *s, TCGReg ret, static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, tcg_target_long arg, TCGReg tbreg) { + tcg_target_long tmp; + if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) { arg = (int32_t)arg; } @@ -578,6 +580,17 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, } assert(TCG_TARGET_REG_BITS == 64); + /* Load addresses within 2GB of TB with 1 or 3 insns. */ + tmp = tcg_tbrel_diff(s, (void *)arg); + if (tmp == (int16_t)tmp) { + tcg_out_opc_imm(s, OPC_DADDIU, ret, tbreg, tmp); + return; + } + if (tcg_out_movi_two(s, ret, tmp)) { + tcg_out_opc_reg(s, OPC_DADDU, ret, ret, tbreg); + return; + } + /* Otherwise, put 64-bit constants into the constant pool. */ tcg_out_movi_pool(s, ret, arg, tbreg); } From 269e93ab76bc8b6239e12e427591fa46fb8c5be8 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Fri, 6 Aug 2021 11:18:25 -1000 Subject: [PATCH 10/23] tcg/mips: Try three insns with shift and add in tcg_out_movi These sequences are inexpensive to test. Maxing out at three insns results in the same space as a load plus the constant pool entry. Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c.inc | 44 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 9fab424ecc..b86a0679af 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -569,6 +569,7 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, tcg_target_long arg, TCGReg tbreg) { tcg_target_long tmp; + int sh, lo; if (TCG_TARGET_REG_BITS == 64 && type == TCG_TYPE_I32) { arg = (int32_t)arg; @@ -591,6 +592,49 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, return; } + /* + * Load bitmasks with a right-shift. This is good for things + * like 0x0fff_ffff_ffff_fff0: ADDUI r,0,0xff00 + DSRL r,r,4. + * or similarly using LUI. For this to work, bit 31 must be set. + */ + if (arg > 0 && (int32_t)arg < 0) { + sh = clz64(arg); + if (tcg_out_movi_one(s, ret, arg << sh)) { + tcg_out_dsrl(s, ret, ret, sh); + return; + } + } + + /* + * Load slightly larger constants using left-shift. + * Limit this sequence to 3 insns to avoid too much expansion. + */ + sh = ctz64(arg); + if (sh && tcg_out_movi_two(s, ret, arg >> sh)) { + tcg_out_dsll(s, ret, ret, sh); + return; + } + + /* + * Load slightly larger constants using left-shift and add/or. + * Prefer addi with a negative immediate when that would produce + * a larger shift. For this to work, bits 15 and 16 must be set. + */ + lo = arg & 0xffff; + if (lo) { + if ((arg & 0x18000) == 0x18000) { + lo = (int16_t)arg; + } + tmp = arg - lo; + sh = ctz64(tmp); + tmp >>= sh; + if (tcg_out_movi_one(s, ret, tmp)) { + tcg_out_dsll(s, ret, ret, sh); + tcg_out_opc_imm(s, lo < 0 ? OPC_DADDIU : OPC_ORI, ret, ret, lo); + return; + } + } + /* Otherwise, put 64-bit constants into the constant pool. */ tcg_out_movi_pool(s, ret, arg, tbreg); } From c64ed451a9e9e5c8d27c4cf4c41940171a3b2afd Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 28 Nov 2022 12:31:56 -0800 Subject: [PATCH 11/23] tcg/mips: Use qemu_build_not_reached for LO/HI_OFF The new(ish) macro produces a compile-time error instead of a link-time error. Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c.inc | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index b86a0679af..fd92cc30ca 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -37,11 +37,9 @@ # define LO_OFF (MIPS_BE * 4) # define HI_OFF (4 - LO_OFF) #else -/* To assert at compile-time that these values are never used - for TCG_TARGET_REG_BITS == 64. */ -int link_error(void); -# define LO_OFF link_error() -# define HI_OFF link_error() +/* Assert at compile-time that these values are never used for 64-bit. */ +# define LO_OFF ({ qemu_build_not_reached(); 0; }) +# define HI_OFF ({ qemu_build_not_reached(); 0; }) #endif #ifdef CONFIG_DEBUG_TCG From b56d5a8a4b0352bd499e026b8bdbdcf5f12753ac Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 17 May 2023 06:56:44 -0700 Subject: [PATCH 12/23] tcg/mips: Replace MIPS_BE with HOST_BIG_ENDIAN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since e03b56863d2b, which replaced HOST_WORDS_BIGENDIAN with HOST_BIG_ENDIAN, there is no need to define a second symbol which is [0,1]. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c.inc | 46 +++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index fd92cc30ca..3274d9aace 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -27,14 +27,8 @@ #include "../tcg-ldst.c.inc" #include "../tcg-pool.c.inc" -#if HOST_BIG_ENDIAN -# define MIPS_BE 1 -#else -# define MIPS_BE 0 -#endif - #if TCG_TARGET_REG_BITS == 32 -# define LO_OFF (MIPS_BE * 4) +# define LO_OFF (HOST_BIG_ENDIAN * 4) # define HI_OFF (4 - LO_OFF) #else /* Assert at compile-time that these values are never used for 64-bit. */ @@ -1439,7 +1433,7 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi, /* Prefer to load from offset 0 first, but allow for overlap. */ if (TCG_TARGET_REG_BITS == 64) { tcg_out_opc_imm(s, OPC_LD, lo, base, 0); - } else if (MIPS_BE ? hi != base : lo == base) { + } else if (HOST_BIG_ENDIAN ? hi != base : lo == base) { tcg_out_opc_imm(s, OPC_LW, hi, base, HI_OFF); tcg_out_opc_imm(s, OPC_LW, lo, base, LO_OFF); } else { @@ -1455,10 +1449,10 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg lo, TCGReg hi, static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi, TCGReg base, MemOp opc, TCGType type) { - const MIPSInsn lw1 = MIPS_BE ? OPC_LWL : OPC_LWR; - const MIPSInsn lw2 = MIPS_BE ? OPC_LWR : OPC_LWL; - const MIPSInsn ld1 = MIPS_BE ? OPC_LDL : OPC_LDR; - const MIPSInsn ld2 = MIPS_BE ? OPC_LDR : OPC_LDL; + const MIPSInsn lw1 = HOST_BIG_ENDIAN ? OPC_LWL : OPC_LWR; + const MIPSInsn lw2 = HOST_BIG_ENDIAN ? OPC_LWR : OPC_LWL; + const MIPSInsn ld1 = HOST_BIG_ENDIAN ? OPC_LDL : OPC_LDR; + const MIPSInsn ld2 = HOST_BIG_ENDIAN ? OPC_LDR : OPC_LDL; bool sgn = opc & MO_SIGN; switch (opc & MO_SIZE) { @@ -1497,10 +1491,10 @@ static void tcg_out_qemu_ld_unalign(TCGContext *s, TCGReg lo, TCGReg hi, tcg_out_opc_imm(s, ld1, lo, base, 0); tcg_out_opc_imm(s, ld2, lo, base, 7); } else { - tcg_out_opc_imm(s, lw1, MIPS_BE ? hi : lo, base, 0 + 0); - tcg_out_opc_imm(s, lw2, MIPS_BE ? hi : lo, base, 0 + 3); - tcg_out_opc_imm(s, lw1, MIPS_BE ? lo : hi, base, 4 + 0); - tcg_out_opc_imm(s, lw2, MIPS_BE ? lo : hi, base, 4 + 3); + tcg_out_opc_imm(s, lw1, HOST_BIG_ENDIAN ? hi : lo, base, 0 + 0); + tcg_out_opc_imm(s, lw2, HOST_BIG_ENDIAN ? hi : lo, base, 0 + 3); + tcg_out_opc_imm(s, lw1, HOST_BIG_ENDIAN ? lo : hi, base, 4 + 0); + tcg_out_opc_imm(s, lw2, HOST_BIG_ENDIAN ? lo : hi, base, 4 + 3); } break; @@ -1550,8 +1544,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi, if (TCG_TARGET_REG_BITS == 64) { tcg_out_opc_imm(s, OPC_SD, lo, base, 0); } else { - tcg_out_opc_imm(s, OPC_SW, MIPS_BE ? hi : lo, base, 0); - tcg_out_opc_imm(s, OPC_SW, MIPS_BE ? lo : hi, base, 4); + tcg_out_opc_imm(s, OPC_SW, HOST_BIG_ENDIAN ? hi : lo, base, 0); + tcg_out_opc_imm(s, OPC_SW, HOST_BIG_ENDIAN ? lo : hi, base, 4); } break; default: @@ -1562,10 +1556,10 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg lo, TCGReg hi, static void tcg_out_qemu_st_unalign(TCGContext *s, TCGReg lo, TCGReg hi, TCGReg base, MemOp opc) { - const MIPSInsn sw1 = MIPS_BE ? OPC_SWL : OPC_SWR; - const MIPSInsn sw2 = MIPS_BE ? OPC_SWR : OPC_SWL; - const MIPSInsn sd1 = MIPS_BE ? OPC_SDL : OPC_SDR; - const MIPSInsn sd2 = MIPS_BE ? OPC_SDR : OPC_SDL; + const MIPSInsn sw1 = HOST_BIG_ENDIAN ? OPC_SWL : OPC_SWR; + const MIPSInsn sw2 = HOST_BIG_ENDIAN ? OPC_SWR : OPC_SWL; + const MIPSInsn sd1 = HOST_BIG_ENDIAN ? OPC_SDL : OPC_SDR; + const MIPSInsn sd2 = HOST_BIG_ENDIAN ? OPC_SDR : OPC_SDL; switch (opc & MO_SIZE) { case MO_16: @@ -1584,10 +1578,10 @@ static void tcg_out_qemu_st_unalign(TCGContext *s, TCGReg lo, TCGReg hi, tcg_out_opc_imm(s, sd1, lo, base, 0); tcg_out_opc_imm(s, sd2, lo, base, 7); } else { - tcg_out_opc_imm(s, sw1, MIPS_BE ? hi : lo, base, 0 + 0); - tcg_out_opc_imm(s, sw2, MIPS_BE ? hi : lo, base, 0 + 3); - tcg_out_opc_imm(s, sw1, MIPS_BE ? lo : hi, base, 4 + 0); - tcg_out_opc_imm(s, sw2, MIPS_BE ? lo : hi, base, 4 + 3); + tcg_out_opc_imm(s, sw1, HOST_BIG_ENDIAN ? hi : lo, base, 0 + 0); + tcg_out_opc_imm(s, sw2, HOST_BIG_ENDIAN ? hi : lo, base, 0 + 3); + tcg_out_opc_imm(s, sw1, HOST_BIG_ENDIAN ? lo : hi, base, 4 + 0); + tcg_out_opc_imm(s, sw2, HOST_BIG_ENDIAN ? lo : hi, base, 4 + 3); } break; From d397be9a2256a7cc00a2b00355ad9c869ad61493 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 26 Apr 2023 13:33:05 +0100 Subject: [PATCH 13/23] disas/riscv: Decode czero.{eqz,nez} Acked-by: Alistair Francis Reviewed-by: Daniel Henrique Barboza Signed-off-by: Richard Henderson --- disas/riscv.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/disas/riscv.c b/disas/riscv.c index e61bda5674..d597161d46 100644 --- a/disas/riscv.c +++ b/disas/riscv.c @@ -962,6 +962,8 @@ typedef enum { rv_op_cm_mvsa01 = 786, rv_op_cm_jt = 787, rv_op_cm_jalt = 788, + rv_op_czero_eqz = 789, + rv_op_czero_nez = 790, } rv_op; /* structures */ @@ -2119,6 +2121,8 @@ const rv_opcode_data opcode_data[] = { { "cm.mvsa01", rv_codec_zcmp_cm_mv, rv_fmt_rd_rs2, NULL, 0, 0, 0 }, { "cm.jt", rv_codec_zcmt_jt, rv_fmt_zcmt_index, NULL, 0 }, { "cm.jalt", rv_codec_zcmt_jt, rv_fmt_zcmt_index, NULL, 0 }, + { "czero.eqz", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, + { "czero.nez", rv_codec_r, rv_fmt_rd_rs1_rs2, NULL, 0, 0, 0 }, }; /* CSR names */ @@ -2914,6 +2918,8 @@ static void decode_inst_opcode(rv_decode *dec, rv_isa isa) case 45: op = rv_op_minu; break; case 46: op = rv_op_max; break; case 47: op = rv_op_maxu; break; + case 075: op = rv_op_czero_eqz; break; + case 077: op = rv_op_czero_nez; break; case 130: op = rv_op_sh1add; break; case 132: op = rv_op_sh2add; break; case 134: op = rv_op_sh3add; break; From 9e3e0bc6ac38e339aa142afa23604382f9963ed1 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 25 Apr 2023 15:29:03 +0100 Subject: [PATCH 14/23] tcg/riscv: Probe for Zba, Zbb, Zicond extensions Define a useful subset of the extensions. Probe for them via compiler pre-processor feature macros and SIGILL. Acked-by: Alistair Francis Reviewed-by: Daniel Henrique Barboza Signed-off-by: Richard Henderson --- tcg/riscv/tcg-target.c.inc | 96 ++++++++++++++++++++++++++++++++++++++ tcg/riscv/tcg-target.h | 6 +++ 2 files changed, 102 insertions(+) diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index ff6334980f..eb3e2e9eb0 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -113,6 +113,20 @@ static const int tcg_target_call_iarg_regs[] = { TCG_REG_A7, }; +#ifndef have_zbb +bool have_zbb; +#endif +#if defined(__riscv_arch_test) && defined(__riscv_zba) +# define have_zba true +#else +static bool have_zba; +#endif +#if defined(__riscv_arch_test) && defined(__riscv_zicond) +# define have_zicond true +#else +static bool have_zicond; +#endif + static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) { tcg_debug_assert(kind == TCG_CALL_RET_NORMAL); @@ -234,6 +248,34 @@ typedef enum { OPC_FENCE = 0x0000000f, OPC_NOP = OPC_ADDI, /* nop = addi r0,r0,0 */ + + /* Zba: Bit manipulation extension, address generation */ + OPC_ADD_UW = 0x0800003b, + + /* Zbb: Bit manipulation extension, basic bit manipulaton */ + OPC_ANDN = 0x40007033, + OPC_CLZ = 0x60001013, + OPC_CLZW = 0x6000101b, + OPC_CPOP = 0x60201013, + OPC_CPOPW = 0x6020101b, + OPC_CTZ = 0x60101013, + OPC_CTZW = 0x6010101b, + OPC_ORN = 0x40006033, + OPC_REV8 = 0x6b805013, + OPC_ROL = 0x60001033, + OPC_ROLW = 0x6000103b, + OPC_ROR = 0x60005033, + OPC_RORW = 0x6000503b, + OPC_RORI = 0x60005013, + OPC_RORIW = 0x6000501b, + OPC_SEXT_B = 0x60401013, + OPC_SEXT_H = 0x60501013, + OPC_XNOR = 0x40004033, + OPC_ZEXT_H = 0x0800403b, + + /* Zicond: integer conditional operations */ + OPC_CZERO_EQZ = 0x0e005033, + OPC_CZERO_NEZ = 0x0e007033, } RISCVInsn; /* @@ -1619,8 +1661,62 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, TCG_REG_RA, 0); } +static volatile sig_atomic_t got_sigill; + +static void sigill_handler(int signo, siginfo_t *si, void *data) +{ + /* Skip the faulty instruction */ + ucontext_t *uc = (ucontext_t *)data; + uc->uc_mcontext.__gregs[REG_PC] += 4; + + got_sigill = 1; +} + +static void tcg_target_detect_isa(void) +{ +#if !defined(have_zba) || !defined(have_zbb) || !defined(have_zicond) + /* + * TODO: It is expected that this will be determinable via + * linux riscv_hwprobe syscall, not yet merged. + * In the meantime, test via sigill. + */ + + struct sigaction sa_old, sa_new; + + memset(&sa_new, 0, sizeof(sa_new)); + sa_new.sa_flags = SA_SIGINFO; + sa_new.sa_sigaction = sigill_handler; + sigaction(SIGILL, &sa_new, &sa_old); + +#ifndef have_zba + /* Probe for Zba: add.uw zero,zero,zero. */ + got_sigill = 0; + asm volatile(".insn r 0x3b, 0, 0x04, zero, zero, zero" : : : "memory"); + have_zba = !got_sigill; +#endif + +#ifndef have_zbb + /* Probe for Zba: andn zero,zero,zero. */ + got_sigill = 0; + asm volatile(".insn r 0x33, 7, 0x20, zero, zero, zero" : : : "memory"); + have_zbb = !got_sigill; +#endif + +#ifndef have_zicond + /* Probe for Zicond: czero.eqz zero,zero,zero. */ + got_sigill = 0; + asm volatile(".insn r 0x33, 5, 0x07, zero, zero, zero" : : : "memory"); + have_zicond = !got_sigill; +#endif + + sigaction(SIGILL, &sa_old, NULL); +#endif +} + static void tcg_target_init(TCGContext *s) { + tcg_target_detect_isa(); + tcg_target_available_regs[TCG_TYPE_I32] = 0xffffffff; tcg_target_available_regs[TCG_TYPE_I64] = 0xffffffff; diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h index 494c986b49..863ac8ba2f 100644 --- a/tcg/riscv/tcg-target.h +++ b/tcg/riscv/tcg-target.h @@ -90,6 +90,12 @@ typedef enum { #define TCG_TARGET_CALL_ARG_I128 TCG_CALL_ARG_NORMAL #define TCG_TARGET_CALL_RET_I128 TCG_CALL_RET_NORMAL +#if defined(__riscv_arch_test) && defined(__riscv_zbb) +# define have_zbb true +#else +extern bool have_zbb; +#endif + /* optional instructions */ #define TCG_TARGET_HAS_movcond_i32 0 #define TCG_TARGET_HAS_div_i32 1 From 99f4ec6eab2f1b818b4613ca041e73ef00add350 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 25 Apr 2023 15:47:34 +0100 Subject: [PATCH 15/23] tcg/riscv: Support ANDN, ORN, XNOR from Zbb Acked-by: Alistair Francis Reviewed-by: Daniel Henrique Barboza Signed-off-by: Richard Henderson --- tcg/riscv/tcg-target-con-set.h | 1 + tcg/riscv/tcg-target-con-str.h | 1 + tcg/riscv/tcg-target.c.inc | 41 ++++++++++++++++++++++++++++++++++ tcg/riscv/tcg-target.h | 12 +++++----- 4 files changed, 49 insertions(+), 6 deletions(-) diff --git a/tcg/riscv/tcg-target-con-set.h b/tcg/riscv/tcg-target-con-set.h index d88888d3ac..1a33ece98f 100644 --- a/tcg/riscv/tcg-target-con-set.h +++ b/tcg/riscv/tcg-target-con-set.h @@ -15,6 +15,7 @@ C_O0_I2(rZ, rZ) C_O1_I1(r, r) C_O1_I2(r, r, ri) C_O1_I2(r, r, rI) +C_O1_I2(r, r, rJ) C_O1_I2(r, rZ, rN) C_O1_I2(r, rZ, rZ) C_O2_I4(r, r, rZ, rZ, rM, rM) diff --git a/tcg/riscv/tcg-target-con-str.h b/tcg/riscv/tcg-target-con-str.h index 6f1cfb976c..d5c419dff1 100644 --- a/tcg/riscv/tcg-target-con-str.h +++ b/tcg/riscv/tcg-target-con-str.h @@ -15,6 +15,7 @@ REGS('r', ALL_GENERAL_REGS) * CONST(letter, TCG_CT_CONST_* bit set) */ CONST('I', TCG_CT_CONST_S12) +CONST('J', TCG_CT_CONST_J12) CONST('N', TCG_CT_CONST_N12) CONST('M', TCG_CT_CONST_M12) CONST('Z', TCG_CT_CONST_ZERO) diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index eb3e2e9eb0..edfe4c8f8d 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -138,6 +138,7 @@ static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot) #define TCG_CT_CONST_S12 0x200 #define TCG_CT_CONST_N12 0x400 #define TCG_CT_CONST_M12 0x800 +#define TCG_CT_CONST_J12 0x1000 #define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32) @@ -174,6 +175,13 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) if ((ct & TCG_CT_CONST_M12) && val >= -0x7ff && val <= 0x7ff) { return 1; } + /* + * Inverse of sign extended from 12 bits: ~[-0x800, 0x7ff]. + * Used to map ANDN back to ANDI, etc. + */ + if ((ct & TCG_CT_CONST_J12) && ~val >= -0x800 && ~val <= 0x7ff) { + return 1; + } return 0; } @@ -1305,6 +1313,31 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, } break; + case INDEX_op_andc_i32: + case INDEX_op_andc_i64: + if (c2) { + tcg_out_opc_imm(s, OPC_ANDI, a0, a1, ~a2); + } else { + tcg_out_opc_reg(s, OPC_ANDN, a0, a1, a2); + } + break; + case INDEX_op_orc_i32: + case INDEX_op_orc_i64: + if (c2) { + tcg_out_opc_imm(s, OPC_ORI, a0, a1, ~a2); + } else { + tcg_out_opc_reg(s, OPC_ORN, a0, a1, a2); + } + break; + case INDEX_op_eqv_i32: + case INDEX_op_eqv_i64: + if (c2) { + tcg_out_opc_imm(s, OPC_XORI, a0, a1, ~a2); + } else { + tcg_out_opc_reg(s, OPC_XNOR, a0, a1, a2); + } + break; + case INDEX_op_not_i32: case INDEX_op_not_i64: tcg_out_opc_imm(s, OPC_XORI, a0, a1, -1); @@ -1539,6 +1572,14 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_xor_i64: return C_O1_I2(r, r, rI); + case INDEX_op_andc_i32: + case INDEX_op_andc_i64: + case INDEX_op_orc_i32: + case INDEX_op_orc_i64: + case INDEX_op_eqv_i32: + case INDEX_op_eqv_i64: + return C_O1_I2(r, r, rJ); + case INDEX_op_sub_i32: case INDEX_op_sub_i64: return C_O1_I2(r, rZ, rN); diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h index 863ac8ba2f..9f58d46208 100644 --- a/tcg/riscv/tcg-target.h +++ b/tcg/riscv/tcg-target.h @@ -120,9 +120,9 @@ extern bool have_zbb; #define TCG_TARGET_HAS_bswap32_i32 0 #define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_neg_i32 1 -#define TCG_TARGET_HAS_andc_i32 0 -#define TCG_TARGET_HAS_orc_i32 0 -#define TCG_TARGET_HAS_eqv_i32 0 +#define TCG_TARGET_HAS_andc_i32 have_zbb +#define TCG_TARGET_HAS_orc_i32 have_zbb +#define TCG_TARGET_HAS_eqv_i32 have_zbb #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_clz_i32 0 @@ -154,9 +154,9 @@ extern bool have_zbb; #define TCG_TARGET_HAS_bswap64_i64 0 #define TCG_TARGET_HAS_not_i64 1 #define TCG_TARGET_HAS_neg_i64 1 -#define TCG_TARGET_HAS_andc_i64 0 -#define TCG_TARGET_HAS_orc_i64 0 -#define TCG_TARGET_HAS_eqv_i64 0 +#define TCG_TARGET_HAS_andc_i64 have_zbb +#define TCG_TARGET_HAS_orc_i64 have_zbb +#define TCG_TARGET_HAS_eqv_i64 have_zbb #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_clz_i64 0 From d1c3f4e9ed160505c10b3d2acccf43e70e799ed3 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 25 Apr 2023 16:04:16 +0100 Subject: [PATCH 16/23] tcg/riscv: Support ADD.UW, SEXT.B, SEXT.H, ZEXT.H from Zba+Zbb Acked-by: Alistair Francis Reviewed-by: Daniel Henrique Barboza Signed-off-by: Richard Henderson --- tcg/riscv/tcg-target.c.inc | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index edfe4c8f8d..297119817b 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -593,26 +593,42 @@ static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg) static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg) { - tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 16); - tcg_out_opc_imm(s, OPC_SRLIW, ret, ret, 16); + if (have_zbb) { + tcg_out_opc_reg(s, OPC_ZEXT_H, ret, arg, TCG_REG_ZERO); + } else { + tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 16); + tcg_out_opc_imm(s, OPC_SRLIW, ret, ret, 16); + } } static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg) { - tcg_out_opc_imm(s, OPC_SLLI, ret, arg, 32); - tcg_out_opc_imm(s, OPC_SRLI, ret, ret, 32); + if (have_zba) { + tcg_out_opc_reg(s, OPC_ADD_UW, ret, arg, TCG_REG_ZERO); + } else { + tcg_out_opc_imm(s, OPC_SLLI, ret, arg, 32); + tcg_out_opc_imm(s, OPC_SRLI, ret, ret, 32); + } } static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) { - tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 24); - tcg_out_opc_imm(s, OPC_SRAIW, ret, ret, 24); + if (have_zbb) { + tcg_out_opc_imm(s, OPC_SEXT_B, ret, arg, 0); + } else { + tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 24); + tcg_out_opc_imm(s, OPC_SRAIW, ret, ret, 24); + } } static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg) { - tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 16); - tcg_out_opc_imm(s, OPC_SRAIW, ret, ret, 16); + if (have_zbb) { + tcg_out_opc_imm(s, OPC_SEXT_H, ret, arg, 0); + } else { + tcg_out_opc_imm(s, OPC_SLLIW, ret, arg, 16); + tcg_out_opc_imm(s, OPC_SRAIW, ret, ret, 16); + } } static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg) From eda151599654e3981967052defd49945e7879596 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 25 Apr 2023 16:19:44 +0100 Subject: [PATCH 17/23] tcg/riscv: Use ADD.UW for guest address generation The instruction is a combined zero-extend and add. Use it for exactly that. Acked-by: Alistair Francis Reviewed-by: Daniel Henrique Barboza Signed-off-by: Richard Henderson --- tcg/riscv/tcg-target.c.inc | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 297119817b..2fdd450da3 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -1038,14 +1038,18 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase, tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP0, TCG_REG_TMP1, 0); /* TLB Hit - translate address using addend. */ - addr_adj = addr_reg; - if (TARGET_LONG_BITS == 32) { - addr_adj = TCG_REG_TMP0; - tcg_out_ext32u(s, addr_adj, addr_reg); + if (TARGET_LONG_BITS == 64) { + tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, addr_reg, TCG_REG_TMP2); + } else if (have_zba) { + tcg_out_opc_reg(s, OPC_ADD_UW, TCG_REG_TMP0, addr_reg, TCG_REG_TMP2); + } else { + tcg_out_ext32u(s, TCG_REG_TMP0, addr_reg); + tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP0, TCG_REG_TMP2); } - tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_REG_TMP2, addr_adj); *pbase = TCG_REG_TMP0; #else + TCGReg base; + if (a_mask) { ldst = new_ldst_label(s); ldst->is_ld = is_ld; @@ -1060,14 +1064,21 @@ static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, TCGReg *pbase, tcg_out_opc_branch(s, OPC_BNE, TCG_REG_TMP1, TCG_REG_ZERO, 0); } - TCGReg base = addr_reg; - if (TARGET_LONG_BITS == 32) { - tcg_out_ext32u(s, TCG_REG_TMP0, base); - base = TCG_REG_TMP0; - } if (guest_base != 0) { - tcg_out_opc_reg(s, OPC_ADD, TCG_REG_TMP0, TCG_GUEST_BASE_REG, base); base = TCG_REG_TMP0; + if (TARGET_LONG_BITS == 64) { + tcg_out_opc_reg(s, OPC_ADD, base, addr_reg, TCG_GUEST_BASE_REG); + } else if (have_zba) { + tcg_out_opc_reg(s, OPC_ADD_UW, base, addr_reg, TCG_GUEST_BASE_REG); + } else { + tcg_out_ext32u(s, base, addr_reg); + tcg_out_opc_reg(s, OPC_ADD, base, base, TCG_GUEST_BASE_REG); + } + } else if (TARGET_LONG_BITS == 64) { + base = addr_reg; + } else { + base = TCG_REG_TMP0; + tcg_out_ext32u(s, base, addr_reg); } *pbase = base; #endif From 19d016ad9730dd9f04689a65c442b806d941f3a5 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 25 Apr 2023 16:34:07 +0100 Subject: [PATCH 18/23] tcg/riscv: Support rotates from Zbb Acked-by: Alistair Francis Reviewed-by: Daniel Henrique Barboza Signed-off-by: Richard Henderson --- tcg/riscv/tcg-target.c.inc | 34 ++++++++++++++++++++++++++++++++++ tcg/riscv/tcg-target.h | 4 ++-- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 2fdd450da3..cc96425413 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -1457,6 +1457,36 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, } break; + case INDEX_op_rotl_i32: + if (c2) { + tcg_out_opc_imm(s, OPC_RORIW, a0, a1, -a2 & 0x1f); + } else { + tcg_out_opc_reg(s, OPC_ROLW, a0, a1, a2); + } + break; + case INDEX_op_rotl_i64: + if (c2) { + tcg_out_opc_imm(s, OPC_RORI, a0, a1, -a2 & 0x3f); + } else { + tcg_out_opc_reg(s, OPC_ROL, a0, a1, a2); + } + break; + + case INDEX_op_rotr_i32: + if (c2) { + tcg_out_opc_imm(s, OPC_RORIW, a0, a1, a2 & 0x1f); + } else { + tcg_out_opc_reg(s, OPC_RORW, a0, a1, a2); + } + break; + case INDEX_op_rotr_i64: + if (c2) { + tcg_out_opc_imm(s, OPC_RORI, a0, a1, a2 & 0x3f); + } else { + tcg_out_opc_reg(s, OPC_ROR, a0, a1, a2); + } + break; + case INDEX_op_add2_i32: tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], const_args[4], const_args[5], false, true); @@ -1632,9 +1662,13 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_shl_i32: case INDEX_op_shr_i32: case INDEX_op_sar_i32: + case INDEX_op_rotl_i32: + case INDEX_op_rotr_i32: case INDEX_op_shl_i64: case INDEX_op_shr_i64: case INDEX_op_sar_i64: + case INDEX_op_rotl_i64: + case INDEX_op_rotr_i64: return C_O1_I2(r, r, ri); case INDEX_op_brcond_i32: diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h index 9f58d46208..317d385924 100644 --- a/tcg/riscv/tcg-target.h +++ b/tcg/riscv/tcg-target.h @@ -101,7 +101,7 @@ extern bool have_zbb; #define TCG_TARGET_HAS_div_i32 1 #define TCG_TARGET_HAS_rem_i32 1 #define TCG_TARGET_HAS_div2_i32 0 -#define TCG_TARGET_HAS_rot_i32 0 +#define TCG_TARGET_HAS_rot_i32 have_zbb #define TCG_TARGET_HAS_deposit_i32 0 #define TCG_TARGET_HAS_extract_i32 0 #define TCG_TARGET_HAS_sextract_i32 0 @@ -136,7 +136,7 @@ extern bool have_zbb; #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 1 #define TCG_TARGET_HAS_div2_i64 0 -#define TCG_TARGET_HAS_rot_i64 0 +#define TCG_TARGET_HAS_rot_i64 have_zbb #define TCG_TARGET_HAS_deposit_i64 0 #define TCG_TARGET_HAS_extract_i64 0 #define TCG_TARGET_HAS_sextract_i64 0 From 7b4d5274279dbca4b9a83a0a64bc7fb10d7e3970 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 25 Apr 2023 17:04:53 +0100 Subject: [PATCH 19/23] tcg/riscv: Support REV8 from Zbb Acked-by: Alistair Francis Reviewed-by: Daniel Henrique Barboza Signed-off-by: Richard Henderson --- tcg/riscv/tcg-target.c.inc | 29 +++++++++++++++++++++++++++++ tcg/riscv/tcg-target.h | 10 +++++----- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index cc96425413..cb4afb4733 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -1487,6 +1487,30 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, } break; + case INDEX_op_bswap64_i64: + tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0); + break; + case INDEX_op_bswap32_i32: + a2 = 0; + /* fall through */ + case INDEX_op_bswap32_i64: + tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0); + if (a2 & TCG_BSWAP_OZ) { + tcg_out_opc_imm(s, OPC_SRLI, a0, a0, 32); + } else { + tcg_out_opc_imm(s, OPC_SRAI, a0, a0, 32); + } + break; + case INDEX_op_bswap16_i64: + case INDEX_op_bswap16_i32: + tcg_out_opc_imm(s, OPC_REV8, a0, a1, 0); + if (a2 & TCG_BSWAP_OZ) { + tcg_out_opc_imm(s, OPC_SRLI, a0, a0, 48); + } else { + tcg_out_opc_imm(s, OPC_SRAI, a0, a0, 48); + } + break; + case INDEX_op_add2_i32: tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], const_args[4], const_args[5], false, true); @@ -1608,6 +1632,11 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_extrl_i64_i32: case INDEX_op_extrh_i64_i32: case INDEX_op_ext_i32_i64: + case INDEX_op_bswap16_i32: + case INDEX_op_bswap32_i32: + case INDEX_op_bswap16_i64: + case INDEX_op_bswap32_i64: + case INDEX_op_bswap64_i64: return C_O1_I1(r, r); case INDEX_op_st8_i32: diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h index 317d385924..8e327afc3a 100644 --- a/tcg/riscv/tcg-target.h +++ b/tcg/riscv/tcg-target.h @@ -116,8 +116,8 @@ extern bool have_zbb; #define TCG_TARGET_HAS_ext16s_i32 1 #define TCG_TARGET_HAS_ext8u_i32 1 #define TCG_TARGET_HAS_ext16u_i32 1 -#define TCG_TARGET_HAS_bswap16_i32 0 -#define TCG_TARGET_HAS_bswap32_i32 0 +#define TCG_TARGET_HAS_bswap16_i32 have_zbb +#define TCG_TARGET_HAS_bswap32_i32 have_zbb #define TCG_TARGET_HAS_not_i32 1 #define TCG_TARGET_HAS_neg_i32 1 #define TCG_TARGET_HAS_andc_i32 have_zbb @@ -149,9 +149,9 @@ extern bool have_zbb; #define TCG_TARGET_HAS_ext8u_i64 1 #define TCG_TARGET_HAS_ext16u_i64 1 #define TCG_TARGET_HAS_ext32u_i64 1 -#define TCG_TARGET_HAS_bswap16_i64 0 -#define TCG_TARGET_HAS_bswap32_i64 0 -#define TCG_TARGET_HAS_bswap64_i64 0 +#define TCG_TARGET_HAS_bswap16_i64 have_zbb +#define TCG_TARGET_HAS_bswap32_i64 have_zbb +#define TCG_TARGET_HAS_bswap64_i64 have_zbb #define TCG_TARGET_HAS_not_i64 1 #define TCG_TARGET_HAS_neg_i64 1 #define TCG_TARGET_HAS_andc_i64 have_zbb From 0956ecda9fad9e81b8cbb1e5a05ae60bf6971f2d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 26 Apr 2023 09:16:11 +0100 Subject: [PATCH 20/23] tcg/riscv: Support CPOP from Zbb Acked-by: Alistair Francis Reviewed-by: Daniel Henrique Barboza Signed-off-by: Richard Henderson --- tcg/riscv/tcg-target.c.inc | 9 +++++++++ tcg/riscv/tcg-target.h | 4 ++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index cb4afb4733..05ea9fead8 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -1511,6 +1511,13 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, } break; + case INDEX_op_ctpop_i32: + tcg_out_opc_imm(s, OPC_CPOPW, a0, a1, 0); + break; + case INDEX_op_ctpop_i64: + tcg_out_opc_imm(s, OPC_CPOP, a0, a1, 0); + break; + case INDEX_op_add2_i32: tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], const_args[4], const_args[5], false, true); @@ -1637,6 +1644,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_bswap16_i64: case INDEX_op_bswap32_i64: case INDEX_op_bswap64_i64: + case INDEX_op_ctpop_i32: + case INDEX_op_ctpop_i64: return C_O1_I1(r, r); case INDEX_op_st8_i32: diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h index 8e327afc3a..e0b23006c4 100644 --- a/tcg/riscv/tcg-target.h +++ b/tcg/riscv/tcg-target.h @@ -127,7 +127,7 @@ extern bool have_zbb; #define TCG_TARGET_HAS_nor_i32 0 #define TCG_TARGET_HAS_clz_i32 0 #define TCG_TARGET_HAS_ctz_i32 0 -#define TCG_TARGET_HAS_ctpop_i32 0 +#define TCG_TARGET_HAS_ctpop_i32 have_zbb #define TCG_TARGET_HAS_brcond2 1 #define TCG_TARGET_HAS_setcond2 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 @@ -161,7 +161,7 @@ extern bool have_zbb; #define TCG_TARGET_HAS_nor_i64 0 #define TCG_TARGET_HAS_clz_i64 0 #define TCG_TARGET_HAS_ctz_i64 0 -#define TCG_TARGET_HAS_ctpop_i64 0 +#define TCG_TARGET_HAS_ctpop_i64 have_zbb #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1 #define TCG_TARGET_HAS_mulu2_i64 0 From f6453695f9f87ba1974eca13322864810c90b9f0 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 26 Apr 2023 09:38:32 +0100 Subject: [PATCH 21/23] tcg/riscv: Improve setcond expansion Split out a helper function, tcg_out_setcond_int, which does not always produce the complete boolean result, but returns a set of flags to do so. Based on 21af16198425, the same improvement for loongarch64. Acked-by: Alistair Francis Reviewed-by: Daniel Henrique Barboza Signed-off-by: Richard Henderson --- tcg/riscv/tcg-target.c.inc | 164 +++++++++++++++++++++++++++---------- 1 file changed, 121 insertions(+), 43 deletions(-) diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 05ea9fead8..db328ddc2d 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -812,50 +812,128 @@ static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, tcg_out_opc_branch(s, op, arg1, arg2, 0); } -static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, - TCGReg arg1, TCGReg arg2) +#define SETCOND_INV TCG_TARGET_NB_REGS +#define SETCOND_NEZ (SETCOND_INV << 1) +#define SETCOND_FLAGS (SETCOND_INV | SETCOND_NEZ) + +static int tcg_out_setcond_int(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg arg1, tcg_target_long arg2, bool c2) { + int flags = 0; + switch (cond) { - case TCG_COND_EQ: - tcg_out_opc_reg(s, OPC_SUB, ret, arg1, arg2); - tcg_out_opc_imm(s, OPC_SLTIU, ret, ret, 1); - break; - case TCG_COND_NE: - tcg_out_opc_reg(s, OPC_SUB, ret, arg1, arg2); - tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, ret); - break; - case TCG_COND_LT: - tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2); - break; - case TCG_COND_GE: - tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2); - tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); - break; - case TCG_COND_LE: - tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1); - tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); - break; - case TCG_COND_GT: - tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1); - break; - case TCG_COND_LTU: - tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2); - break; - case TCG_COND_GEU: - tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2); - tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); - break; - case TCG_COND_LEU: - tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1); - tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); - break; - case TCG_COND_GTU: - tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1); + case TCG_COND_EQ: /* -> NE */ + case TCG_COND_GE: /* -> LT */ + case TCG_COND_GEU: /* -> LTU */ + case TCG_COND_GT: /* -> LE */ + case TCG_COND_GTU: /* -> LEU */ + cond = tcg_invert_cond(cond); + flags ^= SETCOND_INV; break; default: - g_assert_not_reached(); - break; - } + break; + } + + switch (cond) { + case TCG_COND_LE: + case TCG_COND_LEU: + /* + * If we have a constant input, the most efficient way to implement + * LE is by adding 1 and using LT. Watch out for wrap around for LEU. + * We don't need to care for this for LE because the constant input + * is constrained to signed 12-bit, and 0x800 is representable in the + * temporary register. + */ + if (c2) { + if (cond == TCG_COND_LEU) { + /* unsigned <= -1 is true */ + if (arg2 == -1) { + tcg_out_movi(s, TCG_TYPE_REG, ret, !(flags & SETCOND_INV)); + return ret; + } + cond = TCG_COND_LTU; + } else { + cond = TCG_COND_LT; + } + tcg_debug_assert(arg2 <= 0x7ff); + if (++arg2 == 0x800) { + tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP0, arg2); + arg2 = TCG_REG_TMP0; + c2 = false; + } + } else { + TCGReg tmp = arg2; + arg2 = arg1; + arg1 = tmp; + cond = tcg_swap_cond(cond); /* LE -> GE */ + cond = tcg_invert_cond(cond); /* GE -> LT */ + flags ^= SETCOND_INV; + } + break; + default: + break; + } + + switch (cond) { + case TCG_COND_NE: + flags |= SETCOND_NEZ; + if (!c2) { + tcg_out_opc_reg(s, OPC_XOR, ret, arg1, arg2); + } else if (arg2 == 0) { + ret = arg1; + } else { + tcg_out_opc_imm(s, OPC_XORI, ret, arg1, arg2); + } + break; + + case TCG_COND_LT: + if (c2) { + tcg_out_opc_imm(s, OPC_SLTI, ret, arg1, arg2); + } else { + tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2); + } + break; + + case TCG_COND_LTU: + if (c2) { + tcg_out_opc_imm(s, OPC_SLTIU, ret, arg1, arg2); + } else { + tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2); + } + break; + + default: + g_assert_not_reached(); + } + + return ret | flags; +} + +static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg arg1, tcg_target_long arg2, bool c2) +{ + int tmpflags = tcg_out_setcond_int(s, cond, ret, arg1, arg2, c2); + + if (tmpflags != ret) { + TCGReg tmp = tmpflags & ~SETCOND_FLAGS; + + switch (tmpflags & SETCOND_FLAGS) { + case SETCOND_INV: + /* Intermediate result is boolean: simply invert. */ + tcg_out_opc_imm(s, OPC_XORI, ret, tmp, 1); + break; + case SETCOND_NEZ: + /* Intermediate result is zero/non-zero: test != 0. */ + tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, tmp); + break; + case SETCOND_NEZ | SETCOND_INV: + /* Intermediate result is zero/non-zero: test == 0. */ + tcg_out_opc_imm(s, OPC_SLTIU, ret, tmp, 1); + break; + default: + g_assert_not_reached(); + } + } } static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail) @@ -1542,7 +1620,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_setcond_i32: case INDEX_op_setcond_i64: - tcg_out_setcond(s, args[3], a0, a1, a2); + tcg_out_setcond(s, args[3], a0, a1, a2, c2); break; case INDEX_op_qemu_ld_a32_i32: @@ -1665,6 +1743,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_and_i64: case INDEX_op_or_i64: case INDEX_op_xor_i64: + case INDEX_op_setcond_i32: + case INDEX_op_setcond_i64: return C_O1_I2(r, r, rI); case INDEX_op_andc_i32: @@ -1686,7 +1766,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_divu_i32: case INDEX_op_rem_i32: case INDEX_op_remu_i32: - case INDEX_op_setcond_i32: case INDEX_op_mul_i64: case INDEX_op_mulsh_i64: case INDEX_op_muluh_i64: @@ -1694,7 +1773,6 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_divu_i64: case INDEX_op_rem_i64: case INDEX_op_remu_i64: - case INDEX_op_setcond_i64: return C_O1_I2(r, rZ, rZ); case INDEX_op_shl_i32: From a18d783e649d761a7556f6d964efa453bb4f3a06 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 26 Apr 2023 11:59:55 +0100 Subject: [PATCH 22/23] tcg/riscv: Implement movcond Implement with and without Zicond. Without Zicond, we were letting the middle-end expand to a 5 insn sequence; better to use a branch over a single insn. Acked-by: Alistair Francis Reviewed-by: Daniel Henrique Barboza Signed-off-by: Richard Henderson --- tcg/riscv/tcg-target-con-set.h | 1 + tcg/riscv/tcg-target.c.inc | 139 ++++++++++++++++++++++++++++++++- tcg/riscv/tcg-target.h | 4 +- 3 files changed, 141 insertions(+), 3 deletions(-) diff --git a/tcg/riscv/tcg-target-con-set.h b/tcg/riscv/tcg-target-con-set.h index 1a33ece98f..a5cadd303f 100644 --- a/tcg/riscv/tcg-target-con-set.h +++ b/tcg/riscv/tcg-target-con-set.h @@ -18,4 +18,5 @@ C_O1_I2(r, r, rI) C_O1_I2(r, r, rJ) C_O1_I2(r, rZ, rN) C_O1_I2(r, rZ, rZ) +C_O1_I4(r, r, rI, rM, rM) C_O2_I4(r, r, rZ, rZ, rM, rM) diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index db328ddc2d..811b84d152 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -169,7 +169,7 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct) } /* * Sign extended from 12 bits, +/- matching: [-0x7ff, 0x7ff]. - * Used by addsub2, which may need the negative operation, + * Used by addsub2 and movcond, which may need the negative value, * and requires the modified constant to be representable. */ if ((ct & TCG_CT_CONST_M12) && val >= -0x7ff && val <= 0x7ff) { @@ -936,6 +936,133 @@ static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, } } +static void tcg_out_movcond_zicond(TCGContext *s, TCGReg ret, TCGReg test_ne, + int val1, bool c_val1, + int val2, bool c_val2) +{ + if (val1 == 0) { + if (c_val2) { + tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP1, val2); + val2 = TCG_REG_TMP1; + } + tcg_out_opc_reg(s, OPC_CZERO_NEZ, ret, val2, test_ne); + return; + } + + if (val2 == 0) { + if (c_val1) { + tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP1, val1); + val1 = TCG_REG_TMP1; + } + tcg_out_opc_reg(s, OPC_CZERO_EQZ, ret, val1, test_ne); + return; + } + + if (c_val2) { + if (c_val1) { + tcg_out_movi(s, TCG_TYPE_REG, TCG_REG_TMP1, val1 - val2); + } else { + tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_TMP1, val1, -val2); + } + tcg_out_opc_reg(s, OPC_CZERO_EQZ, ret, TCG_REG_TMP1, test_ne); + tcg_out_opc_imm(s, OPC_ADDI, ret, ret, val2); + return; + } + + if (c_val1) { + tcg_out_opc_imm(s, OPC_ADDI, TCG_REG_TMP1, val2, -val1); + tcg_out_opc_reg(s, OPC_CZERO_NEZ, ret, TCG_REG_TMP1, test_ne); + tcg_out_opc_imm(s, OPC_ADDI, ret, ret, val1); + return; + } + + tcg_out_opc_reg(s, OPC_CZERO_NEZ, TCG_REG_TMP1, val2, test_ne); + tcg_out_opc_reg(s, OPC_CZERO_EQZ, TCG_REG_TMP0, val1, test_ne); + tcg_out_opc_reg(s, OPC_OR, ret, TCG_REG_TMP0, TCG_REG_TMP1); +} + +static void tcg_out_movcond_br1(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg cmp1, TCGReg cmp2, + int val, bool c_val) +{ + RISCVInsn op; + int disp = 8; + + tcg_debug_assert((unsigned)cond < ARRAY_SIZE(tcg_brcond_to_riscv)); + op = tcg_brcond_to_riscv[cond].op; + tcg_debug_assert(op != 0); + + if (tcg_brcond_to_riscv[cond].swap) { + tcg_out_opc_branch(s, op, cmp2, cmp1, disp); + } else { + tcg_out_opc_branch(s, op, cmp1, cmp2, disp); + } + if (c_val) { + tcg_out_opc_imm(s, OPC_ADDI, ret, TCG_REG_ZERO, val); + } else { + tcg_out_opc_imm(s, OPC_ADDI, ret, val, 0); + } +} + +static void tcg_out_movcond_br2(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg cmp1, TCGReg cmp2, + int val1, bool c_val1, + int val2, bool c_val2) +{ + TCGReg tmp; + + /* TCG optimizer reorders to prefer ret matching val2. */ + if (!c_val2 && ret == val2) { + cond = tcg_invert_cond(cond); + tcg_out_movcond_br1(s, cond, ret, cmp1, cmp2, val1, c_val1); + return; + } + + if (!c_val1 && ret == val1) { + tcg_out_movcond_br1(s, cond, ret, cmp1, cmp2, val2, c_val2); + return; + } + + tmp = (ret == cmp1 || ret == cmp2 ? TCG_REG_TMP1 : ret); + if (c_val1) { + tcg_out_movi(s, TCG_TYPE_REG, tmp, val1); + } else { + tcg_out_mov(s, TCG_TYPE_REG, tmp, val1); + } + tcg_out_movcond_br1(s, cond, tmp, cmp1, cmp2, val2, c_val2); + tcg_out_mov(s, TCG_TYPE_REG, ret, tmp); +} + +static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg cmp1, int cmp2, bool c_cmp2, + TCGReg val1, bool c_val1, + TCGReg val2, bool c_val2) +{ + int tmpflags; + TCGReg t; + + if (!have_zicond && (!c_cmp2 || cmp2 == 0)) { + tcg_out_movcond_br2(s, cond, ret, cmp1, cmp2, + val1, c_val1, val2, c_val2); + return; + } + + tmpflags = tcg_out_setcond_int(s, cond, TCG_REG_TMP0, cmp1, cmp2, c_cmp2); + t = tmpflags & ~SETCOND_FLAGS; + + if (have_zicond) { + if (tmpflags & SETCOND_INV) { + tcg_out_movcond_zicond(s, ret, t, val2, c_val2, val1, c_val1); + } else { + tcg_out_movcond_zicond(s, ret, t, val1, c_val1, val2, c_val2); + } + } else { + cond = tmpflags & SETCOND_INV ? TCG_COND_EQ : TCG_COND_NE; + tcg_out_movcond_br2(s, cond, ret, t, TCG_REG_ZERO, + val1, c_val1, val2, c_val2); + } +} + static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail) { TCGReg link = tail ? TCG_REG_ZERO : TCG_REG_RA; @@ -1623,6 +1750,12 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_setcond(s, args[3], a0, a1, a2, c2); break; + case INDEX_op_movcond_i32: + case INDEX_op_movcond_i64: + tcg_out_movcond(s, args[5], a0, a1, a2, c2, + args[3], const_args[3], args[4], const_args[4]); + break; + case INDEX_op_qemu_ld_a32_i32: case INDEX_op_qemu_ld_a64_i32: tcg_out_qemu_ld(s, a0, a1, a2, TCG_TYPE_I32); @@ -1791,6 +1924,10 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_brcond_i64: return C_O0_I2(rZ, rZ); + case INDEX_op_movcond_i32: + case INDEX_op_movcond_i64: + return C_O1_I4(r, r, rI, rM, rM); + case INDEX_op_add2_i32: case INDEX_op_add2_i64: case INDEX_op_sub2_i32: diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h index e0b23006c4..e9e84be9a5 100644 --- a/tcg/riscv/tcg-target.h +++ b/tcg/riscv/tcg-target.h @@ -97,7 +97,7 @@ extern bool have_zbb; #endif /* optional instructions */ -#define TCG_TARGET_HAS_movcond_i32 0 +#define TCG_TARGET_HAS_movcond_i32 1 #define TCG_TARGET_HAS_div_i32 1 #define TCG_TARGET_HAS_rem_i32 1 #define TCG_TARGET_HAS_div2_i32 0 @@ -132,7 +132,7 @@ extern bool have_zbb; #define TCG_TARGET_HAS_setcond2 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 -#define TCG_TARGET_HAS_movcond_i64 0 +#define TCG_TARGET_HAS_movcond_i64 1 #define TCG_TARGET_HAS_div_i64 1 #define TCG_TARGET_HAS_rem_i64 1 #define TCG_TARGET_HAS_div2_i64 0 From a30498fcea5a8b9c544324ccfb0186090104b229 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 26 Apr 2023 14:21:55 +0100 Subject: [PATCH 23/23] tcg/riscv: Support CTZ, CLZ from Zbb Acked-by: Alistair Francis Reviewed-by: Daniel Henrique Barboza Signed-off-by: Richard Henderson --- tcg/riscv/tcg-target-con-set.h | 1 + tcg/riscv/tcg-target.c.inc | 35 ++++++++++++++++++++++++++++++++++ tcg/riscv/tcg-target.h | 8 ++++---- 3 files changed, 40 insertions(+), 4 deletions(-) diff --git a/tcg/riscv/tcg-target-con-set.h b/tcg/riscv/tcg-target-con-set.h index a5cadd303f..aac5ceee2b 100644 --- a/tcg/riscv/tcg-target-con-set.h +++ b/tcg/riscv/tcg-target-con-set.h @@ -18,5 +18,6 @@ C_O1_I2(r, r, rI) C_O1_I2(r, r, rJ) C_O1_I2(r, rZ, rN) C_O1_I2(r, rZ, rZ) +C_N1_I2(r, r, rM) C_O1_I4(r, r, rI, rM, rM) C_O2_I4(r, r, rZ, rZ, rM, rM) diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 811b84d152..c0257124fa 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -1063,6 +1063,22 @@ static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret, } } +static void tcg_out_cltz(TCGContext *s, TCGType type, RISCVInsn insn, + TCGReg ret, TCGReg src1, int src2, bool c_src2) +{ + tcg_out_opc_imm(s, insn, ret, src1, 0); + + if (!c_src2 || src2 != (type == TCG_TYPE_I32 ? 32 : 64)) { + /* + * The requested zero result does not match the insn, so adjust. + * Note that constraints put 'ret' in a new register, so the + * computation above did not clobber either 'src1' or 'src2'. + */ + tcg_out_movcond(s, TCG_COND_EQ, ret, src1, 0, true, + src2, c_src2, ret, false); + } +} + static void tcg_out_call_int(TCGContext *s, const tcg_insn_unit *arg, bool tail) { TCGReg link = tail ? TCG_REG_ZERO : TCG_REG_RA; @@ -1723,6 +1739,19 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_opc_imm(s, OPC_CPOP, a0, a1, 0); break; + case INDEX_op_clz_i32: + tcg_out_cltz(s, TCG_TYPE_I32, OPC_CLZW, a0, a1, a2, c2); + break; + case INDEX_op_clz_i64: + tcg_out_cltz(s, TCG_TYPE_I64, OPC_CLZ, a0, a1, a2, c2); + break; + case INDEX_op_ctz_i32: + tcg_out_cltz(s, TCG_TYPE_I32, OPC_CTZW, a0, a1, a2, c2); + break; + case INDEX_op_ctz_i64: + tcg_out_cltz(s, TCG_TYPE_I64, OPC_CTZ, a0, a1, a2, c2); + break; + case INDEX_op_add2_i32: tcg_out_addsub2(s, a0, a1, a2, args[3], args[4], args[5], const_args[4], const_args[5], false, true); @@ -1920,6 +1949,12 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) case INDEX_op_rotr_i64: return C_O1_I2(r, r, ri); + case INDEX_op_clz_i32: + case INDEX_op_clz_i64: + case INDEX_op_ctz_i32: + case INDEX_op_ctz_i64: + return C_N1_I2(r, r, rM); + case INDEX_op_brcond_i32: case INDEX_op_brcond_i64: return C_O0_I2(rZ, rZ); diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h index e9e84be9a5..62fe61af7b 100644 --- a/tcg/riscv/tcg-target.h +++ b/tcg/riscv/tcg-target.h @@ -125,8 +125,8 @@ extern bool have_zbb; #define TCG_TARGET_HAS_eqv_i32 have_zbb #define TCG_TARGET_HAS_nand_i32 0 #define TCG_TARGET_HAS_nor_i32 0 -#define TCG_TARGET_HAS_clz_i32 0 -#define TCG_TARGET_HAS_ctz_i32 0 +#define TCG_TARGET_HAS_clz_i32 have_zbb +#define TCG_TARGET_HAS_ctz_i32 have_zbb #define TCG_TARGET_HAS_ctpop_i32 have_zbb #define TCG_TARGET_HAS_brcond2 1 #define TCG_TARGET_HAS_setcond2 1 @@ -159,8 +159,8 @@ extern bool have_zbb; #define TCG_TARGET_HAS_eqv_i64 have_zbb #define TCG_TARGET_HAS_nand_i64 0 #define TCG_TARGET_HAS_nor_i64 0 -#define TCG_TARGET_HAS_clz_i64 0 -#define TCG_TARGET_HAS_ctz_i64 0 +#define TCG_TARGET_HAS_clz_i64 have_zbb +#define TCG_TARGET_HAS_ctz_i64 have_zbb #define TCG_TARGET_HAS_ctpop_i64 have_zbb #define TCG_TARGET_HAS_add2_i64 1 #define TCG_TARGET_HAS_sub2_i64 1