From b55a8d9d0bb486c0ad7a34985ec43f22fae930c3 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 26 Nov 2022 12:42:06 -0800 Subject: [PATCH 01/22] tcg: Split out tcg_out_exit_tb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The INDEX_op_exit_tb opcode needs no register allocation. Split out a dedicated helper function for it. Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 22 ++++++++++-------- tcg/arm/tcg-target.c.inc | 11 +++++---- tcg/i386/tcg-target.c.inc | 21 +++++++++-------- tcg/loongarch64/tcg-target.c.inc | 22 ++++++++++-------- tcg/mips/tcg-target.c.inc | 33 +++++++++++++-------------- tcg/ppc/tcg-target.c.inc | 11 +++++---- tcg/riscv/tcg-target.c.inc | 22 ++++++++++-------- tcg/s390x/tcg-target.c.inc | 23 ++++++++++--------- tcg/sparc64/tcg-target.c.inc | 39 +++++++++++++++++--------------- tcg/tcg.c | 4 ++++ tcg/tci/tcg-target.c.inc | 10 ++++---- 11 files changed, 121 insertions(+), 97 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index ad1816e32d..501b77c215 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1887,6 +1887,17 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, static const tcg_insn_unit *tb_ret_addr; +static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) +{ + /* Reuse the zeroing that exists for goto_ptr. */ + if (a0 == 0) { + tcg_out_goto_long(s, tcg_code_gen_epilogue); + } else { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0); + tcg_out_goto_long(s, tb_ret_addr); + } +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1906,16 +1917,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I]) switch (opc) { - case INDEX_op_exit_tb: - /* Reuse the zeroing that exists for goto_ptr. */ - if (a0 == 0) { - tcg_out_goto_long(s, tcg_code_gen_epilogue); - } else { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0); - tcg_out_goto_long(s, tb_ret_addr); - } - break; - case INDEX_op_goto_tb: tcg_debug_assert(s->tb_jmp_insn_offset != NULL); /* @@ -2305,6 +2306,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ + case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. 
*/ default: g_assert_not_reached(); } diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 9245ea86d0..799cf13536 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1933,6 +1933,12 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) static void tcg_out_epilogue(TCGContext *s); +static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg) +{ + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, arg); + tcg_out_epilogue(s); +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1941,10 +1947,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, int c; switch (opc) { - case INDEX_op_exit_tb: - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, args[0]); - tcg_out_epilogue(s); - break; case INDEX_op_goto_tb: { /* Indirect jump method */ @@ -2256,6 +2258,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_call: /* Always emitted via tcg_out_call. */ + case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ default: tcg_abort(); } diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 58bd5873f5..feb257db01 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2347,6 +2347,17 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) #endif } +static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) +{ + /* Reuse the zeroing that exists for goto_ptr. */ + if (a0 == 0) { + tcg_out_jmp(s, tcg_code_gen_epilogue); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, a0); + tcg_out_jmp(s, tb_ret_addr); + } +} + static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -2371,15 +2382,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const_a2 = const_args[2]; switch (opc) { - case INDEX_op_exit_tb: - /* Reuse the zeroing that exists for goto_ptr. */ - if (a0 == 0) { - tcg_out_jmp(s, tcg_code_gen_epilogue); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, a0); - tcg_out_jmp(s, tb_ret_addr); - } - break; case INDEX_op_goto_tb: if (s->tb_jmp_insn_offset) { /* direct jump method */ @@ -2794,6 +2796,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ + case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ default: tcg_abort(); } diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index c9e99e8ec3..29e4bfcb49 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1068,6 +1068,17 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, static const tcg_insn_unit *tb_ret_addr; +static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) +{ + /* Reuse the zeroing that exists for goto_ptr. */ + if (a0 == 0) { + tcg_out_call_int(s, tcg_code_gen_epilogue, true); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A0, a0); + tcg_out_call_int(s, tb_ret_addr, true); + } +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1078,16 +1089,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, int c2 = const_args[2]; switch (opc) { - case INDEX_op_exit_tb: - /* Reuse the zeroing that exists for goto_ptr. 
*/ - if (a0 == 0) { - tcg_out_call_int(s, tcg_code_gen_epilogue, true); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A0, a0); - tcg_out_call_int(s, tb_ret_addr, true); - } - break; - case INDEX_op_goto_tb: tcg_debug_assert(s->tb_jmp_insn_offset != NULL); /* @@ -1500,6 +1501,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ + case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ default: g_assert_not_reached(); } diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 292e490b5c..52881abd35 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1951,6 +1951,21 @@ static void tcg_out_clz(TCGContext *s, MIPSInsn opcv2, MIPSInsn opcv6, } } +static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) +{ + TCGReg b0 = TCG_REG_ZERO; + + if (a0 & ~0xffff) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_V0, a0 & ~0xffff); + b0 = TCG_REG_V0; + } + if (!tcg_out_opc_jmp(s, OPC_J, tb_ret_addr)) { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, (uintptr_t)tb_ret_addr); + tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0); + } + tcg_out_opc_imm(s, OPC_ORI, TCG_REG_V0, b0, a0 & 0xffff); +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1970,23 +1985,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, c2 = const_args[2]; switch (opc) { - case INDEX_op_exit_tb: - { - TCGReg b0 = TCG_REG_ZERO; - - a0 = (intptr_t)a0; - if (a0 & ~0xffff) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_V0, a0 & ~0xffff); - b0 = TCG_REG_V0; - } - if (!tcg_out_opc_jmp(s, OPC_J, tb_ret_addr)) { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, - (uintptr_t)tb_ret_addr); - tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0); - } - tcg_out_opc_imm(s, OPC_ORI, TCG_REG_V0, b0, a0 & 0xffff); - } - break; case INDEX_op_goto_tb: /* indirect jump method */ tcg_debug_assert(s->tb_jmp_insn_offset == 0); @@ -2403,6 +2401,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ + case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ default: tcg_abort(); } diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index e0621463f6..a95e4001d3 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2616,6 +2616,12 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out32(s, BCLR | BO_ALWAYS); } +static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg) +{ + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, arg); + tcg_out_b(s, 0, tcg_code_gen_epilogue); +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -2623,10 +2629,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGArg a0, a1, a2; switch (opc) { - case INDEX_op_exit_tb: - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, args[0]); - tcg_out_b(s, 0, tcg_code_gen_epilogue); - break; case INDEX_op_goto_tb: if (s->tb_jmp_insn_offset) { /* Direct jump. */ @@ -3185,6 +3187,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ + case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. 
*/ default: tcg_abort(); } diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index f741e0582d..9b42cb4b2e 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -1289,6 +1289,17 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) static const tcg_insn_unit *tb_ret_addr; +static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) +{ + /* Reuse the zeroing that exists for goto_ptr. */ + if (a0 == 0) { + tcg_out_call_int(s, tcg_code_gen_epilogue, true); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A0, a0); + tcg_out_call_int(s, tb_ret_addr, true); + } +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1299,16 +1310,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, int c2 = const_args[2]; switch (opc) { - case INDEX_op_exit_tb: - /* Reuse the zeroing that exists for goto_ptr. */ - if (a0 == 0) { - tcg_out_call_int(s, tcg_code_gen_epilogue, true); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_A0, a0); - tcg_out_call_int(s, tb_ret_addr, true); - } - break; - case INDEX_op_goto_tb: assert(s->tb_jmp_insn_offset == 0); /* indirect jump method */ @@ -1617,6 +1618,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ + case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ default: g_assert_not_reached(); } diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 2b38fd991d..48a0c3e3c0 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -1944,6 +1944,17 @@ static void tcg_out_qemu_st(TCGContext* s, TCGReg data_reg, TCGReg addr_reg, #endif } +static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) +{ + /* Reuse the zeroing that exists for goto_ptr. */ + if (a0 == 0) { + tgen_gotoi(s, S390_CC_ALWAYS, tcg_code_gen_epilogue); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, a0); + tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr); + } +} + # define OP_32_64(x) \ case glue(glue(INDEX_op_,x),_i32): \ case glue(glue(INDEX_op_,x),_i64) @@ -1956,17 +1967,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGArg a0, a1, a2; switch (opc) { - case INDEX_op_exit_tb: - /* Reuse the zeroing that exists for goto_ptr. */ - a0 = args[0]; - if (a0 == 0) { - tgen_gotoi(s, S390_CC_ALWAYS, tcg_code_gen_epilogue); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R2, a0); - tgen_gotoi(s, S390_CC_ALWAYS, tb_ret_addr); - } - break; - case INDEX_op_goto_tb: a0 = args[0]; /* @@ -2619,6 +2619,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ + case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. 
*/ default: tcg_abort(); } diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index eb913f33c8..d2d8b46815 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1428,6 +1428,26 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, #endif /* CONFIG_SOFTMMU */ } +static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) +{ + if (check_fit_ptr(a0, 13)) { + tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); + tcg_out_movi_imm13(s, TCG_REG_O0, a0); + return; + } else if (USE_REG_TB) { + intptr_t tb_diff = tcg_tbrel_diff(s, (void *)a0); + if (check_fit_ptr(tb_diff, 13)) { + tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); + /* Note that TCG_REG_TB has been unwound to O1. */ + tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O1, tb_diff, ARITH_ADD); + return; + } + } + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I0, a0 & ~0x3ff); + tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); + tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, a0 & 0x3ff, ARITH_OR); +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1442,24 +1462,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, c2 = const_args[2]; switch (opc) { - case INDEX_op_exit_tb: - if (check_fit_ptr(a0, 13)) { - tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); - tcg_out_movi_imm13(s, TCG_REG_O0, a0); - break; - } else if (USE_REG_TB) { - intptr_t tb_diff = tcg_tbrel_diff(s, (void *)a0); - if (check_fit_ptr(tb_diff, 13)) { - tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); - /* Note that TCG_REG_TB has been unwound to O1. */ - tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O1, tb_diff, ARITH_ADD); - break; - } - } - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I0, a0 & ~0x3ff); - tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); - tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, a0 & 0x3ff, ARITH_OR); - break; case INDEX_op_goto_tb: if (s->tb_jmp_insn_offset) { /* direct jump method */ @@ -1716,6 +1718,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ + case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. 
*/ default: tcg_abort(); } diff --git a/tcg/tcg.c b/tcg/tcg.c index 9b7df71e7a..257479337c 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -104,6 +104,7 @@ static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1, static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, tcg_target_long arg); +static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg); static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]); @@ -4718,6 +4719,9 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start) case INDEX_op_call: tcg_reg_alloc_call(s, op); break; + case INDEX_op_exit_tb: + tcg_out_exit_tb(s, op->args[0]); + break; case INDEX_op_dup2_vec: if (tcg_reg_alloc_dup2(s, op)) { break; diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index d36a7ebdd1..2f3bcce3a7 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -590,6 +590,11 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *func, # define CASE_64(x) #endif +static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg) +{ + tcg_out_op_p(s, INDEX_op_exit_tb, (void *)arg); +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -597,10 +602,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGOpcode exts; switch (opc) { - case INDEX_op_exit_tb: - tcg_out_op_p(s, opc, (void *)args[0]); - break; - case INDEX_op_goto_tb: tcg_debug_assert(s->tb_jmp_insn_offset == 0); /* indirect jump method. */ @@ -779,6 +780,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ + case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. 
*/ default: tcg_abort(); } From 3bb8500ef83613cf3d113041b4ba3104136d9aaf Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 26 Nov 2022 15:04:26 -0800 Subject: [PATCH 02/22] tcg/i386: Remove unused goto_tb code for indirect jump MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/i386/tcg-target.c.inc | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index feb257db01..c4ff59e9ee 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2383,23 +2383,19 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_goto_tb: - if (s->tb_jmp_insn_offset) { - /* direct jump method */ - int gap; - /* jump displacement must be aligned for atomic patching; + qemu_build_assert(TCG_TARGET_HAS_direct_jump); + { + /* + * Jump displacement must be aligned for atomic patching; * see if we need to add extra nops before jump */ - gap = QEMU_ALIGN_PTR_UP(s->code_ptr + 1, 4) - s->code_ptr; + int gap = QEMU_ALIGN_PTR_UP(s->code_ptr + 1, 4) - s->code_ptr; if (gap != 1) { tcg_out_nopn(s, gap - 1); } tcg_out8(s, OPC_JMP_long); /* jmp im */ s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); tcg_out32(s, 0); - } else { - /* indirect jump method */ - tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1, - (intptr_t)(s->tb_jmp_target_addr + a0)); } set_jmp_reset_offset(s, a0); break; From cea583d13cb5afdd0d9ac12cb91841f8f33008f7 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 26 Nov 2022 15:05:01 -0800 Subject: [PATCH 03/22] tcg/ppc: Remove unused goto_tb code for indirect jump MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/ppc/tcg-target.c.inc | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index a95e4001d3..b72e266990 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2630,27 +2630,21 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_goto_tb: - if (s->tb_jmp_insn_offset) { - /* Direct jump. */ - if (TCG_TARGET_REG_BITS == 64) { - /* Ensure the next insns are 8 or 16-byte aligned. */ - while ((uintptr_t)s->code_ptr & (have_isa_2_07 ? 15 : 7)) { - tcg_out32(s, NOP); - } - s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s); - tcg_out32(s, ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, 0)); - tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, 0)); - } else { - s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s); - tcg_out32(s, B); - s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s); - break; + qemu_build_assert(TCG_TARGET_HAS_direct_jump); + /* Direct jump. */ + if (TCG_TARGET_REG_BITS == 64) { + /* Ensure the next insns are 8 or 16-byte aligned. */ + while ((uintptr_t)s->code_ptr & (have_isa_2_07 ? 15 : 7)) { + tcg_out32(s, NOP); } + s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s); + tcg_out32(s, ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, 0)); + tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, 0)); } else { - /* Indirect jump. 
*/ - tcg_debug_assert(s->tb_jmp_insn_offset == NULL); - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TB, 0, - (intptr_t)(s->tb_jmp_insn_offset + args[0])); + s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s); + tcg_out32(s, B); + s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s); + break; } tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR); tcg_out32(s, BCCTR | BO_ALWAYS); From 1ce41e044391120e8dd74fc7c9119747fb072632 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 26 Nov 2022 15:05:37 -0800 Subject: [PATCH 04/22] tcg/sparc64: Remove unused goto_tb code for indirect jump MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/sparc64/tcg-target.c.inc | 41 +++++++++++------------------------- 1 file changed, 12 insertions(+), 29 deletions(-) diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index d2d8b46815..26b00d1638 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -537,17 +537,6 @@ static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, return false; } -static void tcg_out_ld_ptr(TCGContext *s, TCGReg ret, const void *arg) -{ - intptr_t diff = tcg_tbrel_diff(s, arg); - if (USE_REG_TB && check_fit_ptr(diff, 13)) { - tcg_out_ld(s, TCG_TYPE_PTR, ret, TCG_REG_TB, diff); - return; - } - tcg_out_movi(s, TCG_TYPE_PTR, ret, (uintptr_t)arg & ~0x3ff); - tcg_out_ld(s, TCG_TYPE_PTR, ret, ret, (uintptr_t)arg & 0x3ff); -} - static void tcg_out_sety(TCGContext *s, TCGReg rs) { tcg_out32(s, WRY | INSN_RS1(TCG_REG_G0) | INSN_RS2(rs)); @@ -1463,27 +1452,21 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_goto_tb: - if (s->tb_jmp_insn_offset) { - /* direct jump method */ - if (USE_REG_TB) { - /* make sure the patch is 8-byte aligned. */ - if ((intptr_t)s->code_ptr & 4) { - tcg_out_nop(s); - } - s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); - tcg_out_sethi(s, TCG_REG_T1, 0); - tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, 0, ARITH_OR); - tcg_out_arith(s, TCG_REG_G0, TCG_REG_TB, TCG_REG_T1, JMPL); - tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB, TCG_REG_T1, ARITH_ADD); - } else { - s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); - tcg_out32(s, CALL); + qemu_build_assert(TCG_TARGET_HAS_direct_jump); + /* Direct jump. */ + if (USE_REG_TB) { + /* make sure the patch is 8-byte aligned. */ + if ((intptr_t)s->code_ptr & 4) { tcg_out_nop(s); } + s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); + tcg_out_sethi(s, TCG_REG_T1, 0); + tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, 0, ARITH_OR); + tcg_out_arith(s, TCG_REG_G0, TCG_REG_TB, TCG_REG_T1, JMPL); + tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB, TCG_REG_T1, ARITH_ADD); } else { - /* indirect jump method */ - tcg_out_ld_ptr(s, TCG_REG_TB, s->tb_jmp_target_addr + a0); - tcg_out_arithi(s, TCG_REG_G0, TCG_REG_TB, 0, JMPL); + s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); + tcg_out32(s, CALL); tcg_out_nop(s); } set_jmp_reset_offset(s, a0); From 7f83167c612438bb46ef01b5b23f7b2a0827bdc4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 26 Nov 2022 15:09:00 -0800 Subject: [PATCH 05/22] tcg: Replace asserts on tcg_jmp_insn_offset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Test TCG_TARGET_HAS_direct_jump instead of testing an implementation pointer. 
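Concretely, in the backends converted below this replaces a runtime
check on the per-TB pointer with a compile-time check on the target
capability; e.g. in the aarch64 backend the change is:

    -    tcg_debug_assert(s->tb_jmp_insn_offset != NULL);
    +    qemu_build_assert(TCG_TARGET_HAS_direct_jump);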
Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 2 +- tcg/arm/tcg-target.c.inc | 2 +- tcg/loongarch64/tcg-target.c.inc | 2 +- tcg/mips/tcg-target.c.inc | 2 +- tcg/riscv/tcg-target.c.inc | 2 +- tcg/tci/tcg-target.c.inc | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 501b77c215..90af096c11 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1918,7 +1918,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_goto_tb: - tcg_debug_assert(s->tb_jmp_insn_offset != NULL); + qemu_build_assert(TCG_TARGET_HAS_direct_jump); /* * Ensure that ADRP+ADD are 8-byte aligned so that an atomic * write can be used to patch the target address. diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 799cf13536..033ff90daa 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1953,7 +1953,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, intptr_t ptr, dif, dil; TCGReg base = TCG_REG_PC; - tcg_debug_assert(s->tb_jmp_insn_offset == 0); + qemu_build_assert(!TCG_TARGET_HAS_direct_jump); ptr = (intptr_t)tcg_splitwx_to_rx(s->tb_jmp_target_addr + args[0]); dif = tcg_pcrel_diff(s, (void *)ptr) - 8; dil = sextract32(dif, 0, 12); diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 29e4bfcb49..5dd645fd17 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1090,7 +1090,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_goto_tb: - tcg_debug_assert(s->tb_jmp_insn_offset != NULL); + qemu_build_assert(TCG_TARGET_HAS_direct_jump); /* * Ensure that patch area is 8-byte aligned so that an * atomic write can be used to patch the target address. diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 52881abd35..02887d7cb1 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1987,7 +1987,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_goto_tb: /* indirect jump method */ - tcg_debug_assert(s->tb_jmp_insn_offset == 0); + qemu_build_assert(!TCG_TARGET_HAS_direct_jump); tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_REG_ZERO, (uintptr_t)(s->tb_jmp_target_addr + a0)); tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0); diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 9b42cb4b2e..b977c8025d 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -1311,7 +1311,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_goto_tb: - assert(s->tb_jmp_insn_offset == 0); + qemu_build_assert(!TCG_TARGET_HAS_direct_jump); /* indirect jump method */ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO, (uintptr_t)(s->tb_jmp_target_addr + a0)); diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 2f3bcce3a7..ad356f1875 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -603,7 +603,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_goto_tb: - tcg_debug_assert(s->tb_jmp_insn_offset == 0); + qemu_build_assert(!TCG_TARGET_HAS_direct_jump); /* indirect jump method. 
*/ tcg_out_op_p(s, opc, s->tb_jmp_target_addr + args[0]); set_jmp_reset_offset(s, args[0]); From b52a2c03b7d36694c21d70bcd46d68aaba5b0840 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 26 Nov 2022 15:18:44 -0800 Subject: [PATCH 06/22] tcg: Introduce set_jmp_insn_offset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similar to the existing set_jmp_reset_offset. Move any assert for TCG_TARGET_HAS_direct_jump into the new function (which now cannot be build-time). Will be unused if TCG_TARGET_HAS_direct_jump is constant 0, but we can't test for constant in the preprocessor, so just mark it G_GNUC_UNUSED. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 3 +-- tcg/i386/tcg-target.c.inc | 3 +-- tcg/loongarch64/tcg-target.c.inc | 3 +-- tcg/ppc/tcg-target.c.inc | 7 +++---- tcg/s390x/tcg-target.c.inc | 2 +- tcg/sparc64/tcg-target.c.inc | 5 ++--- tcg/tcg.c | 10 ++++++++++ 7 files changed, 19 insertions(+), 14 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 90af096c11..59e6a08e93 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1918,7 +1918,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_goto_tb: - qemu_build_assert(TCG_TARGET_HAS_direct_jump); /* * Ensure that ADRP+ADD are 8-byte aligned so that an atomic * write can be used to patch the target address. @@ -1926,7 +1925,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, if ((uintptr_t)s->code_ptr & 7) { tcg_out32(s, NOP); } - s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); + set_jmp_insn_offset(s, a0); /* * actual branch destination will be patched by * tb_target_set_jmp_target later diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index c4ff59e9ee..6fb40fe8ba 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2383,7 +2383,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_goto_tb: - qemu_build_assert(TCG_TARGET_HAS_direct_jump); { /* * Jump displacement must be aligned for atomic patching; @@ -2394,7 +2393,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_nopn(s, gap - 1); } tcg_out8(s, OPC_JMP_long); /* jmp im */ - s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); + set_jmp_insn_offset(s, a0); tcg_out32(s, 0); } set_jmp_reset_offset(s, a0); diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 5dd645fd17..bce7340604 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1090,7 +1090,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_goto_tb: - qemu_build_assert(TCG_TARGET_HAS_direct_jump); /* * Ensure that patch area is 8-byte aligned so that an * atomic write can be used to patch the target address. 
@@ -1098,7 +1097,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, if ((uintptr_t)s->code_ptr & 7) { tcg_out_nop(s); } - s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); + set_jmp_insn_offset(s, a0); /* * actual branch destination will be patched by * tb_target_set_jmp_target later diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index b72e266990..dbe8ccd353 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2630,20 +2630,19 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_goto_tb: - qemu_build_assert(TCG_TARGET_HAS_direct_jump); /* Direct jump. */ if (TCG_TARGET_REG_BITS == 64) { /* Ensure the next insns are 8 or 16-byte aligned. */ while ((uintptr_t)s->code_ptr & (have_isa_2_07 ? 15 : 7)) { tcg_out32(s, NOP); } - s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s); + set_jmp_insn_offset(s, args[0]); tcg_out32(s, ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, 0)); tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, 0)); } else { - s->tb_jmp_insn_offset[args[0]] = tcg_current_code_size(s); + set_jmp_insn_offset(s, args[0]); tcg_out32(s, B); - s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s); + set_jmp_reset_offset(s, args[0]); break; } tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR); diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 48a0c3e3c0..c234347d6a 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -1977,7 +1977,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out16(s, NOP); } tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4)); - s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); + set_jmp_insn_offset(s, a0); s->code_ptr += 2; set_jmp_reset_offset(s, a0); break; diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 26b00d1638..c3109fe51b 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1452,20 +1452,19 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_goto_tb: - qemu_build_assert(TCG_TARGET_HAS_direct_jump); /* Direct jump. */ if (USE_REG_TB) { /* make sure the patch is 8-byte aligned. */ if ((intptr_t)s->code_ptr & 4) { tcg_out_nop(s); } - s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); + set_jmp_insn_offset(s, a0); tcg_out_sethi(s, TCG_REG_T1, 0); tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, 0, ARITH_OR); tcg_out_arith(s, TCG_REG_G0, TCG_REG_TB, TCG_REG_T1, JMPL); tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB, TCG_REG_T1, ARITH_ADD); } else { - s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); + set_jmp_insn_offset(s, a0); tcg_out32(s, CALL); tcg_out_nop(s); } diff --git a/tcg/tcg.c b/tcg/tcg.c index 257479337c..4092dac294 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -313,6 +313,16 @@ static void set_jmp_reset_offset(TCGContext *s, int which) s->tb_jmp_reset_offset[which] = tcg_current_code_size(s); } +static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which) +{ + /* + * We will check for overflow at the end of the opcode loop in + * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX. + */ + tcg_debug_assert(TCG_TARGET_HAS_direct_jump); + s->tb_jmp_insn_offset[which] = tcg_current_code_size(s); +} + /* Signal overflow, starting over with fewer guest insns. 
*/ static G_NORETURN void tcg_raise_tb_overflow(TCGContext *s) From becc452a367aa681ca0c1fcb688ae0f16b32b11f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 26 Nov 2022 17:42:11 -0800 Subject: [PATCH 07/22] tcg: Introduce get_jmp_target_addr MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similar to the existing set_jmp_reset_offset. Include the rw->rx address space conversion done by arm and s390x, and forgotten by mips and riscv. Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/arm/tcg-target.c.inc | 2 +- tcg/mips/tcg-target.c.inc | 2 +- tcg/riscv/tcg-target.c.inc | 2 +- tcg/tcg.c | 9 +++++++++ tcg/tci/tcg-target.c.inc | 2 +- 5 files changed, 13 insertions(+), 4 deletions(-) diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 033ff90daa..83b6d77e2e 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1954,7 +1954,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGReg base = TCG_REG_PC; qemu_build_assert(!TCG_TARGET_HAS_direct_jump); - ptr = (intptr_t)tcg_splitwx_to_rx(s->tb_jmp_target_addr + args[0]); + ptr = get_jmp_target_addr(s, args[0]); dif = tcg_pcrel_diff(s, (void *)ptr) - 8; dil = sextract32(dif, 0, 12); if (dif != dil) { diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 02887d7cb1..c30173ab64 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1989,7 +1989,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, /* indirect jump method */ qemu_build_assert(!TCG_TARGET_HAS_direct_jump); tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_REG_ZERO, - (uintptr_t)(s->tb_jmp_target_addr + a0)); + get_jmp_target_addr(s, a0)); tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0); tcg_out_nop(s); set_jmp_reset_offset(s, a0); diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index b977c8025d..5b2eac6ab8 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -1314,7 +1314,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, qemu_build_assert(!TCG_TARGET_HAS_direct_jump); /* indirect jump method */ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO, - (uintptr_t)(s->tb_jmp_target_addr + a0)); + get_jmp_target_addr(s, a0)); tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, TCG_REG_TMP0, 0); set_jmp_reset_offset(s, a0); break; diff --git a/tcg/tcg.c b/tcg/tcg.c index 4092dac294..2a14fc2a97 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -323,6 +323,15 @@ static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which) s->tb_jmp_insn_offset[which] = tcg_current_code_size(s); } +static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which) +{ + /* + * Return the read-execute version of the pointer, for the benefit + * of any pc-relative addressing mode. + */ + return (uintptr_t)tcg_splitwx_to_rx(&s->tb_jmp_target_addr[which]); +} + /* Signal overflow, starting over with fewer guest insns. */ static G_NORETURN void tcg_raise_tb_overflow(TCGContext *s) diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index ad356f1875..59daffc0a0 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -605,7 +605,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_goto_tb: qemu_build_assert(!TCG_TARGET_HAS_direct_jump); /* indirect jump method. 
*/ - tcg_out_op_p(s, opc, s->tb_jmp_target_addr + args[0]); + tcg_out_op_p(s, opc, (void *)get_jmp_target_addr(s, args[0])); set_jmp_reset_offset(s, args[0]); break; From cf7d6b8e9828784d118eebb6419678d196cd51b5 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 26 Nov 2022 17:14:05 -0800 Subject: [PATCH 08/22] tcg: Split out tcg_out_goto_tb MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The INDEX_op_goto_tb opcode needs no register allocation. Split out a dedicated helper function for it. Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 40 ++++++++++--------- tcg/arm/tcg-target.c.inc | 49 ++++++++++++----------- tcg/i386/tcg-target.c.inc | 33 ++++++++-------- tcg/loongarch64/tcg-target.c.inc | 38 +++++++++--------- tcg/mips/tcg-target.c.inc | 21 +++++----- tcg/ppc/tcg-target.c.inc | 52 ++++++++++++------------ tcg/riscv/tcg-target.c.inc | 20 +++++----- tcg/s390x/tcg-target.c.inc | 31 ++++++++------- tcg/sparc64/tcg-target.c.inc | 68 +++++++++++++++++--------------- tcg/tcg.c | 4 ++ tcg/tci/tcg-target.c.inc | 16 ++++---- 11 files changed, 199 insertions(+), 173 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 59e6a08e93..ad35bee8af 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1898,6 +1898,26 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) } } +static void tcg_out_goto_tb(TCGContext *s, int which) +{ + /* + * Ensure that ADRP+ADD are 8-byte aligned so that an atomic + * write can be used to patch the target address. + */ + if ((uintptr_t)s->code_ptr & 7) { + tcg_out32(s, NOP); + } + set_jmp_insn_offset(s, which); + /* + * actual branch destination will be patched by + * tb_target_set_jmp_target later + */ + tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0); + tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0); + tcg_out_insn(s, 3207, BR, TCG_REG_TMP); + set_jmp_reset_offset(s, which); +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1917,25 +1937,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, #define REG0(I) (const_args[I] ? TCG_REG_XZR : (TCGReg)args[I]) switch (opc) { - case INDEX_op_goto_tb: - /* - * Ensure that ADRP+ADD are 8-byte aligned so that an atomic - * write can be used to patch the target address. - */ - if ((uintptr_t)s->code_ptr & 7) { - tcg_out32(s, NOP); - } - set_jmp_insn_offset(s, a0); - /* - * actual branch destination will be patched by - * tb_target_set_jmp_target later - */ - tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0); - tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0); - tcg_out_insn(s, 3207, BR, TCG_REG_TMP); - set_jmp_reset_offset(s, a0); - break; - case INDEX_op_goto_ptr: tcg_out_insn(s, 3207, BR, a0); break; @@ -2306,6 +2307,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ + case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. 
*/ default: g_assert_not_reached(); } diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index 83b6d77e2e..b8f3b0c634 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1939,6 +1939,31 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg) tcg_out_epilogue(s); } +static void tcg_out_goto_tb(TCGContext *s, int which) +{ + /* Indirect jump method */ + intptr_t ptr, dif, dil; + TCGReg base = TCG_REG_PC; + + qemu_build_assert(!TCG_TARGET_HAS_direct_jump); + ptr = get_jmp_target_addr(s, which); + dif = tcg_pcrel_diff(s, (void *)ptr) - 8; + dil = sextract32(dif, 0, 12); + if (dif != dil) { + /* + * The TB is close, but outside the 12 bits addressable by + * the load. We can extend this to 20 bits with a sub of a + * shifted immediate from pc. In the vastly unlikely event + * the code requires more than 1MB, we'll use 2 insns and + * be no worse off. + */ + base = TCG_REG_R0; + tcg_out_movi32(s, COND_AL, base, ptr - dil); + } + tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, base, dil); + set_jmp_reset_offset(s, which); +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1947,29 +1972,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, int c; switch (opc) { - case INDEX_op_goto_tb: - { - /* Indirect jump method */ - intptr_t ptr, dif, dil; - TCGReg base = TCG_REG_PC; - - qemu_build_assert(!TCG_TARGET_HAS_direct_jump); - ptr = get_jmp_target_addr(s, args[0]); - dif = tcg_pcrel_diff(s, (void *)ptr) - 8; - dil = sextract32(dif, 0, 12); - if (dif != dil) { - /* The TB is close, but outside the 12 bits addressable by - the load. We can extend this to 20 bits with a sub of a - shifted immediate from pc. In the vastly unlikely event - the code requires more than 1MB, we'll use 2 insns and - be no worse off. */ - base = TCG_REG_R0; - tcg_out_movi32(s, COND_AL, base, ptr - dil); - } - tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, base, dil); - set_jmp_reset_offset(s, args[0]); - } - break; case INDEX_op_goto_ptr: tcg_out_b_reg(s, COND_AL, args[0]); break; @@ -2259,6 +2261,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */ case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ + case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. 
*/ default: tcg_abort(); } diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 6fb40fe8ba..33c4139730 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2358,6 +2358,22 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) } } +static void tcg_out_goto_tb(TCGContext *s, int which) +{ + /* + * Jump displacement must be aligned for atomic patching; + * see if we need to add extra nops before jump + */ + int gap = QEMU_ALIGN_PTR_UP(s->code_ptr + 1, 4) - s->code_ptr; + if (gap != 1) { + tcg_out_nopn(s, gap - 1); + } + tcg_out8(s, OPC_JMP_long); /* jmp im */ + set_jmp_insn_offset(s, which); + tcg_out32(s, 0); + set_jmp_reset_offset(s, which); +} + static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -2382,22 +2398,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const_a2 = const_args[2]; switch (opc) { - case INDEX_op_goto_tb: - { - /* - * Jump displacement must be aligned for atomic patching; - * see if we need to add extra nops before jump - */ - int gap = QEMU_ALIGN_PTR_UP(s->code_ptr + 1, 4) - s->code_ptr; - if (gap != 1) { - tcg_out_nopn(s, gap - 1); - } - tcg_out8(s, OPC_JMP_long); /* jmp im */ - set_jmp_insn_offset(s, a0); - tcg_out32(s, 0); - } - set_jmp_reset_offset(s, a0); - break; case INDEX_op_goto_ptr: /* jmp to the given host address (could be epilogue) */ tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, a0); @@ -2792,6 +2792,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ + case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ default: tcg_abort(); } diff --git a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index bce7340604..25de7a9ee0 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1079,6 +1079,25 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) } } +static void tcg_out_goto_tb(TCGContext *s, int which) +{ + /* + * Ensure that patch area is 8-byte aligned so that an + * atomic write can be used to patch the target address. + */ + if ((uintptr_t)s->code_ptr & 7) { + tcg_out_nop(s); + } + set_jmp_insn_offset(s, which); + /* + * actual branch destination will be patched by + * tb_target_set_jmp_target later + */ + tcg_out_opc_pcaddu18i(s, TCG_REG_TMP0, 0); + tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0); + set_jmp_reset_offset(s, which); +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1089,24 +1108,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, int c2 = const_args[2]; switch (opc) { - case INDEX_op_goto_tb: - /* - * Ensure that patch area is 8-byte aligned so that an - * atomic write can be used to patch the target address. - */ - if ((uintptr_t)s->code_ptr & 7) { - tcg_out_nop(s); - } - set_jmp_insn_offset(s, a0); - /* - * actual branch destination will be patched by - * tb_target_set_jmp_target later - */ - tcg_out_opc_pcaddu18i(s, TCG_REG_TMP0, 0); - tcg_out_opc_jirl(s, TCG_REG_ZERO, TCG_REG_TMP0, 0); - set_jmp_reset_offset(s, a0); - break; - case INDEX_op_mb: tcg_out_mb(s, a0); break; @@ -1501,6 +1502,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. 
*/ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ + case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ default: g_assert_not_reached(); } diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index c30173ab64..e54df4128b 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1966,6 +1966,17 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) tcg_out_opc_imm(s, OPC_ORI, TCG_REG_V0, b0, a0 & 0xffff); } +static void tcg_out_goto_tb(TCGContext *s, int which) +{ + /* indirect jump method */ + qemu_build_assert(!TCG_TARGET_HAS_direct_jump); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_REG_ZERO, + get_jmp_target_addr(s, which)); + tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0); + tcg_out_nop(s); + set_jmp_reset_offset(s, which); +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1985,15 +1996,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, c2 = const_args[2]; switch (opc) { - case INDEX_op_goto_tb: - /* indirect jump method */ - qemu_build_assert(!TCG_TARGET_HAS_direct_jump); - tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_REG_ZERO, - get_jmp_target_addr(s, a0)); - tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0); - tcg_out_nop(s); - set_jmp_reset_offset(s, a0); - break; case INDEX_op_goto_ptr: /* jmp to the given host address (could be epilogue) */ tcg_out_opc_reg(s, OPC_JR, 0, a0, 0); @@ -2402,6 +2404,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ + case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ default: tcg_abort(); } diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index dbe8ccd353..e56f86c613 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -2622,6 +2622,32 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg) tcg_out_b(s, 0, tcg_code_gen_epilogue); } +static void tcg_out_goto_tb(TCGContext *s, int which) +{ + /* Direct jump. */ + if (TCG_TARGET_REG_BITS == 64) { + /* Ensure the next insns are 8 or 16-byte aligned. */ + while ((uintptr_t)s->code_ptr & (have_isa_2_07 ? 15 : 7)) { + tcg_out32(s, NOP); + } + set_jmp_insn_offset(s, which); + tcg_out32(s, ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, 0)); + tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, 0)); + tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR); + tcg_out32(s, BCCTR | BO_ALWAYS); + set_jmp_reset_offset(s, which); + if (USE_REG_TB) { + /* For the unlinked case, need to reset TCG_REG_TB. */ + tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB, + -tcg_current_code_size(s)); + } + } else { + set_jmp_insn_offset(s, which); + tcg_out32(s, B); + set_jmp_reset_offset(s, which); + } +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -2629,31 +2655,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGArg a0, a1, a2; switch (opc) { - case INDEX_op_goto_tb: - /* Direct jump. */ - if (TCG_TARGET_REG_BITS == 64) { - /* Ensure the next insns are 8 or 16-byte aligned. */ - while ((uintptr_t)s->code_ptr & (have_isa_2_07 ? 
15 : 7)) { - tcg_out32(s, NOP); - } - set_jmp_insn_offset(s, args[0]); - tcg_out32(s, ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, 0)); - tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, 0)); - } else { - set_jmp_insn_offset(s, args[0]); - tcg_out32(s, B); - set_jmp_reset_offset(s, args[0]); - break; - } - tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR); - tcg_out32(s, BCCTR | BO_ALWAYS); - set_jmp_reset_offset(s, args[0]); - if (USE_REG_TB) { - /* For the unlinked case, need to reset TCG_REG_TB. */ - tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB, - -tcg_current_code_size(s)); - } - break; case INDEX_op_goto_ptr: tcg_out32(s, MTSPR | RS(args[0]) | CTR); if (USE_REG_TB) { @@ -3181,6 +3182,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ + case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ default: tcg_abort(); } diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index 5b2eac6ab8..ee6759f787 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -1300,6 +1300,16 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) } } +static void tcg_out_goto_tb(TCGContext *s, int which) +{ + qemu_build_assert(!TCG_TARGET_HAS_direct_jump); + /* indirect jump method */ + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO, + get_jmp_target_addr(s, which)); + tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, TCG_REG_TMP0, 0); + set_jmp_reset_offset(s, which); +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1310,15 +1320,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, int c2 = const_args[2]; switch (opc) { - case INDEX_op_goto_tb: - qemu_build_assert(!TCG_TARGET_HAS_direct_jump); - /* indirect jump method */ - tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO, - get_jmp_target_addr(s, a0)); - tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, TCG_REG_TMP0, 0); - set_jmp_reset_offset(s, a0); - break; - case INDEX_op_goto_ptr: tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, a0, 0); break; @@ -1619,6 +1620,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ + case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. 
*/ default: g_assert_not_reached(); } diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index c234347d6a..e008f0efcc 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -1955,6 +1955,21 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) } } +static void tcg_out_goto_tb(TCGContext *s, int which) +{ + /* + * Branch displacement must be aligned for atomic patching; + * see if we need to add extra nop before branch + */ + if (!QEMU_PTR_IS_ALIGNED(s->code_ptr + 1, 4)) { + tcg_out16(s, NOP); + } + tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4)); + set_jmp_insn_offset(s, which); + s->code_ptr += 2; + set_jmp_reset_offset(s, which); +} + # define OP_32_64(x) \ case glue(glue(INDEX_op_,x),_i32): \ case glue(glue(INDEX_op_,x),_i64) @@ -1967,21 +1982,6 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGArg a0, a1, a2; switch (opc) { - case INDEX_op_goto_tb: - a0 = args[0]; - /* - * branch displacement must be aligned for atomic patching; - * see if we need to add extra nop before branch - */ - if (!QEMU_PTR_IS_ALIGNED(s->code_ptr + 1, 4)) { - tcg_out16(s, NOP); - } - tcg_out16(s, RIL_BRCL | (S390_CC_ALWAYS << 4)); - set_jmp_insn_offset(s, a0); - s->code_ptr += 2; - set_jmp_reset_offset(s, a0); - break; - case INDEX_op_goto_ptr: a0 = args[0]; tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, a0); @@ -2620,6 +2620,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ + case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ default: tcg_abort(); } diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index c3109fe51b..594767ded8 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1437,6 +1437,41 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, a0 & 0x3ff, ARITH_OR); } +static void tcg_out_goto_tb(TCGContext *s, int which) +{ + /* Direct jump. */ + if (USE_REG_TB) { + /* make sure the patch is 8-byte aligned. */ + if ((intptr_t)s->code_ptr & 4) { + tcg_out_nop(s); + } + set_jmp_insn_offset(s, which); + tcg_out_sethi(s, TCG_REG_T1, 0); + tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, 0, ARITH_OR); + tcg_out_arith(s, TCG_REG_G0, TCG_REG_TB, TCG_REG_T1, JMPL); + tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB, TCG_REG_T1, ARITH_ADD); + } else { + set_jmp_insn_offset(s, which); + tcg_out32(s, CALL); + tcg_out_nop(s); + } + set_jmp_reset_offset(s, which); + + /* + * For the unlinked path of goto_tb, we need to reset TCG_REG_TB + * to the beginning of this TB. + */ + if (USE_REG_TB) { + int c = -tcg_current_code_size(s); + if (check_fit_i32(c, 13)) { + tcg_out_arithi(s, TCG_REG_TB, TCG_REG_TB, c, ARITH_ADD); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, c); + tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB, TCG_REG_T1, ARITH_ADD); + } + } +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -1451,38 +1486,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, c2 = const_args[2]; switch (opc) { - case INDEX_op_goto_tb: - /* Direct jump. */ - if (USE_REG_TB) { - /* make sure the patch is 8-byte aligned. 
*/ - if ((intptr_t)s->code_ptr & 4) { - tcg_out_nop(s); - } - set_jmp_insn_offset(s, a0); - tcg_out_sethi(s, TCG_REG_T1, 0); - tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, 0, ARITH_OR); - tcg_out_arith(s, TCG_REG_G0, TCG_REG_TB, TCG_REG_T1, JMPL); - tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB, TCG_REG_T1, ARITH_ADD); - } else { - set_jmp_insn_offset(s, a0); - tcg_out32(s, CALL); - tcg_out_nop(s); - } - set_jmp_reset_offset(s, a0); - - /* For the unlinked path of goto_tb, we need to reset - TCG_REG_TB to the beginning of this TB. */ - if (USE_REG_TB) { - c = -tcg_current_code_size(s); - if (check_fit_i32(c, 13)) { - tcg_out_arithi(s, TCG_REG_TB, TCG_REG_TB, c, ARITH_ADD); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, c); - tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB, - TCG_REG_T1, ARITH_ADD); - } - } - break; case INDEX_op_goto_ptr: tcg_out_arithi(s, TCG_REG_G0, a0, 0, JMPL); if (USE_REG_TB) { @@ -1701,6 +1704,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ + case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. */ default: tcg_abort(); } diff --git a/tcg/tcg.c b/tcg/tcg.c index 2a14fc2a97..ffa4506e57 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -105,6 +105,7 @@ static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg); static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret, tcg_target_long arg); static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg); +static void tcg_out_goto_tb(TCGContext *s, int which); static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]); @@ -4741,6 +4742,9 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start) case INDEX_op_exit_tb: tcg_out_exit_tb(s, op->args[0]); break; + case INDEX_op_goto_tb: + tcg_out_goto_tb(s, op->args[0]); + break; case INDEX_op_dup2_vec: if (tcg_reg_alloc_dup2(s, op)) { break; diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 59daffc0a0..f2ac356900 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -595,6 +595,14 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg) tcg_out_op_p(s, INDEX_op_exit_tb, (void *)arg); } +static void tcg_out_goto_tb(TCGContext *s, int which) +{ + qemu_build_assert(!TCG_TARGET_HAS_direct_jump); + /* indirect jump method. */ + tcg_out_op_p(s, INDEX_op_goto_tb, (void *)get_jmp_target_addr(s, which)); + set_jmp_reset_offset(s, which); +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) @@ -602,13 +610,6 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGOpcode exts; switch (opc) { - case INDEX_op_goto_tb: - qemu_build_assert(!TCG_TARGET_HAS_direct_jump); - /* indirect jump method. */ - tcg_out_op_p(s, opc, (void *)get_jmp_target_addr(s, args[0])); - set_jmp_reset_offset(s, args[0]); - break; - case INDEX_op_goto_ptr: tcg_out_op_r(s, opc, args[0]); break; @@ -781,6 +782,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_mov_i64: case INDEX_op_call: /* Always emitted via tcg_out_call. */ case INDEX_op_exit_tb: /* Always emitted via tcg_out_exit_tb. */ + case INDEX_op_goto_tb: /* Always emitted via tcg_out_goto_tb. 
*/ default: tcg_abort(); } From 3a50f424c9e066bee18bfa9cadcd3e21003ca6bb Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 26 Nov 2022 18:20:57 -0800 Subject: [PATCH 09/22] tcg: Rename TB_JMP_RESET_OFFSET_INVALID to TB_JMP_OFFSET_INVALID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This will shortly be used for more than reset. Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- accel/tcg/translate-all.c | 8 ++++---- include/exec/exec-all.h | 2 +- tcg/tcg.c | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 979f8e1107..a4fdce5b72 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -508,10 +508,10 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tb->jmp_dest[1] = (uintptr_t)NULL; /* init original jump addresses which have been set during tcg_gen_code() */ - if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) { + if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) { tb_reset_jump(tb, 0); } - if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) { + if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) { tb_reset_jump(tb, 1); } @@ -693,9 +693,9 @@ static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data) if (tb_page_addr1(tb) != -1) { tst->cross_page++; } - if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) { + if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) { tst->direct_jmp_count++; - if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) { + if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) { tst->direct_jmp2_count++; } } diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 25e11b0a8d..b4d09c89ab 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -585,8 +585,8 @@ struct TranslationBlock { * setting one of the jump targets (or patching the jump instruction). Only * two of such jumps are supported. */ +#define TB_JMP_OFFSET_INVALID 0xffff /* indicates no jump generated */ uint16_t jmp_reset_offset[2]; /* offset of original jump target */ -#define TB_JMP_RESET_OFFSET_INVALID 0xffff /* indicates no jump generated */ uintptr_t jmp_target_arg[2]; /* target address or offset */ /* diff --git a/tcg/tcg.c b/tcg/tcg.c index ffa4506e57..ff674c5122 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -4666,8 +4666,8 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start) #endif /* Initialize goto_tb jump offsets. */ - tb->jmp_reset_offset[0] = TB_JMP_RESET_OFFSET_INVALID; - tb->jmp_reset_offset[1] = TB_JMP_RESET_OFFSET_INVALID; + tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID; + tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID; tcg_ctx->tb_jmp_reset_offset = tb->jmp_reset_offset; if (TCG_TARGET_HAS_direct_jump) { tcg_ctx->tb_jmp_insn_offset = tb->jmp_target_arg; From b7e4afbd9f2f9233a37e021f9e8cce472aecdd64 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 26 Nov 2022 18:39:55 -0800 Subject: [PATCH 10/22] tcg: Add gen_tb to TCGContext MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This can replace four other variables that are references into the TranslationBlock structure. 
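For illustration only (the access pattern is taken from the tcg-op.c hunks below; this sketch is not itself part of the patch), a typical cflags test changes from

    if (tcg_ctx->tb_cflags & CF_PARALLEL) { ... }

to

    if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { ... }

so that TCGContext carries a single gen_tb back-pointer and reaches cflags, jmp_reset_offset[] and jmp_target_arg[] through it.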
Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- accel/tcg/translate-all.c | 2 +- include/tcg/tcg.h | 11 +++-------- tcg/tcg-op.c | 14 +++++++------- tcg/tcg.c | 14 +++----------- 4 files changed, 14 insertions(+), 27 deletions(-) diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index a4fdce5b72..9e925c10f3 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -350,7 +350,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tb->trace_vcpu_dstate = *cpu->trace_dstate; tb_set_page_addr0(tb, phys_pc); tb_set_page_addr1(tb, -1); - tcg_ctx->tb_cflags = cflags; + tcg_ctx->gen_tb = tb; tb_overflow: #ifdef CONFIG_PROFILER diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index b949d75fdd..c2d5430b5a 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -552,20 +552,15 @@ struct TCGContext { int nb_indirects; int nb_ops; - /* goto_tb support */ - tcg_insn_unit *code_buf; - uint16_t *tb_jmp_reset_offset; /* tb->jmp_reset_offset */ - uintptr_t *tb_jmp_insn_offset; /* tb->jmp_target_arg if direct_jump */ - uintptr_t *tb_jmp_target_addr; /* tb->jmp_target_arg if !direct_jump */ - TCGRegSet reserved_regs; - uint32_t tb_cflags; /* cflags of the current TB */ intptr_t current_frame_offset; intptr_t frame_start; intptr_t frame_end; TCGTemp *frame_temp; - tcg_insn_unit *code_ptr; + TranslationBlock *gen_tb; /* tb for which code is being generated */ + tcg_insn_unit *code_buf; /* pointer for start of tb */ + tcg_insn_unit *code_ptr; /* pointer for running end of tb */ #ifdef CONFIG_PROFILER TCGProfile prof; diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index cd1cd4e736..9fa9f1b0fd 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -86,7 +86,7 @@ void tcg_gen_op6(TCGOpcode opc, TCGArg a1, TCGArg a2, TCGArg a3, void tcg_gen_mb(TCGBar mb_type) { - if (tcg_ctx->tb_cflags & CF_PARALLEL) { + if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { tcg_gen_op1(INDEX_op_mb, mb_type); } } @@ -2782,7 +2782,7 @@ void tcg_gen_exit_tb(const TranslationBlock *tb, unsigned idx) void tcg_gen_goto_tb(unsigned idx) { /* We tested CF_NO_GOTO_TB in translator_use_goto_tb. */ - tcg_debug_assert(!(tcg_ctx->tb_cflags & CF_NO_GOTO_TB)); + tcg_debug_assert(!(tcg_ctx->gen_tb->cflags & CF_NO_GOTO_TB)); /* We only support two chained exits. 
*/ tcg_debug_assert(idx <= TB_EXIT_IDXMAX); #ifdef CONFIG_DEBUG_TCG @@ -2798,7 +2798,7 @@ void tcg_gen_lookup_and_goto_ptr(void) { TCGv_ptr ptr; - if (tcg_ctx->tb_cflags & CF_NO_GOTO_PTR) { + if (tcg_ctx->gen_tb->cflags & CF_NO_GOTO_PTR) { tcg_gen_exit_tb(NULL, 0); return; } @@ -3165,7 +3165,7 @@ void tcg_gen_atomic_cmpxchg_i32(TCGv_i32 retv, TCGv addr, TCGv_i32 cmpv, { memop = tcg_canonicalize_memop(memop, 0, 0); - if (!(tcg_ctx->tb_cflags & CF_PARALLEL)) { + if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) { TCGv_i32 t1 = tcg_temp_new_i32(); TCGv_i32 t2 = tcg_temp_new_i32(); @@ -3203,7 +3203,7 @@ void tcg_gen_atomic_cmpxchg_i64(TCGv_i64 retv, TCGv addr, TCGv_i64 cmpv, { memop = tcg_canonicalize_memop(memop, 1, 0); - if (!(tcg_ctx->tb_cflags & CF_PARALLEL)) { + if (!(tcg_ctx->gen_tb->cflags & CF_PARALLEL)) { TCGv_i64 t1 = tcg_temp_new_i64(); TCGv_i64 t2 = tcg_temp_new_i64(); @@ -3364,7 +3364,7 @@ static void * const table_##NAME[(MO_SIZE | MO_BSWAP) + 1] = { \ void tcg_gen_atomic_##NAME##_i32 \ (TCGv_i32 ret, TCGv addr, TCGv_i32 val, TCGArg idx, MemOp memop) \ { \ - if (tcg_ctx->tb_cflags & CF_PARALLEL) { \ + if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { \ do_atomic_op_i32(ret, addr, val, idx, memop, table_##NAME); \ } else { \ do_nonatomic_op_i32(ret, addr, val, idx, memop, NEW, \ @@ -3374,7 +3374,7 @@ void tcg_gen_atomic_##NAME##_i32 \ void tcg_gen_atomic_##NAME##_i64 \ (TCGv_i64 ret, TCGv addr, TCGv_i64 val, TCGArg idx, MemOp memop) \ { \ - if (tcg_ctx->tb_cflags & CF_PARALLEL) { \ + if (tcg_ctx->gen_tb->cflags & CF_PARALLEL) { \ do_atomic_op_i64(ret, addr, val, idx, memop, table_##NAME); \ } else { \ do_nonatomic_op_i64(ret, addr, val, idx, memop, NEW, \ diff --git a/tcg/tcg.c b/tcg/tcg.c index ff674c5122..4ac7086afe 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -311,7 +311,7 @@ static void set_jmp_reset_offset(TCGContext *s, int which) * We will check for overflow at the end of the opcode loop in * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX. */ - s->tb_jmp_reset_offset[which] = tcg_current_code_size(s); + s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s); } static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which) @@ -321,7 +321,7 @@ static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which) * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX. */ tcg_debug_assert(TCG_TARGET_HAS_direct_jump); - s->tb_jmp_insn_offset[which] = tcg_current_code_size(s); + s->gen_tb->jmp_target_arg[which] = tcg_current_code_size(s); } static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which) @@ -330,7 +330,7 @@ static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which) * Return the read-execute version of the pointer, for the benefit * of any pc-relative addressing mode. */ - return (uintptr_t)tcg_splitwx_to_rx(&s->tb_jmp_target_addr[which]); + return (uintptr_t)tcg_splitwx_to_rx(s->gen_tb->jmp_target_arg + which); } /* Signal overflow, starting over with fewer guest insns. */ @@ -4668,14 +4668,6 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start) /* Initialize goto_tb jump offsets. 
*/ tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID; tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID; - tcg_ctx->tb_jmp_reset_offset = tb->jmp_reset_offset; - if (TCG_TARGET_HAS_direct_jump) { - tcg_ctx->tb_jmp_insn_offset = tb->jmp_target_arg; - tcg_ctx->tb_jmp_target_addr = NULL; - } else { - tcg_ctx->tb_jmp_insn_offset = NULL; - tcg_ctx->tb_jmp_target_addr = tb->jmp_target_arg; - } tcg_reg_alloc_start(s); From 9da6079b2695dcffd8b18890db6cafdf4dc373db Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 26 Nov 2022 18:54:23 -0800 Subject: [PATCH 11/22] tcg: Add TranslationBlock.jmp_insn_offset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stop overloading jmp_target_arg for both offset and address, depending on TCG_TARGET_HAS_direct_jump. Instead, add a new field to hold the jump insn offset and always set the target address in jmp_target_addr[]. This will allow a tcg backend to use either direct or indirect depending on displacement. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- accel/tcg/cpu-exec.c | 5 ++--- include/exec/exec-all.h | 3 ++- tcg/tcg.c | 6 ++++-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 8927092537..25c4b04445 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -572,14 +572,13 @@ void cpu_exec_step_atomic(CPUState *cpu) void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr) { + tb->jmp_target_addr[n] = addr; if (TCG_TARGET_HAS_direct_jump) { - uintptr_t offset = tb->jmp_target_arg[n]; + uintptr_t offset = tb->jmp_insn_offset[n]; uintptr_t tc_ptr = (uintptr_t)tb->tc.ptr; uintptr_t jmp_rx = tc_ptr + offset; uintptr_t jmp_rw = jmp_rx - tcg_splitwx_diff; tb_target_set_jmp_target(tc_ptr, jmp_rx, jmp_rw, addr); - } else { - tb->jmp_target_arg[n] = addr; } } diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index b4d09c89ab..54585a9954 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -587,7 +587,8 @@ struct TranslationBlock { */ #define TB_JMP_OFFSET_INVALID 0xffff /* indicates no jump generated */ uint16_t jmp_reset_offset[2]; /* offset of original jump target */ - uintptr_t jmp_target_arg[2]; /* target address or offset */ + uint16_t jmp_insn_offset[2]; /* offset of direct jump insn */ + uintptr_t jmp_target_addr[2]; /* target address */ /* * Each TB has a NULL-terminated list (jmp_list_head) of incoming jumps. diff --git a/tcg/tcg.c b/tcg/tcg.c index 4ac7086afe..af2af99583 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -321,7 +321,7 @@ static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which) * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX. */ tcg_debug_assert(TCG_TARGET_HAS_direct_jump); - s->gen_tb->jmp_target_arg[which] = tcg_current_code_size(s); + s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s); } static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which) @@ -330,7 +330,7 @@ static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which) * Return the read-execute version of the pointer, for the benefit * of any pc-relative addressing mode. */ - return (uintptr_t)tcg_splitwx_to_rx(s->gen_tb->jmp_target_arg + which); + return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]); } /* Signal overflow, starting over with fewer guest insns. */ @@ -4668,6 +4668,8 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start) /* Initialize goto_tb jump offsets. 
*/ tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID; tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID; + tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID; + tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID; tcg_reg_alloc_start(s); From 0fe1c98da9d9abb8e5dc4a67c7e3bcf19aad1e85 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 5 Dec 2022 11:31:20 -0600 Subject: [PATCH 12/22] tcg: Change tb_target_set_jmp_target arguments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace 'tc_ptr' and 'addr' with 'tb' and 'n'. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- accel/tcg/cpu-exec.c | 11 ++++++++--- tcg/aarch64/tcg-target.c.inc | 5 +++-- tcg/aarch64/tcg-target.h | 3 ++- tcg/arm/tcg-target.h | 3 ++- tcg/i386/tcg-target.c.inc | 9 +++++++++ tcg/i386/tcg-target.h | 9 ++------- tcg/loongarch64/tcg-target.c.inc | 5 +++-- tcg/loongarch64/tcg-target.h | 3 ++- tcg/mips/tcg-target.h | 3 ++- tcg/ppc/tcg-target.c.inc | 7 ++++--- tcg/ppc/tcg-target.h | 3 ++- tcg/riscv/tcg-target.h | 3 ++- tcg/s390x/tcg-target.c.inc | 10 ++++++++++ tcg/s390x/tcg-target.h | 10 ++-------- tcg/sparc64/tcg-target.c.inc | 7 ++++--- tcg/sparc64/tcg-target.h | 3 ++- tcg/tci/tcg-target.h | 3 ++- 17 files changed, 61 insertions(+), 36 deletions(-) diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 25c4b04445..37c5f91074 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -574,11 +574,16 @@ void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr) { tb->jmp_target_addr[n] = addr; if (TCG_TARGET_HAS_direct_jump) { + /* + * Get the rx view of the structure, from which we find the + * executable code address, and tb_target_set_jmp_target can + * produce a pc-relative displacement to jmp_target_addr[n]. 
+ */ + const TranslationBlock *c_tb = tcg_splitwx_to_rx(tb); uintptr_t offset = tb->jmp_insn_offset[n]; - uintptr_t tc_ptr = (uintptr_t)tb->tc.ptr; - uintptr_t jmp_rx = tc_ptr + offset; + uintptr_t jmp_rx = (uintptr_t)tb->tc.ptr + offset; uintptr_t jmp_rw = jmp_rx - tcg_splitwx_diff; - tb_target_set_jmp_target(tc_ptr, jmp_rx, jmp_rw, addr); + tb_target_set_jmp_target(c_tb, n, jmp_rx, jmp_rw); } } diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index ad35bee8af..0b65f2cac1 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1353,9 +1353,10 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, tcg_out_call_int(s, target); } -void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, - uintptr_t jmp_rw, uintptr_t addr) +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t jmp_rx, uintptr_t jmp_rw) { + uintptr_t addr = tb->jmp_target_addr[n]; tcg_insn_unit i1, i2; TCGType rt = TCG_TYPE_I64; TCGReg rd = TCG_REG_TMP; diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index 413a5410c5..d491c198da 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -152,7 +152,8 @@ typedef enum { #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_HAS_MEMORY_BSWAP 0 -void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); +void tb_target_set_jmp_target(const TranslationBlock *, int, + uintptr_t, uintptr_t); #define TCG_TARGET_NEED_LDST_LABELS #define TCG_TARGET_NEED_POOL_LABELS diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index b7843d2d54..4c1433093c 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -152,7 +152,8 @@ extern bool use_neon_instructions; #define TCG_TARGET_HAS_MEMORY_BSWAP 0 /* not defined -- call should be eliminated at compile time */ -void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t, uintptr_t); #define TCG_TARGET_NEED_LDST_LABELS #define TCG_TARGET_NEED_POOL_LABELS diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc index 33c4139730..c71c3e664d 100644 --- a/tcg/i386/tcg-target.c.inc +++ b/tcg/i386/tcg-target.c.inc @@ -2374,6 +2374,15 @@ static void tcg_out_goto_tb(TCGContext *s, int which) set_jmp_reset_offset(s, which); } +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t jmp_rx, uintptr_t jmp_rw) +{ + /* patch the branch destination */ + uintptr_t addr = tb->jmp_target_addr[n]; + qatomic_set((int32_t *)jmp_rw, addr - (jmp_rx + 4)); + /* no need to flush icache explicitly */ +} + static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 7edb7f1d9a..7500ceaab9 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -220,13 +220,8 @@ extern bool have_movbe; #define TCG_TARGET_extract_i64_valid(ofs, len) \ (((ofs) == 8 && (len) == 8) || ((ofs) + (len)) == 32) -static inline void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, - uintptr_t jmp_rw, uintptr_t addr) -{ - /* patch the branch destination */ - qatomic_set((int32_t *)jmp_rw, addr - (jmp_rx + 4)); - /* no need to flush icache explicitly */ -} +void tb_target_set_jmp_target(const TranslationBlock *, int, + uintptr_t, uintptr_t); /* This defines the natural memory order supported by this * architecture before guarantees made by various barrier diff --git 
a/tcg/loongarch64/tcg-target.c.inc b/tcg/loongarch64/tcg-target.c.inc index 25de7a9ee0..3174557ce3 100644 --- a/tcg/loongarch64/tcg-target.c.inc +++ b/tcg/loongarch64/tcg-target.c.inc @@ -1039,11 +1039,12 @@ static void tcg_out_nop(TCGContext *s) tcg_out32(s, NOP); } -void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, - uintptr_t jmp_rw, uintptr_t addr) +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t jmp_rx, uintptr_t jmp_rw) { tcg_insn_unit i1, i2; ptrdiff_t upper, lower; + uintptr_t addr = tb->jmp_target_addr[n]; ptrdiff_t offset = (ptrdiff_t)(addr - jmp_rx) >> 2; if (offset == sextreg(offset, 0, 26)) { diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h index e5f7a1f09d..a150c3c7b2 100644 --- a/tcg/loongarch64/tcg-target.h +++ b/tcg/loongarch64/tcg-target.h @@ -171,7 +171,8 @@ typedef enum { #define TCG_TARGET_HAS_muluh_i64 1 #define TCG_TARGET_HAS_mulsh_i64 1 -void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t, uintptr_t); #define TCG_TARGET_DEFAULT_MO (0) diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index 15721c3e42..d1adf3e326 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -206,7 +206,8 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_MEMORY_BSWAP 1 /* not defined -- call should be eliminated at compile time */ -void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t) +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t, uintptr_t) QEMU_ERROR("code path is reachable"); #define TCG_TARGET_NEED_LDST_LABELS diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index e56f86c613..6f2c8faea6 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -1893,11 +1893,12 @@ static inline void ppc64_replace4(uintptr_t rx, uintptr_t rw, flush_idcache_range(rx, rw, 16); } -void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, - uintptr_t jmp_rw, uintptr_t addr) +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t jmp_rx, uintptr_t jmp_rw) { tcg_insn_unit i0, i1, i2, i3; - intptr_t tb_diff = addr - tc_ptr; + uintptr_t addr = tb->jmp_target_addr[n]; + intptr_t tb_diff = addr - (uintptr_t)tb->tc.ptr; intptr_t br_diff = addr - (jmp_rx + 4); intptr_t lo, hi; diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index b5cd225cfa..02764c3331 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -180,7 +180,8 @@ extern bool have_vsx; #define TCG_TARGET_HAS_bitsel_vec have_vsx #define TCG_TARGET_HAS_cmpsel_vec 0 -void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t, uintptr_t); #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_HAS_MEMORY_BSWAP 1 diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h index 232537ccea..bce164fde2 100644 --- a/tcg/riscv/tcg-target.h +++ b/tcg/riscv/tcg-target.h @@ -166,7 +166,8 @@ typedef enum { #endif /* not defined -- call should be eliminated at compile time */ -void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t, uintptr_t); #define TCG_TARGET_DEFAULT_MO (0) diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index e008f0efcc..2d049a4cc7 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -1970,6 
+1970,16 @@ static void tcg_out_goto_tb(TCGContext *s, int which) set_jmp_reset_offset(s, which); } +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t jmp_rx, uintptr_t jmp_rw) +{ + /* patch the branch destination */ + uintptr_t addr = tb->jmp_target_addr[n]; + intptr_t disp = addr - (jmp_rx - 2); + qatomic_set((int32_t *)jmp_rw, disp / 2); + /* no need to flush icache explicitly */ +} + # define OP_32_64(x) \ case glue(glue(INDEX_op_,x),_i32): \ case glue(glue(INDEX_op_,x),_i64) diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h index 68dcbc6645..57ba165800 100644 --- a/tcg/s390x/tcg-target.h +++ b/tcg/s390x/tcg-target.h @@ -175,14 +175,8 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD) -static inline void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, - uintptr_t jmp_rw, uintptr_t addr) -{ - /* patch the branch destination */ - intptr_t disp = addr - (jmp_rx - 2); - qatomic_set((int32_t *)jmp_rw, disp / 2); - /* no need to flush icache explicitly */ -} +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t jmp_rx, uintptr_t jmp_rw); #define TCG_TARGET_NEED_LDST_LABELS #define TCG_TARGET_NEED_POOL_LABELS diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 594767ded8..fdb711bdf6 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -1885,10 +1885,11 @@ void tcg_register_jit(const void *buf, size_t buf_size) tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame)); } -void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_rx, - uintptr_t jmp_rw, uintptr_t addr) +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t jmp_rx, uintptr_t jmp_rw) { - intptr_t tb_disp = addr - tc_ptr; + uintptr_t addr = tb->jmp_target_addr[n]; + intptr_t tb_disp = addr - (uintptr_t)tb->tc.ptr; intptr_t br_disp = addr - jmp_rx; tcg_insn_unit i1, i2; diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h index 0044ac8d78..282833bd8d 100644 --- a/tcg/sparc64/tcg-target.h +++ b/tcg/sparc64/tcg-target.h @@ -155,7 +155,8 @@ extern bool use_vis3_instructions; #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_HAS_MEMORY_BSWAP 1 -void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t, uintptr_t); #define TCG_TARGET_NEED_POOL_LABELS diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h index 94ec541b4e..f9ee83d751 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -177,6 +177,7 @@ typedef enum { #define TCG_TARGET_HAS_MEMORY_BSWAP 1 /* not defined -- call should be eliminated at compile time */ -void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t, uintptr_t); +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t, uintptr_t); #endif /* TCG_TARGET_H */ From 0012e3516e0f47b3aaf9213aea2c969cf6e8f42a Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 5 Dec 2022 16:34:03 -0600 Subject: [PATCH 13/22] tcg: Move tb_target_set_jmp_target declaration to tcg.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- include/tcg/tcg.h | 3 +++ tcg/aarch64/tcg-target.h | 4 ---- tcg/arm/tcg-target.h | 5 ----- tcg/i386/tcg-target.h | 3 --- tcg/loongarch64/tcg-target.h | 3 --- tcg/mips/tcg-target.h | 5 ----- tcg/ppc/tcg-target.h | 
4 ---- tcg/riscv/tcg-target.h | 4 ---- tcg/s390x/tcg-target.h | 4 ---- tcg/sparc64/tcg-target.h | 4 ---- tcg/tci/tcg-target.h | 4 ---- 11 files changed, 3 insertions(+), 40 deletions(-) diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h index c2d5430b5a..6f497172f8 100644 --- a/include/tcg/tcg.h +++ b/include/tcg/tcg.h @@ -833,6 +833,9 @@ void tcg_func_start(TCGContext *s); int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start); +void tb_target_set_jmp_target(const TranslationBlock *, int, + uintptr_t, uintptr_t); + void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size); TCGTemp *tcg_global_mem_new_internal(TCGType, TCGv_ptr, diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index d491c198da..a585d035d9 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -151,10 +151,6 @@ typedef enum { #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_HAS_MEMORY_BSWAP 0 - -void tb_target_set_jmp_target(const TranslationBlock *, int, - uintptr_t, uintptr_t); - #define TCG_TARGET_NEED_LDST_LABELS #define TCG_TARGET_NEED_POOL_LABELS diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index 4c1433093c..d347a5dc53 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -150,11 +150,6 @@ extern bool use_neon_instructions; #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_HAS_MEMORY_BSWAP 0 - -/* not defined -- call should be eliminated at compile time */ -void tb_target_set_jmp_target(const TranslationBlock *tb, int n, - uintptr_t, uintptr_t); - #define TCG_TARGET_NEED_LDST_LABELS #define TCG_TARGET_NEED_POOL_LABELS diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 7500ceaab9..d3705da2ed 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -220,9 +220,6 @@ extern bool have_movbe; #define TCG_TARGET_extract_i64_valid(ofs, len) \ (((ofs) == 8 && (len) == 8) || ((ofs) + (len)) == 32) -void tb_target_set_jmp_target(const TranslationBlock *, int, - uintptr_t, uintptr_t); - /* This defines the natural memory order supported by this * architecture before guarantees made by various barrier * instructions. 
diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h index a150c3c7b2..5782c6887c 100644 --- a/tcg/loongarch64/tcg-target.h +++ b/tcg/loongarch64/tcg-target.h @@ -171,9 +171,6 @@ typedef enum { #define TCG_TARGET_HAS_muluh_i64 1 #define TCG_TARGET_HAS_mulsh_i64 1 -void tb_target_set_jmp_target(const TranslationBlock *tb, int n, - uintptr_t, uintptr_t); - #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_NEED_LDST_LABELS diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index d1adf3e326..82b40100cf 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -205,11 +205,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_HAS_MEMORY_BSWAP 1 -/* not defined -- call should be eliminated at compile time */ -void tb_target_set_jmp_target(const TranslationBlock *tb, int n, - uintptr_t, uintptr_t) - QEMU_ERROR("code path is reachable"); - #define TCG_TARGET_NEED_LDST_LABELS #endif diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index 02764c3331..5ffb41fb57 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -180,12 +180,8 @@ extern bool have_vsx; #define TCG_TARGET_HAS_bitsel_vec have_vsx #define TCG_TARGET_HAS_cmpsel_vec 0 -void tb_target_set_jmp_target(const TranslationBlock *tb, int n, - uintptr_t, uintptr_t); - #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_HAS_MEMORY_BSWAP 1 - #define TCG_TARGET_NEED_LDST_LABELS #define TCG_TARGET_NEED_POOL_LABELS diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h index bce164fde2..c9af6d592f 100644 --- a/tcg/riscv/tcg-target.h +++ b/tcg/riscv/tcg-target.h @@ -165,10 +165,6 @@ typedef enum { #define TCG_TARGET_HAS_mulsh_i64 1 #endif -/* not defined -- call should be eliminated at compile time */ -void tb_target_set_jmp_target(const TranslationBlock *tb, int n, - uintptr_t, uintptr_t); - #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_NEED_LDST_LABELS diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h index 57ba165800..9f5d1cf1c7 100644 --- a/tcg/s390x/tcg-target.h +++ b/tcg/s390x/tcg-target.h @@ -174,10 +174,6 @@ extern uint64_t s390_facilities[3]; #define TCG_TARGET_HAS_MEMORY_BSWAP 1 #define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD) - -void tb_target_set_jmp_target(const TranslationBlock *tb, int n, - uintptr_t jmp_rx, uintptr_t jmp_rw); - #define TCG_TARGET_NEED_LDST_LABELS #define TCG_TARGET_NEED_POOL_LABELS diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h index 282833bd8d..b78a545581 100644 --- a/tcg/sparc64/tcg-target.h +++ b/tcg/sparc64/tcg-target.h @@ -154,10 +154,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_DEFAULT_MO (0) #define TCG_TARGET_HAS_MEMORY_BSWAP 1 - -void tb_target_set_jmp_target(const TranslationBlock *tb, int n, - uintptr_t, uintptr_t); - #define TCG_TARGET_NEED_POOL_LABELS #endif diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h index f9ee83d751..359d62c2f3 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -176,8 +176,4 @@ typedef enum { #define TCG_TARGET_HAS_MEMORY_BSWAP 1 -/* not defined -- call should be eliminated at compile time */ -void tb_target_set_jmp_target(const TranslationBlock *tb, int n, - uintptr_t, uintptr_t); - #endif /* TCG_TARGET_H */ From 90c0fee3a28b25d23081b3c435762cadde813ec4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 5 Dec 2022 16:43:18 -0600 Subject: [PATCH 14/22] tcg: Always define tb_target_set_jmp_target MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 
Install empty versions for !TCG_TARGET_HAS_direct_jump hosts. Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/arm/tcg-target.c.inc | 6 ++++++ tcg/mips/tcg-target.c.inc | 6 ++++++ tcg/riscv/tcg-target.c.inc | 6 ++++++ tcg/tci/tcg-target.c.inc | 6 ++++++ 4 files changed, 24 insertions(+) diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index b8f3b0c634..b21dd561fa 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1964,6 +1964,12 @@ static void tcg_out_goto_tb(TCGContext *s, int which) set_jmp_reset_offset(s, which); } +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t jmp_rx, uintptr_t jmp_rw) +{ + /* Always indirect, nothing to do */ +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index e54df4128b..0b5e100cb1 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1977,6 +1977,12 @@ static void tcg_out_goto_tb(TCGContext *s, int which) set_jmp_reset_offset(s, which); } +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t jmp_rx, uintptr_t jmp_rw) +{ + /* Always indirect, nothing to do */ +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index ee6759f787..e6a3915859 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -1310,6 +1310,12 @@ static void tcg_out_goto_tb(TCGContext *s, int which) set_jmp_reset_offset(s, which); } +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t jmp_rx, uintptr_t jmp_rw) +{ + /* Always indirect, nothing to do */ +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index f2ac356900..54779d86d9 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -603,6 +603,12 @@ static void tcg_out_goto_tb(TCGContext *s, int which) set_jmp_reset_offset(s, which); } +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t jmp_rx, uintptr_t jmp_rw) +{ + /* Always indirect, nothing to do */ +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) From 2fd2e78d1b5281d589eabdf31a21166c80bebd80 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 5 Dec 2022 16:55:40 -0600 Subject: [PATCH 15/22] tcg: Remove TCG_TARGET_HAS_direct_jump MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We now have the option to generate direct or indirect goto_tb depending on the dynamic displacement, thus the define is no longer necessary or completely accurate. 
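Schematically, with hypothetical helper names standing in for the per-backend logic in the hunks below (a sketch, not code from this patch), every backend can now decide per translation block:

    if (displacement_fits_direct_branch(addr - jmp_rx)) {
        patch_direct_branch(jmp_rw, addr);   /* e.g. one branch insn */
    } else {
        patch_indirect_branch(jmp_rw);       /* via tb->jmp_target_addr[n] */
    }

rather than committing to one method per host at compile time.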
Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- accel/tcg/cpu-exec.c | 23 +++++++++++------------ tcg/aarch64/tcg-target.h | 1 - tcg/arm/tcg-target.c.inc | 1 - tcg/arm/tcg-target.h | 1 - tcg/i386/tcg-target.h | 1 - tcg/loongarch64/tcg-target.h | 1 - tcg/mips/tcg-target.c.inc | 1 - tcg/mips/tcg-target.h | 1 - tcg/ppc/tcg-target.h | 1 - tcg/riscv/tcg-target.c.inc | 1 - tcg/riscv/tcg-target.h | 1 - tcg/s390x/tcg-target.c.inc | 3 +++ tcg/s390x/tcg-target.h | 1 - tcg/sparc64/tcg-target.h | 1 - tcg/tcg.c | 1 - tcg/tci/tcg-target.c.inc | 1 - tcg/tci/tcg-target.h | 1 - 17 files changed, 14 insertions(+), 27 deletions(-) diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 37c5f91074..04cd1f3092 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -572,19 +572,18 @@ void cpu_exec_step_atomic(CPUState *cpu) void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr) { + /* + * Get the rx view of the structure, from which we find the + * executable code address, and tb_target_set_jmp_target can + * produce a pc-relative displacement to jmp_target_addr[n]. + */ + const TranslationBlock *c_tb = tcg_splitwx_to_rx(tb); + uintptr_t offset = tb->jmp_insn_offset[n]; + uintptr_t jmp_rx = (uintptr_t)tb->tc.ptr + offset; + uintptr_t jmp_rw = jmp_rx - tcg_splitwx_diff; + tb->jmp_target_addr[n] = addr; - if (TCG_TARGET_HAS_direct_jump) { - /* - * Get the rx view of the structure, from which we find the - * executable code address, and tb_target_set_jmp_target can - * produce a pc-relative displacement to jmp_target_addr[n]. - */ - const TranslationBlock *c_tb = tcg_splitwx_to_rx(tb); - uintptr_t offset = tb->jmp_insn_offset[n]; - uintptr_t jmp_rx = (uintptr_t)tb->tc.ptr + offset; - uintptr_t jmp_rw = jmp_rx - tcg_splitwx_diff; - tb_target_set_jmp_target(c_tb, n, jmp_rx, jmp_rw); - } + tb_target_set_jmp_target(c_tb, n, jmp_rx, jmp_rw); } static inline void tb_add_jump(TranslationBlock *tb, int n, diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index a585d035d9..6067446b03 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -123,7 +123,6 @@ typedef enum { #define TCG_TARGET_HAS_muls2_i64 0 #define TCG_TARGET_HAS_muluh_i64 1 #define TCG_TARGET_HAS_mulsh_i64 1 -#define TCG_TARGET_HAS_direct_jump 1 #define TCG_TARGET_HAS_v64 1 #define TCG_TARGET_HAS_v128 1 diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc index b21dd561fa..e1e1c2620d 100644 --- a/tcg/arm/tcg-target.c.inc +++ b/tcg/arm/tcg-target.c.inc @@ -1945,7 +1945,6 @@ static void tcg_out_goto_tb(TCGContext *s, int which) intptr_t ptr, dif, dil; TCGReg base = TCG_REG_PC; - qemu_build_assert(!TCG_TARGET_HAS_direct_jump); ptr = get_jmp_target_addr(s, which); dif = tcg_pcrel_diff(s, (void *)ptr) - 8; dil = sextract32(dif, 0, 12); diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h index d347a5dc53..91b8954804 100644 --- a/tcg/arm/tcg-target.h +++ b/tcg/arm/tcg-target.h @@ -121,7 +121,6 @@ extern bool use_neon_instructions; #define TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_div_i32 use_idiv_instructions #define TCG_TARGET_HAS_rem_i32 0 -#define TCG_TARGET_HAS_direct_jump 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_v64 use_neon_instructions diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index d3705da2ed..5797a55ea0 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -141,7 +141,6 @@ extern bool have_movbe; #define TCG_TARGET_HAS_muls2_i32 1 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 -#define 
TCG_TARGET_HAS_direct_jump 1 #if TCG_TARGET_REG_BITS == 64 /* Keep target addresses zero-extended in a register. */ diff --git a/tcg/loongarch64/tcg-target.h b/tcg/loongarch64/tcg-target.h index 5782c6887c..1c3e48d662 100644 --- a/tcg/loongarch64/tcg-target.h +++ b/tcg/loongarch64/tcg-target.h @@ -128,7 +128,6 @@ typedef enum { #define TCG_TARGET_HAS_clz_i32 1 #define TCG_TARGET_HAS_ctz_i32 1 #define TCG_TARGET_HAS_ctpop_i32 0 -#define TCG_TARGET_HAS_direct_jump 1 #define TCG_TARGET_HAS_brcond2 0 #define TCG_TARGET_HAS_setcond2 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 0b5e100cb1..6e000d8e69 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1969,7 +1969,6 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) static void tcg_out_goto_tb(TCGContext *s, int which) { /* indirect jump method */ - qemu_build_assert(!TCG_TARGET_HAS_direct_jump); tcg_out_ld(s, TCG_TYPE_PTR, TCG_TMP0, TCG_REG_ZERO, get_jmp_target_addr(s, which)); tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0); diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h index 82b40100cf..7bc8e15293 100644 --- a/tcg/mips/tcg-target.h +++ b/tcg/mips/tcg-target.h @@ -134,7 +134,6 @@ extern bool use_mips32r2_instructions; #define TCG_TARGET_HAS_muluh_i32 1 #define TCG_TARGET_HAS_mulsh_i32 1 #define TCG_TARGET_HAS_bswap32_i32 1 -#define TCG_TARGET_HAS_direct_jump 0 #if TCG_TARGET_REG_BITS == 64 #define TCG_TARGET_HAS_add2_i32 0 diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index 5ffb41fb57..f253184915 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -108,7 +108,6 @@ extern bool have_vsx; #define TCG_TARGET_HAS_muls2_i32 0 #define TCG_TARGET_HAS_muluh_i32 1 #define TCG_TARGET_HAS_mulsh_i32 1 -#define TCG_TARGET_HAS_direct_jump 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 #if TCG_TARGET_REG_BITS == 64 diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc index e6a3915859..136fe54d4b 100644 --- a/tcg/riscv/tcg-target.c.inc +++ b/tcg/riscv/tcg-target.c.inc @@ -1302,7 +1302,6 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) static void tcg_out_goto_tb(TCGContext *s, int which) { - qemu_build_assert(!TCG_TARGET_HAS_direct_jump); /* indirect jump method */ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO, get_jmp_target_addr(s, which)); diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h index c9af6d592f..1337bc1f1e 100644 --- a/tcg/riscv/tcg-target.h +++ b/tcg/riscv/tcg-target.h @@ -121,7 +121,6 @@ typedef enum { #define TCG_TARGET_HAS_clz_i32 0 #define TCG_TARGET_HAS_ctz_i32 0 #define TCG_TARGET_HAS_ctpop_i32 0 -#define TCG_TARGET_HAS_direct_jump 0 #define TCG_TARGET_HAS_brcond2 1 #define TCG_TARGET_HAS_setcond2 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc index 2d049a4cc7..218318feb2 100644 --- a/tcg/s390x/tcg-target.c.inc +++ b/tcg/s390x/tcg-target.c.inc @@ -1973,6 +1973,9 @@ static void tcg_out_goto_tb(TCGContext *s, int which) void tb_target_set_jmp_target(const TranslationBlock *tb, int n, uintptr_t jmp_rx, uintptr_t jmp_rw) { + if (!HAVE_FACILITY(GEN_INST_EXT)) { + return; + } /* patch the branch destination */ uintptr_t addr = tb->jmp_target_addr[n]; intptr_t disp = addr - (jmp_rx - 2); diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h index 9f5d1cf1c7..e597e47e60 100644 --- a/tcg/s390x/tcg-target.h +++ b/tcg/s390x/tcg-target.h @@ -105,7 +105,6 @@ extern uint64_t s390_facilities[3]; #define 
TCG_TARGET_HAS_mulsh_i32 0 #define TCG_TARGET_HAS_extrl_i64_i32 0 #define TCG_TARGET_HAS_extrh_i64_i32 0 -#define TCG_TARGET_HAS_direct_jump 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_div2_i64 1 diff --git a/tcg/sparc64/tcg-target.h b/tcg/sparc64/tcg-target.h index b78a545581..1d6a5c8b07 100644 --- a/tcg/sparc64/tcg-target.h +++ b/tcg/sparc64/tcg-target.h @@ -111,7 +111,6 @@ extern bool use_vis3_instructions; #define TCG_TARGET_HAS_muls2_i32 1 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 -#define TCG_TARGET_HAS_direct_jump 1 #define TCG_TARGET_HAS_qemu_st8_i32 0 #define TCG_TARGET_HAS_extrl_i64_i32 1 diff --git a/tcg/tcg.c b/tcg/tcg.c index af2af99583..d502327be2 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -320,7 +320,6 @@ static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which) * We will check for overflow at the end of the opcode loop in * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX. */ - tcg_debug_assert(TCG_TARGET_HAS_direct_jump); s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s); } diff --git a/tcg/tci/tcg-target.c.inc b/tcg/tci/tcg-target.c.inc index 54779d86d9..bc452007c6 100644 --- a/tcg/tci/tcg-target.c.inc +++ b/tcg/tci/tcg-target.c.inc @@ -597,7 +597,6 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg) static void tcg_out_goto_tb(TCGContext *s, int which) { - qemu_build_assert(!TCG_TARGET_HAS_direct_jump); /* indirect jump method. */ tcg_out_op_p(s, INDEX_op_goto_tb, (void *)get_jmp_target_addr(s, which)); set_jmp_reset_offset(s, which); diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h index 359d62c2f3..1414ab4d5b 100644 --- a/tcg/tci/tcg-target.h +++ b/tcg/tci/tcg-target.h @@ -82,7 +82,6 @@ #define TCG_TARGET_HAS_muls2_i32 1 #define TCG_TARGET_HAS_muluh_i32 0 #define TCG_TARGET_HAS_mulsh_i32 0 -#define TCG_TARGET_HAS_direct_jump 0 #define TCG_TARGET_HAS_qemu_st8_i32 0 #if TCG_TARGET_REG_BITS == 64 From d59d83a1c38869b1e1a4f957eb939aaa8a342721 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 5 Dec 2022 17:26:23 -0600 Subject: [PATCH 16/22] tcg/aarch64: Reorg goto_tb implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The old implementation replaces two insns, swapping between b nop br x30 and adrp x30, addi x30, x30, lo12: br x30 There is a race condition in which a thread could be stopped at the PC of the second insn, and when restarted does not see the complete address computation and branches to nowhere. 
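Concretely, one hypothetical interleaving (an illustration, not from the patch):

    vCPU thread: executes the adrp, then is interrupted
    patcher:     atomically rewrites the insn pair to { b, nop }
    vCPU thread: resumes at the second slot, now a nop, and reaches
                 br x30 with only the adrp half of the address in x30

A single patched insn can never be observed in such a torn state.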
The new implementation replaces only one insn, swapping between b br tmp and ldr tmp, br tmp Reported-by: hev Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/aarch64/tcg-target.c.inc | 66 +++++++++++++++--------------------- tcg/aarch64/tcg-target.h | 2 +- 2 files changed, 29 insertions(+), 39 deletions(-) diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc index 0b65f2cac1..330d26b395 100644 --- a/tcg/aarch64/tcg-target.c.inc +++ b/tcg/aarch64/tcg-target.c.inc @@ -1353,33 +1353,6 @@ static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target, tcg_out_call_int(s, target); } -void tb_target_set_jmp_target(const TranslationBlock *tb, int n, - uintptr_t jmp_rx, uintptr_t jmp_rw) -{ - uintptr_t addr = tb->jmp_target_addr[n]; - tcg_insn_unit i1, i2; - TCGType rt = TCG_TYPE_I64; - TCGReg rd = TCG_REG_TMP; - uint64_t pair; - - ptrdiff_t offset = addr - jmp_rx; - - if (offset == sextract64(offset, 0, 26)) { - i1 = I3206_B | ((offset >> 2) & 0x3ffffff); - i2 = NOP; - } else { - offset = (addr >> 12) - (jmp_rx >> 12); - - /* patch ADRP */ - i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd; - /* patch ADDI */ - i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd; - } - pair = (uint64_t)i2 << 32 | i1; - qatomic_set((uint64_t *)jmp_rw, pair); - flush_idcache_range(jmp_rx, jmp_rw, 8); -} - static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l) { if (!l->has_value) { @@ -1902,23 +1875,40 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) static void tcg_out_goto_tb(TCGContext *s, int which) { /* - * Ensure that ADRP+ADD are 8-byte aligned so that an atomic - * write can be used to patch the target address. + * Direct branch, or indirect address load, will be patched + * by tb_target_set_jmp_target. Assert indirect load offset + * in range early, regardless of direct branch distance. */ - if ((uintptr_t)s->code_ptr & 7) { - tcg_out32(s, NOP); - } + intptr_t i_off = tcg_pcrel_diff(s, (void *)get_jmp_target_addr(s, which)); + tcg_debug_assert(i_off == sextract64(i_off, 0, 21)); + set_jmp_insn_offset(s, which); - /* - * actual branch destination will be patched by - * tb_target_set_jmp_target later - */ - tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0); - tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0); + tcg_out32(s, I3206_B); tcg_out_insn(s, 3207, BR, TCG_REG_TMP); set_jmp_reset_offset(s, which); } +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t jmp_rx, uintptr_t jmp_rw) +{ + uintptr_t d_addr = tb->jmp_target_addr[n]; + ptrdiff_t d_offset = d_addr - jmp_rx; + tcg_insn_unit insn; + + /* Either directly branch, or indirect branch load. */ + if (d_offset == sextract64(d_offset, 0, 28)) { + insn = deposit32(I3206_B, 0, 26, d_offset >> 2); + } else { + uintptr_t i_addr = (uintptr_t)&tb->jmp_target_addr[n]; + ptrdiff_t i_offset = i_addr - jmp_rx; + + /* Note that we asserted this in range in tcg_out_goto_tb. 
*/ + insn = deposit32(I3305_LDR | TCG_REG_TMP, 0, 5, i_offset >> 2); + } + qatomic_set((uint32_t *)jmp_rw, insn); + flush_idcache_range(jmp_rx, jmp_rw, 4); +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h index 6067446b03..8d244292aa 100644 --- a/tcg/aarch64/tcg-target.h +++ b/tcg/aarch64/tcg-target.h @@ -15,7 +15,7 @@ #define TCG_TARGET_INSN_UNIT_SIZE 4 #define TCG_TARGET_TLB_DISPLACEMENT_BITS 24 -#define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) +#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) typedef enum { TCG_REG_X0, TCG_REG_X1, TCG_REG_X2, TCG_REG_X3, From 20b6643324a79860dcdfe811ffe4a79942bca21e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 5 Dec 2022 17:45:02 -0600 Subject: [PATCH 17/22] tcg/ppc: Reorg goto_tb implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The old ppc64 implementation replaces 2 or 4 insns, which leaves a race condition in which a thread could be stopped at a PC in the middle of the sequence, and when restarted does not see the complete address computation and branches to nowhere. The new implementation replaces only one insn, swapping between b and mtctr r31 falling through to a general-case indirect branch. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/ppc/tcg-target.c.inc | 158 +++++++++++---------------------------- tcg/ppc/tcg-target.h | 3 +- 2 files changed, 44 insertions(+), 117 deletions(-) diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc index 6f2c8faea6..8d6899cf40 100644 --- a/tcg/ppc/tcg-target.c.inc +++ b/tcg/ppc/tcg-target.c.inc @@ -1854,104 +1854,6 @@ static void tcg_out_mb(TCGContext *s, TCGArg a0) tcg_out32(s, insn); } -static inline uint64_t make_pair(tcg_insn_unit i1, tcg_insn_unit i2) -{ - if (HOST_BIG_ENDIAN) { - return (uint64_t)i1 << 32 | i2; - } - return (uint64_t)i2 << 32 | i1; -} - -static inline void ppc64_replace2(uintptr_t rx, uintptr_t rw, - tcg_insn_unit i0, tcg_insn_unit i1) -{ -#if TCG_TARGET_REG_BITS == 64 - qatomic_set((uint64_t *)rw, make_pair(i0, i1)); - flush_idcache_range(rx, rw, 8); -#else - qemu_build_not_reached(); -#endif -} - -static inline void ppc64_replace4(uintptr_t rx, uintptr_t rw, - tcg_insn_unit i0, tcg_insn_unit i1, - tcg_insn_unit i2, tcg_insn_unit i3) -{ - uint64_t p[2]; - - p[!HOST_BIG_ENDIAN] = make_pair(i0, i1); - p[HOST_BIG_ENDIAN] = make_pair(i2, i3); - - /* - * There's no convenient way to get the compiler to allocate a pair - * of registers at an even index, so copy into r6/r7 and clobber. - */ - asm("mr %%r6, %1\n\t" "mr %%r7, %2\n\t" "stq %%r6, %0" : "=Q"(*(__int128 *)rw) : "r"(p[0]), "r"(p[1]) : "r6", "r7"); - flush_idcache_range(rx, rw, 16); -} - -void tb_target_set_jmp_target(const TranslationBlock *tb, int n, - uintptr_t jmp_rx, uintptr_t jmp_rw) -{ - tcg_insn_unit i0, i1, i2, i3; - uintptr_t addr = tb->jmp_target_addr[n]; - intptr_t tb_diff = addr - (uintptr_t)tb->tc.ptr; - intptr_t br_diff = addr - (jmp_rx + 4); - intptr_t lo, hi; - - if (TCG_TARGET_REG_BITS == 32) { - intptr_t diff = addr - jmp_rx; - tcg_debug_assert(in_range_b(diff)); - qatomic_set((uint32_t *)jmp_rw, B | (diff & 0x3fffffc)); - flush_idcache_range(jmp_rx, jmp_rw, 4); - return; - } - - /* - * For 16-bit displacements, we can use a single add + branch. - * This happens quite often. 
- */ - if (tb_diff == (int16_t)tb_diff) { - i0 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, tb_diff); - i1 = B | (br_diff & 0x3fffffc); - ppc64_replace2(jmp_rx, jmp_rw, i0, i1); - return; - } - - lo = (int16_t)tb_diff; - hi = (int32_t)(tb_diff - lo); - assert(tb_diff == hi + lo); - i0 = ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, hi >> 16); - i1 = ADDI | TAI(TCG_REG_TB, TCG_REG_TB, lo); - - /* - * Without stq from 2.07, we can only update two insns, - * and those must be the ones that load the target address. - */ - if (!have_isa_2_07) { - ppc64_replace2(jmp_rx, jmp_rw, i0, i1); - return; - } - - /* - * For 26-bit displacements, we can use a direct branch. - * Otherwise we still need the indirect branch, which we - * must restore after a potential direct branch write. - */ - br_diff -= 4; - if (in_range_b(br_diff)) { - i2 = B | (br_diff & 0x3fffffc); - i3 = NOP; - } else { - i2 = MTSPR | RS(TCG_REG_TB) | CTR; - i3 = BCCTR | BO_ALWAYS; - } - ppc64_replace4(jmp_rx, jmp_rw, i0, i1, i2, i3); -} - static void tcg_out_call_int(TCGContext *s, int lk, const tcg_insn_unit *target) { @@ -2625,30 +2527,56 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg) static void tcg_out_goto_tb(TCGContext *s, int which) { - /* Direct jump. */ - if (TCG_TARGET_REG_BITS == 64) { - /* Ensure the next insns are 8 or 16-byte aligned. */ - while ((uintptr_t)s->code_ptr & (have_isa_2_07 ? 15 : 7)) { - tcg_out32(s, NOP); - } + uintptr_t ptr = get_jmp_target_addr(s, which); + + if (USE_REG_TB) { + ptrdiff_t offset = tcg_tbrel_diff(s, (void *)ptr); + tcg_out_mem_long(s, LD, LDX, TCG_REG_TB, TCG_REG_TB, offset); + + /* Direct branch will be patched by tb_target_set_jmp_target. */ set_jmp_insn_offset(s, which); - tcg_out32(s, ADDIS | TAI(TCG_REG_TB, TCG_REG_TB, 0)); - tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, 0)); tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR); + + /* When branch is out of range, fall through to indirect. */ + tcg_out32(s, BCCTR | BO_ALWAYS); + + /* For the unlinked case, need to reset TCG_REG_TB. */ + set_jmp_reset_offset(s, which); + tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB, + -tcg_current_code_size(s)); + } else { + /* Direct branch will be patched by tb_target_set_jmp_target. */ + set_jmp_insn_offset(s, which); + tcg_out32(s, NOP); + + /* When branch is out of range, fall through to indirect. */ + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP1, ptr - (int16_t)ptr); + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP1, TCG_REG_TMP1, (int16_t)ptr); + tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR); tcg_out32(s, BCCTR | BO_ALWAYS); set_jmp_reset_offset(s, which); - if (USE_REG_TB) { - /* For the unlinked case, need to reset TCG_REG_TB. 
*/ - tcg_out_mem_long(s, ADDI, ADD, TCG_REG_TB, TCG_REG_TB, - -tcg_current_code_size(s)); - } - } else { - set_jmp_insn_offset(s, which); - tcg_out32(s, B); - set_jmp_reset_offset(s, which); } } +void tb_target_set_jmp_target(const TranslationBlock *tb, int n, + uintptr_t jmp_rx, uintptr_t jmp_rw) +{ + uintptr_t addr = tb->jmp_target_addr[n]; + intptr_t diff = addr - jmp_rx; + tcg_insn_unit insn; + + if (in_range_b(diff)) { + insn = B | (diff & 0x3fffffc); + } else if (USE_REG_TB) { + insn = MTSPR | RS(TCG_REG_TB) | CTR; + } else { + insn = NOP; + } + + qatomic_set((uint32_t *)jmp_rw, insn); + flush_idcache_range(jmp_rx, jmp_rw, 4); +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], const int const_args[TCG_MAX_OP_ARGS]) diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h index f253184915..af81c5a57f 100644 --- a/tcg/ppc/tcg-target.h +++ b/tcg/ppc/tcg-target.h @@ -27,11 +27,10 @@ #ifdef _ARCH_PPC64 # define TCG_TARGET_REG_BITS 64 -# define MAX_CODE_GEN_BUFFER_SIZE (2 * GiB) #else # define TCG_TARGET_REG_BITS 32 -# define MAX_CODE_GEN_BUFFER_SIZE (32 * MiB) #endif +#define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) #define TCG_TARGET_NB_REGS 64 #define TCG_TARGET_INSN_UNIT_SIZE 4 From 1e42b4f8079f7486689693187572452bbec2f158 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sun, 27 Nov 2022 08:34:00 +0300 Subject: [PATCH 18/22] tcg/sparc64: Remove USE_REG_TB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is always true for sparc64, so this is dead since 3a5f6805c7ca. Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/sparc64/tcg-target.c.inc | 62 ++++++++++++------------------------ 1 file changed, 21 insertions(+), 41 deletions(-) diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index fdb711bdf6..e0b3957149 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -92,7 +92,6 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { #endif #define TCG_REG_TB TCG_REG_I1 -#define USE_REG_TB (sizeof(void *) > 4) static const int tcg_target_reg_alloc_order[] = { TCG_REG_L0, @@ -439,7 +438,7 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, } /* A 13-bit constant relative to the TB. */ - if (!in_prologue && USE_REG_TB) { + if (!in_prologue) { test = tcg_tbrel_diff(s, (void *)arg); if (check_fit_ptr(test, 13)) { tcg_out_arithi(s, ret, TCG_REG_TB, test, ARITH_ADD); @@ -468,7 +467,7 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret, } /* Use the constant pool, if possible. */ - if (!in_prologue && USE_REG_TB) { + if (!in_prologue) { new_pool_label(s, arg, R_SPARC_13, s->code_ptr, tcg_tbrel_diff(s, NULL)); tcg_out32(s, LDX | INSN_RD(ret) | INSN_RS1(TCG_REG_TB)); @@ -1015,10 +1014,8 @@ static void tcg_target_qemu_prologue(TCGContext *s) #endif /* We choose TCG_REG_TB such that no move is required. 
*/ - if (USE_REG_TB) { - QEMU_BUILD_BUG_ON(TCG_REG_TB != TCG_REG_I1); - tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB); - } + QEMU_BUILD_BUG_ON(TCG_REG_TB != TCG_REG_I1); + tcg_regset_set_reg(s->reserved_regs, TCG_REG_TB); tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I1, 0, JMPL); /* delay slot */ @@ -1423,7 +1420,7 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); tcg_out_movi_imm13(s, TCG_REG_O0, a0); return; - } else if (USE_REG_TB) { + } else { intptr_t tb_diff = tcg_tbrel_diff(s, (void *)a0); if (check_fit_ptr(tb_diff, 13)) { tcg_out_arithi(s, TCG_REG_G0, TCG_REG_I7, 8, RETURN); @@ -1439,36 +1436,30 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0) static void tcg_out_goto_tb(TCGContext *s, int which) { + int c; + /* Direct jump. */ - if (USE_REG_TB) { - /* make sure the patch is 8-byte aligned. */ - if ((intptr_t)s->code_ptr & 4) { - tcg_out_nop(s); - } - set_jmp_insn_offset(s, which); - tcg_out_sethi(s, TCG_REG_T1, 0); - tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, 0, ARITH_OR); - tcg_out_arith(s, TCG_REG_G0, TCG_REG_TB, TCG_REG_T1, JMPL); - tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB, TCG_REG_T1, ARITH_ADD); - } else { - set_jmp_insn_offset(s, which); - tcg_out32(s, CALL); + /* make sure the patch is 8-byte aligned. */ + if ((intptr_t)s->code_ptr & 4) { tcg_out_nop(s); } + set_jmp_insn_offset(s, which); + tcg_out_sethi(s, TCG_REG_T1, 0); + tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, 0, ARITH_OR); + tcg_out_arith(s, TCG_REG_G0, TCG_REG_TB, TCG_REG_T1, JMPL); + tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB, TCG_REG_T1, ARITH_ADD); set_jmp_reset_offset(s, which); /* * For the unlinked path of goto_tb, we need to reset TCG_REG_TB * to the beginning of this TB. */ - if (USE_REG_TB) { - int c = -tcg_current_code_size(s); - if (check_fit_i32(c, 13)) { - tcg_out_arithi(s, TCG_REG_TB, TCG_REG_TB, c, ARITH_ADD); - } else { - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, c); - tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB, TCG_REG_T1, ARITH_ADD); - } + c = -tcg_current_code_size(s); + if (check_fit_i32(c, 13)) { + tcg_out_arithi(s, TCG_REG_TB, TCG_REG_TB, c, ARITH_ADD); + } else { + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, c); + tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB, TCG_REG_T1, ARITH_ADD); } } @@ -1488,11 +1479,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, switch (opc) { case INDEX_op_goto_ptr: tcg_out_arithi(s, TCG_REG_G0, a0, 0, JMPL); - if (USE_REG_TB) { - tcg_out_mov_delay(s, TCG_REG_TB, a0); - } else { - tcg_out_nop(s); - } + tcg_out_mov_delay(s, TCG_REG_TB, a0); break; case INDEX_op_br: tcg_out_bpcc(s, COND_A, BPCC_PT, arg_label(a0)); @@ -1898,13 +1885,6 @@ void tb_target_set_jmp_target(const TranslationBlock *tb, int n, tcg_debug_assert(tb_disp == (int32_t)tb_disp); tcg_debug_assert(br_disp == (int32_t)br_disp); - if (!USE_REG_TB) { - qatomic_set((uint32_t *)jmp_rw, - deposit32(CALL, 0, 30, br_disp >> 2)); - flush_idcache_range(jmp_rx, jmp_rw, 4); - return; - } - /* This does not exercise the range of the branch, but we do still need to be able to load the new value of TCG_REG_TB. But this does still happen quite often. 
From a228ae3ea7f6fa9e7eda53906471f1cfc400c114 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Mon, 5 Dec 2022 18:05:06 -0600
Subject: [PATCH 19/22] tcg/sparc64: Reorg goto_tb implementation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The old sparc64 implementation may replace two insns, which leaves
a race condition: a thread can be stopped at a PC in the middle of
the sequence and, when restarted, does not see the complete address
computation and branches to nowhere.

The new implementation replaces only one insn, swapping between a
direct branch and a direct call.  The TCG_REG_TB register is loaded
from tb->jmp_target_addr[] in the delay slot.

Reviewed-by: Alex Bennée
Signed-off-by: Richard Henderson
---
 tcg/sparc64/tcg-target.c.inc | 87 +++++++++++++++---------------------
 1 file changed, 37 insertions(+), 50 deletions(-)

diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc
index e0b3957149..dd406bc065 100644
--- a/tcg/sparc64/tcg-target.c.inc
+++ b/tcg/sparc64/tcg-target.c.inc
@@ -1436,33 +1436,56 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
 
 static void tcg_out_goto_tb(TCGContext *s, int which)
 {
-    int c;
+    ptrdiff_t off = tcg_tbrel_diff(s, (void *)get_jmp_target_addr(s, which));
 
-    /* Direct jump. */
-    /* make sure the patch is 8-byte aligned.  */
-    if ((intptr_t)s->code_ptr & 4) {
-        tcg_out_nop(s);
-    }
+    /* Direct branch will be patched by tb_target_set_jmp_target. */
     set_jmp_insn_offset(s, which);
-    tcg_out_sethi(s, TCG_REG_T1, 0);
-    tcg_out_arithi(s, TCG_REG_T1, TCG_REG_T1, 0, ARITH_OR);
-    tcg_out_arith(s, TCG_REG_G0, TCG_REG_TB, TCG_REG_T1, JMPL);
-    tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB, TCG_REG_T1, ARITH_ADD);
+    tcg_out32(s, CALL);
+    /* delay slot */
+    tcg_debug_assert(check_fit_ptr(off, 13));
+    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TB, TCG_REG_TB, off);
     set_jmp_reset_offset(s, which);
 
     /*
      * For the unlinked path of goto_tb, we need to reset TCG_REG_TB
      * to the beginning of this TB.
      */
-    c = -tcg_current_code_size(s);
-    if (check_fit_i32(c, 13)) {
-        tcg_out_arithi(s, TCG_REG_TB, TCG_REG_TB, c, ARITH_ADD);
+    off = -tcg_current_code_size(s);
+    if (check_fit_i32(off, 13)) {
+        tcg_out_arithi(s, TCG_REG_TB, TCG_REG_TB, off, ARITH_ADD);
     } else {
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, c);
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, off);
         tcg_out_arith(s, TCG_REG_TB, TCG_REG_TB, TCG_REG_T1, ARITH_ADD);
     }
 }
 
+void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
+                              uintptr_t jmp_rx, uintptr_t jmp_rw)
+{
+    uintptr_t addr = tb->jmp_target_addr[n];
+    intptr_t br_disp = (intptr_t)(addr - jmp_rx);
+    tcg_insn_unit insn;
+
+    br_disp >>= 2;
+    if (check_fit_ptr(br_disp, 19)) {
+        /* ba,pt %icc, addr */
+        insn = deposit32(INSN_OP(0) | INSN_OP2(1) | INSN_COND(COND_A)
+                         | BPCC_ICC | BPCC_PT, 0, 19, br_disp);
+    } else if (check_fit_ptr(br_disp, 22)) {
+        /* ba addr */
+        insn = deposit32(INSN_OP(0) | INSN_OP2(2) | INSN_COND(COND_A),
+                         0, 22, br_disp);
+    } else {
+        /* The code_gen_buffer can't be larger than 2GB.  */
+        tcg_debug_assert(check_fit_ptr(br_disp, 30));
+        /* call addr */
+        insn = deposit32(CALL, 0, 30, br_disp);
+    }
+
+    qatomic_set((uint32_t *)jmp_rw, insn);
+    flush_idcache_range(jmp_rx, jmp_rw, 4);
+}
+
 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                        const TCGArg args[TCG_MAX_OP_ARGS],
                        const int const_args[TCG_MAX_OP_ARGS])
@@ -1871,39 +1894,3 @@ void tcg_register_jit(const void *buf, size_t buf_size)
 {
     tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
 }
-
-void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
-                              uintptr_t jmp_rx, uintptr_t jmp_rw)
-{
-    uintptr_t addr = tb->jmp_target_addr[n];
-    intptr_t tb_disp = addr - (uintptr_t)tb->tc.ptr;
-    intptr_t br_disp = addr - jmp_rx;
-    tcg_insn_unit i1, i2;
-
-    /* We can reach the entire address space for ILP32.
-       For LP64, the code_gen_buffer can't be larger than 2GB. */
-    tcg_debug_assert(tb_disp == (int32_t)tb_disp);
-    tcg_debug_assert(br_disp == (int32_t)br_disp);
-
-    /* This does not exercise the range of the branch, but we do
-       still need to be able to load the new value of TCG_REG_TB.
-       But this does still happen quite often.  */
-    if (check_fit_ptr(tb_disp, 13)) {
-        /* ba,pt %icc, addr */
-        i1 = (INSN_OP(0) | INSN_OP2(1) | INSN_COND(COND_A)
-              | BPCC_ICC | BPCC_PT | INSN_OFF19(br_disp));
-        i2 = (ARITH_ADD | INSN_RD(TCG_REG_TB) | INSN_RS1(TCG_REG_TB)
-              | INSN_IMM13(tb_disp));
-    } else if (tb_disp >= 0) {
-        i1 = SETHI | INSN_RD(TCG_REG_T1) | ((tb_disp & 0xfffffc00) >> 10);
-        i2 = (ARITH_OR | INSN_RD(TCG_REG_T1) | INSN_RS1(TCG_REG_T1)
-              | INSN_IMM13(tb_disp & 0x3ff));
-    } else {
-        i1 = SETHI | INSN_RD(TCG_REG_T1) | ((~tb_disp & 0xfffffc00) >> 10);
-        i2 = (ARITH_XOR | INSN_RD(TCG_REG_T1) | INSN_RS1(TCG_REG_T1)
-              | INSN_IMM13((tb_disp & 0x3ff) | -0x400));
-    }
-
-    qatomic_set((uint64_t *)jmp_rw, deposit64(i2, 32, 32, i1));
-    flush_idcache_range(jmp_rx, jmp_rw, 8);
-}

From 79ffece44472975b582f009b803d454527d43892 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Sun, 27 Nov 2022 00:48:47 -0800
Subject: [PATCH 20/22] tcg/arm: Implement direct branch for goto_tb
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that tcg can handle direct and indirect goto_tb simultaneously,
we can optimistically leave space for a direct branch and fall back
to loading the pointer from the TB for an indirect branch.

Reviewed-by: Alex Bennée
Signed-off-by: Richard Henderson
---
 tcg/arm/tcg-target.c.inc | 52 ++++++++++++++++++++++++++++------------
 1 file changed, 37 insertions(+), 15 deletions(-)

diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index e1e1c2620d..6abe94137e 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -135,6 +135,8 @@ typedef enum {
     ARITH_BIC = 0xe << 21,
     ARITH_MVN = 0xf << 21,
 
+    INSN_B         = 0x0a000000,
+
     INSN_CLZ       = 0x016f0f10,
     INSN_RBIT      = 0x06ff0f30,
 
@@ -546,7 +548,7 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct)
 
 static void tcg_out_b_imm(TCGContext *s, ARMCond cond, int32_t offset)
 {
-    tcg_out32(s, (cond << 28) | 0x0a000000 |
+    tcg_out32(s, (cond << 28) | INSN_B |
               (((offset - 8) >> 2) & 0x00ffffff));
 }
 
@@ -1941,32 +1943,52 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg)
 
 static void tcg_out_goto_tb(TCGContext *s, int which)
 {
-    /* Indirect jump method */
-    intptr_t ptr, dif, dil;
-    TCGReg base = TCG_REG_PC;
+    uintptr_t i_addr;
+    intptr_t i_disp;
 
-    ptr = get_jmp_target_addr(s, which);
-    dif = tcg_pcrel_diff(s, (void *)ptr) - 8;
-    dil = sextract32(dif, 0, 12);
-    if (dif != dil) {
+    /* Direct branch will be patched by tb_target_set_jmp_target. */
+    set_jmp_insn_offset(s, which);
+    tcg_out32(s, INSN_NOP);
+
+    /* When branch is out of range, fall through to indirect. */
+    i_addr = get_jmp_target_addr(s, which);
+    i_disp = tcg_pcrel_diff(s, (void *)i_addr) - 8;
+    tcg_debug_assert(i_disp < 0);
+    if (i_disp >= -0xfff) {
+        tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_PC, i_disp);
+    } else {
         /*
          * The TB is close, but outside the 12 bits addressable by
          * the load.  We can extend this to 20 bits with a sub of a
-         * shifted immediate from pc.  In the vastly unlikely event
-         * the code requires more than 1MB, we'll use 2 insns and
-         * be no worse off.
+         * shifted immediate from pc.
          */
-        base = TCG_REG_R0;
-        tcg_out_movi32(s, COND_AL, base, ptr - dil);
+        int h = -i_disp;
+        int l = h & 0xfff;
+
+        h = encode_imm_nofail(h - l);
+        tcg_out_dat_imm(s, COND_AL, ARITH_SUB, TCG_REG_R0, TCG_REG_PC, h);
+        tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, TCG_REG_R0, l);
     }
-    tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, base, dil);
     set_jmp_reset_offset(s, which);
 }
 
 void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
                               uintptr_t jmp_rx, uintptr_t jmp_rw)
 {
-    /* Always indirect, nothing to do */
+    uintptr_t addr = tb->jmp_target_addr[n];
+    ptrdiff_t offset = addr - (jmp_rx + 8);
+    tcg_insn_unit insn;
+
+    /* Either directly branch, or fall through to indirect branch. */
+    if (offset == sextract64(offset, 0, 26)) {
+        /* B */
+        insn = deposit32((COND_AL << 28) | INSN_B, 0, 24, offset >> 2);
+    } else {
+        insn = INSN_NOP;
+    }
+
+    qatomic_set((uint32_t *)jmp_rw, insn);
+    flush_idcache_range(jmp_rx, jmp_rw, 4);
 }
 
 static void tcg_out_op(TCGContext *s, TCGOpcode opc,

From 9ae958e4d7504f87c2ecd5915d8c3ede7007f2e2 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Sun, 27 Nov 2022 01:07:25 -0800
Subject: [PATCH 21/22] tcg/riscv: Introduce OPC_NOP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Reviewed-by: Alex Bennée
Reviewed-by: Philippe Mathieu-Daudé
Signed-off-by: Richard Henderson
---
 tcg/riscv/tcg-target.c.inc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index 136fe54d4b..82ca86431e 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -267,6 +267,7 @@ typedef enum {
 #endif
 
     OPC_FENCE = 0x0000000f,
+    OPC_NOP   = OPC_ADDI,   /* nop = addi r0,r0,0 */
 } RISCVInsn;
 
 /*
@@ -403,7 +404,7 @@ static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
 {
     int i;
     for (i = 0; i < count; ++i) {
-        p[i] = encode_i(OPC_ADDI, TCG_REG_ZERO, TCG_REG_ZERO, 0);
+        p[i] = OPC_NOP;
     }
 }

From 493c9b19a7fb7f387c4fcf57d3836504d5242bf5 Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Sun, 27 Nov 2022 01:11:56 -0800
Subject: [PATCH 22/22] tcg/riscv: Implement direct branch for goto_tb
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that tcg can handle direct and indirect goto_tb simultaneously,
we can optimistically leave space for a direct branch and fall back
to loading the pointer from the TB for an indirect branch.

Reviewed-by: Alex Bennée
Signed-off-by: Richard Henderson
---
 tcg/riscv/tcg-target.c.inc | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/tcg/riscv/tcg-target.c.inc b/tcg/riscv/tcg-target.c.inc
index 82ca86431e..fc0edd811f 100644
--- a/tcg/riscv/tcg-target.c.inc
+++ b/tcg/riscv/tcg-target.c.inc
@@ -1303,7 +1303,11 @@ static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
 
 static void tcg_out_goto_tb(TCGContext *s, int which)
 {
-    /* indirect jump method */
+    /* Direct branch will be patched by tb_target_set_jmp_target. */
+    set_jmp_insn_offset(s, which);
+    tcg_out32(s, OPC_JAL);
+
+    /* When branch is out of range, fall through to indirect. */
     tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_TMP0, TCG_REG_ZERO,
                get_jmp_target_addr(s, which));
     tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, TCG_REG_TMP0, 0);
@@ -1313,7 +1317,18 @@ static void tcg_out_goto_tb(TCGContext *s, int which)
 
 void tb_target_set_jmp_target(const TranslationBlock *tb, int n,
                               uintptr_t jmp_rx, uintptr_t jmp_rw)
 {
-    /* Always indirect, nothing to do */
+    uintptr_t addr = tb->jmp_target_addr[n];
+    ptrdiff_t offset = addr - jmp_rx;
+    tcg_insn_unit insn;
+
+    /* Either directly branch, or fall through to indirect branch. */
+    if (offset == sextreg(offset, 0, 20)) {
+        insn = encode_uj(OPC_JAL, TCG_REG_ZERO, offset);
+    } else {
+        insn = OPC_NOP;
+    }
+    qatomic_set((uint32_t *)jmp_rw, insn);
+    flush_idcache_range(jmp_rx, jmp_rw, 4);
 }
 
 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
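
Taken together, patches 19, 20 and 22 converge on the same single-insn
patching discipline: tcg_out_goto_tb() reserves exactly one insn slot at
set_jmp_insn_offset(), and tb_target_set_jmp_target() atomically rewrites
that slot as either a direct branch (when the displacement fits) or a nop
that falls through to the indirect sequence.  The sketch below is an
illustrative, compilable distillation of that pattern, not code from any
backend above; the encodings and the cache-flush stub are hypothetical
stand-ins.

/* goto_tb_patch_sketch.c: illustrative only; encodings are made up. */
#include <stdatomic.h>
#include <stddef.h>
#include <stdint.h>

#define SKETCH_NOP  0x01000000u  /* hypothetical nop encoding */
#define SKETCH_BR   0x40000000u  /* hypothetical branch opcode, 30-bit disp */

/* Stand-in for the target's icache maintenance; the real backends call
   flush_idcache_range() here. */
static void sketch_flush(uintptr_t rx, uintptr_t rw, size_t len)
{
    (void)rx; (void)rw; (void)len;
}

/* Retarget one TB link to 'addr'.  A single aligned 32-bit store is
   atomic with respect to a concurrently executing thread, so that thread
   sees either the old insn or the new one, never a torn two-insn pair,
   which is exactly the race the sparc64 reorg removes. */
void sketch_set_jmp_target(uint32_t *jmp_rw, uintptr_t jmp_rx, uintptr_t addr)
{
    intptr_t disp = (intptr_t)(addr - jmp_rx) >> 2;  /* word displacement */
    uint32_t insn;

    if (disp >= -(1 << 29) && disp < (1 << 29)) {
        /* In range: patch in a direct branch (30-bit displacement). */
        insn = SKETCH_BR | ((uint32_t)disp & 0x3fffffff);
    } else {
        /* Out of range: leave a nop; execution falls through to the
           indirect load of tb->jmp_target_addr[n]. */
        insn = SKETCH_NOP;
    }

    atomic_store_explicit((_Atomic uint32_t *)jmp_rw, insn,
                          memory_order_relaxed);
    sketch_flush(jmp_rx, (uintptr_t)jmp_rw, 4);
}

What varies per backend is only the range test and the encoding:
sextract64(offset, 0, 26) with a B insn on arm, sextreg(offset, 0, 20)
with JAL on riscv, and the 19/22/30-bit cases on sparc64, which in
addition reloads TCG_REG_TB in the delay slot of the patched call.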