From f47a90dacca8f74210a2675bdde7ab3856872b94 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 14 Sep 2023 07:39:07 -0700 Subject: [PATCH 1/6] accel/tcg: Avoid load of icount_decr if unused MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With CF_NOIRQ and without !CF_USE_ICOUNT, the load isn't used. Avoid emitting it. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- accel/tcg/translator.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c index 1a6a5448c8..a3983019a5 100644 --- a/accel/tcg/translator.c +++ b/accel/tcg/translator.c @@ -49,12 +49,15 @@ bool translator_io_start(DisasContextBase *db) static TCGOp *gen_tb_start(uint32_t cflags) { - TCGv_i32 count = tcg_temp_new_i32(); + TCGv_i32 count = NULL; TCGOp *icount_start_insn = NULL; - tcg_gen_ld_i32(count, cpu_env, - offsetof(ArchCPU, neg.icount_decr.u32) - - offsetof(ArchCPU, env)); + if ((cflags & CF_USE_ICOUNT) || !(cflags & CF_NOIRQ)) { + count = tcg_temp_new_i32(); + tcg_gen_ld_i32(count, cpu_env, + offsetof(ArchCPU, neg.icount_decr.u32) - + offsetof(ArchCPU, env)); + } if (cflags & CF_USE_ICOUNT) { /* From 5d97e94638100fd3e5b8d76ab30e1066cd4b1823 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 14 Sep 2023 07:48:39 -0700 Subject: [PATCH 2/6] accel/tcg: Hoist CF_MEMI_ONLY check outside translation loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The condition checked is loop invariant; check it only once. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- accel/tcg/translator.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c index a3983019a5..b6ab9f3d33 100644 --- a/accel/tcg/translator.c +++ b/accel/tcg/translator.c @@ -158,7 +158,13 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns, ops->tb_start(db, cpu); tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */ - plugin_enabled = plugin_gen_tb_start(cpu, db, cflags & CF_MEMI_ONLY); + if (cflags & CF_MEMI_ONLY) { + /* We should only see CF_MEMI_ONLY for io_recompile. */ + assert(cflags & CF_LAST_IO); + plugin_enabled = plugin_gen_tb_start(cpu, db, true); + } else { + plugin_enabled = plugin_gen_tb_start(cpu, db, false); + } while (true) { *max_insns = ++db->num_insns; @@ -176,12 +182,8 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns, if (db->num_insns == db->max_insns && (cflags & CF_LAST_IO)) { /* Accept I/O on the last instruction. */ gen_io_start(); - ops->translate_insn(db, cpu); - } else { - /* we should only see CF_MEMI_ONLY for io_recompile */ - tcg_debug_assert(!(cflags & CF_MEMI_ONLY)); - ops->translate_insn(db, cpu); } + ops->translate_insn(db, cpu); /* * We can't instrument after instructions that change control From 0ca41ccf1c555f97873b8e02a47390fd6af4b18f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 14 Sep 2023 08:06:14 -0700 Subject: [PATCH 3/6] accel/tcg: Track current value of can_do_io in the TB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simplify translator_io_start by recording the current known value of can_do_io within DisasContextBase. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- accel/tcg/translator.c | 31 ++++++++++++++----------------- include/exec/translator.h | 2 ++ 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c index b6ab9f3d33..850d82e26f 100644 --- a/accel/tcg/translator.c +++ b/accel/tcg/translator.c @@ -16,11 +16,14 @@ #include "tcg/tcg-op-common.h" #include "internal.h" -static void gen_io_start(void) +static void set_can_do_io(DisasContextBase *db, bool val) { - tcg_gen_st_i32(tcg_constant_i32(1), cpu_env, - offsetof(ArchCPU, parent_obj.can_do_io) - - offsetof(ArchCPU, env)); + if (db->saved_can_do_io != val) { + db->saved_can_do_io = val; + tcg_gen_st_i32(tcg_constant_i32(val), cpu_env, + offsetof(ArchCPU, parent_obj.can_do_io) - + offsetof(ArchCPU, env)); + } } bool translator_io_start(DisasContextBase *db) @@ -30,12 +33,8 @@ bool translator_io_start(DisasContextBase *db) if (!(cflags & CF_USE_ICOUNT)) { return false; } - if (db->num_insns == db->max_insns && (cflags & CF_LAST_IO)) { - /* Already started in translator_loop. */ - return true; - } - gen_io_start(); + set_can_do_io(db, true); /* * Ensure that this instruction will be the last in the TB. @@ -47,7 +46,7 @@ bool translator_io_start(DisasContextBase *db) return true; } -static TCGOp *gen_tb_start(uint32_t cflags) +static TCGOp *gen_tb_start(DisasContextBase *db, uint32_t cflags) { TCGv_i32 count = NULL; TCGOp *icount_start_insn = NULL; @@ -91,12 +90,9 @@ static TCGOp *gen_tb_start(uint32_t cflags) * cpu->can_do_io is cleared automatically here at the beginning of * each translation block. The cost is minimal and only paid for * -icount, plus it would be very easy to forget doing it in the - * translator. Doing it here means we don't need a gen_io_end() to - * go with gen_io_start(). + * translator. */ - tcg_gen_st_i32(tcg_constant_i32(0), cpu_env, - offsetof(ArchCPU, parent_obj.can_do_io) - - offsetof(ArchCPU, env)); + set_can_do_io(db, false); } return icount_start_insn; @@ -147,6 +143,7 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns, db->num_insns = 0; db->max_insns = *max_insns; db->singlestep_enabled = cflags & CF_SINGLE_STEP; + db->saved_can_do_io = -1; db->host_addr[0] = host_pc; db->host_addr[1] = NULL; @@ -154,7 +151,7 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns, tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */ /* Start translating. */ - icount_start_insn = gen_tb_start(cflags); + icount_start_insn = gen_tb_start(db, cflags); ops->tb_start(db, cpu); tcg_debug_assert(db->is_jmp == DISAS_NEXT); /* no early exit */ @@ -181,7 +178,7 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns, the next instruction. */ if (db->num_insns == db->max_insns && (cflags & CF_LAST_IO)) { /* Accept I/O on the last instruction. */ - gen_io_start(); + set_can_do_io(db, true); } ops->translate_insn(db, cpu); diff --git a/include/exec/translator.h b/include/exec/translator.h index 4e17c4f401..9d9e980819 100644 --- a/include/exec/translator.h +++ b/include/exec/translator.h @@ -72,6 +72,7 @@ typedef enum DisasJumpType { * @num_insns: Number of translated instructions (including current). * @max_insns: Maximum number of instructions to be translated in this TB. * @singlestep_enabled: "Hardware" single stepping enabled. + * @saved_can_do_io: Known value of cpu->neg.can_do_io, or -1 for unknown. * * Architecture-agnostic disassembly context. */ @@ -83,6 +84,7 @@ typedef struct DisasContextBase { int num_insns; int max_insns; bool singlestep_enabled; + int8_t saved_can_do_io; void *host_addr[2]; } DisasContextBase; From a2f99d484c54adda13e62bf75ba512618a3fe470 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 14 Sep 2023 08:26:47 -0700 Subject: [PATCH 4/6] accel/tcg: Improve setting of can_do_io at start of TB MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Initialize can_do_io to true if this the TB has CF_LAST_IO and will consist of a single instruction. This avoids a set to 0 followed immediately by a set to 1. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- accel/tcg/translator.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c index 850d82e26f..dd507cd471 100644 --- a/accel/tcg/translator.c +++ b/accel/tcg/translator.c @@ -87,12 +87,12 @@ static TCGOp *gen_tb_start(DisasContextBase *db, uint32_t cflags) offsetof(ArchCPU, neg.icount_decr.u16.low) - offsetof(ArchCPU, env)); /* - * cpu->can_do_io is cleared automatically here at the beginning of + * cpu->can_do_io is set automatically here at the beginning of * each translation block. The cost is minimal and only paid for * -icount, plus it would be very easy to forget doing it in the * translator. */ - set_can_do_io(db, false); + set_can_do_io(db, db->max_insns == 1 && (cflags & CF_LAST_IO)); } return icount_start_insn; From 200c1f904f46c209cb022e711a48b89e46512902 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 14 Sep 2023 08:36:11 -0700 Subject: [PATCH 5/6] accel/tcg: Always set CF_LAST_IO with CF_NOIRQ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without this we can get see loops through cpu_io_recompile, in which the cpu makes no progress. Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- accel/tcg/cpu-exec.c | 2 +- accel/tcg/tb-maint.c | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index e2c494e75e..c724e8b6f1 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -720,7 +720,7 @@ static inline bool cpu_handle_exception(CPUState *cpu, int *ret) && cpu_neg(cpu)->icount_decr.u16.low + cpu->icount_extra == 0) { /* Execute just one insn to trigger exception pending in the log */ cpu->cflags_next_tb = (curr_cflags(cpu) & ~CF_USE_ICOUNT) - | CF_NOIRQ | 1; + | CF_LAST_IO | CF_NOIRQ | 1; } #endif return false; diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c index 32ae8af61c..0c3e227409 100644 --- a/accel/tcg/tb-maint.c +++ b/accel/tcg/tb-maint.c @@ -1083,7 +1083,8 @@ bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc) if (current_tb_modified) { /* Force execution of one insn next time. */ CPUState *cpu = current_cpu; - cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu); + cpu->cflags_next_tb = + 1 | CF_LAST_IO | CF_NOIRQ | curr_cflags(current_cpu); return true; } return false; @@ -1153,7 +1154,8 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages, if (current_tb_modified) { page_collection_unlock(pages); /* Force execution of one insn next time. */ - current_cpu->cflags_next_tb = 1 | CF_NOIRQ | curr_cflags(current_cpu); + current_cpu->cflags_next_tb = + 1 | CF_LAST_IO | CF_NOIRQ | curr_cflags(current_cpu); mmap_unlock(); cpu_loop_exit_noexc(current_cpu); } From 18a536f1f8d6222e562f59179e837fdfd8b92718 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 12 Sep 2023 19:08:11 -0700 Subject: [PATCH 6/6] accel/tcg: Always require can_do_io MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Require i/o as the last insn of a TranslationBlock always, not only with icount. This is required for i/o that alters the address space, such as a pci config space write. Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1866 Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- accel/tcg/translator.c | 20 +++++++------------- target/mips/tcg/translate.c | 1 - 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c index dd507cd471..358214d526 100644 --- a/accel/tcg/translator.c +++ b/accel/tcg/translator.c @@ -28,12 +28,6 @@ static void set_can_do_io(DisasContextBase *db, bool val) bool translator_io_start(DisasContextBase *db) { - uint32_t cflags = tb_cflags(db->tb); - - if (!(cflags & CF_USE_ICOUNT)) { - return false; - } - set_can_do_io(db, true); /* @@ -86,15 +80,15 @@ static TCGOp *gen_tb_start(DisasContextBase *db, uint32_t cflags) tcg_gen_st16_i32(count, cpu_env, offsetof(ArchCPU, neg.icount_decr.u16.low) - offsetof(ArchCPU, env)); - /* - * cpu->can_do_io is set automatically here at the beginning of - * each translation block. The cost is minimal and only paid for - * -icount, plus it would be very easy to forget doing it in the - * translator. - */ - set_can_do_io(db, db->max_insns == 1 && (cflags & CF_LAST_IO)); } + /* + * cpu->can_do_io is set automatically here at the beginning of + * each translation block. The cost is minimal, plus it would be + * very easy to forget doing it in the translator. + */ + set_can_do_io(db, db->max_insns == 1 && (cflags & CF_LAST_IO)); + return icount_start_insn; } diff --git a/target/mips/tcg/translate.c b/target/mips/tcg/translate.c index 9bb40f1849..593fc80458 100644 --- a/target/mips/tcg/translate.c +++ b/target/mips/tcg/translate.c @@ -11212,7 +11212,6 @@ static void gen_branch(DisasContext *ctx, int insn_bytes) /* Branches completion */ clear_branch_hflags(ctx); ctx->base.is_jmp = DISAS_NORETURN; - /* FIXME: Need to clear can_do_io. */ switch (proc_hflags & MIPS_HFLAG_BMASK_BASE) { case MIPS_HFLAG_FBNSLOT: gen_goto_tb(ctx, 0, ctx->base.pc_next + insn_bytes);