From e6e66b03287331abc6f184456dbc6d25505590ec Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Wed, 30 Aug 2023 13:33:54 -0700 Subject: [PATCH 01/14] linux-user: Fixes for zero_bss MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous change, 2d385be6152, assumed !PAGE_VALID meant that the page would be unmapped by the elf image. However, since we reserved the entire image space via mmap, PAGE_VALID will always be set. Instead, assume PROT_NONE for the same condition. Furthermore, assume bss is only ever present for writable segments, and that there is no page overlap between PT_LOAD segments. Instead of an assert, return false to indicate failure. Cc: qemu-stable@nongnu.org Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1854 Fixes: 2d385be6152 ("linux-user: Do not adjust zero_bss for host page size") Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- linux-user/elfload.c | 53 +++++++++++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 13 deletions(-) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index f21e2e0c3d..213fd3e584 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -2362,31 +2362,58 @@ static abi_ulong setup_arg_pages(struct linux_binprm *bprm, * Map and zero the bss. We need to explicitly zero any fractional pages * after the data section (i.e. bss). Return false on mapping failure. */ -static bool zero_bss(abi_ulong start_bss, abi_ulong end_bss, int prot) +static bool zero_bss(abi_ulong start_bss, abi_ulong end_bss, + int prot, Error **errp) { abi_ulong align_bss; + /* We only expect writable bss; the code segment shouldn't need this. */ + if (!(prot & PROT_WRITE)) { + error_setg(errp, "PT_LOAD with non-writable bss"); + return false; + } + align_bss = TARGET_PAGE_ALIGN(start_bss); end_bss = TARGET_PAGE_ALIGN(end_bss); if (start_bss < align_bss) { int flags = page_get_flags(start_bss); - if (!(flags & PAGE_VALID)) { - /* Map the start of the bss. */ + if (!(flags & PAGE_BITS)) { + /* + * The whole address space of the executable was reserved + * at the start, therefore all pages will be VALID. + * But assuming there are no PROT_NONE PT_LOAD segments, + * a PROT_NONE page means no data all bss, and we can + * simply extend the new anon mapping back to the start + * of the page of bss. + */ align_bss -= TARGET_PAGE_SIZE; - } else if (flags & PAGE_WRITE) { - /* The page is already mapped writable. */ - memset(g2h_untagged(start_bss), 0, align_bss - start_bss); } else { - /* Read-only zeros? */ - g_assert_not_reached(); + /* + * The start of the bss shares a page with something. + * The only thing that we expect is the data section, + * which would already be marked writable. + * Overlapping the RX code segment seems malformed. + */ + if (!(flags & PAGE_WRITE)) { + error_setg(errp, "PT_LOAD with bss overlapping " + "non-writable page"); + return false; + } + + /* The page is already mapped and writable. */ + memset(g2h_untagged(start_bss), 0, align_bss - start_bss); } } - return align_bss >= end_bss || - target_mmap(align_bss, end_bss - align_bss, prot, - MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0) != -1; + if (align_bss < end_bss && + target_mmap(align_bss, end_bss - align_bss, prot, + MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0) == -1) { + error_setg_errno(errp, errno, "Error mapping bss"); + return false; + } + return true; } #if defined(TARGET_ARM) @@ -3410,8 +3437,8 @@ static void load_elf_image(const char *image_name, int image_fd, /* If the load segment requests extra zeros (e.g. bss), map it. */ if (vaddr_ef < vaddr_em && - !zero_bss(vaddr_ef, vaddr_em, elf_prot)) { - goto exit_mmap; + !zero_bss(vaddr_ef, vaddr_em, elf_prot, &err)) { + goto exit_errmsg; } /* Find the full program boundaries. */ From 6fad9b4bb91dcc824f9c00a36ee843883b58313b Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 28 Sep 2023 21:55:20 +0200 Subject: [PATCH 02/14] linux-user/mips: fix abort on integer overflow QEMU mips userspace emulation crashes with "qemu: unhandled CPU exception 0x15 - aborting" when one of the integer arithmetic instructions detects an overflow. This patch fixes it so that it delivers SIGFPE with FPE_INTOVF instead. Cc: qemu-stable@nongnu.org Signed-off-by: Mikulas Patocka Message-Id: <3ef979a8-3ee1-eb2d-71f7-d788ff88dd11@redhat.com> Reviewed-by: Richard Henderson Signed-off-by: Richard Henderson --- linux-user/mips/cpu_loop.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/linux-user/mips/cpu_loop.c b/linux-user/mips/cpu_loop.c index 8735e58bad..990b03e727 100644 --- a/linux-user/mips/cpu_loop.c +++ b/linux-user/mips/cpu_loop.c @@ -180,7 +180,9 @@ done_syscall: } force_sig_fault(TARGET_SIGFPE, si_code, env->active_tc.PC); break; - + case EXCP_OVERFLOW: + force_sig_fault(TARGET_SIGFPE, TARGET_FPE_INTOVF, env->active_tc.PC); + break; /* The code below was inspired by the MIPS Linux kernel trap * handling code in arch/mips/kernel/traps.c. */ From 3b894b699c9a9c064466e128c18be80a3f2113bc Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 28 Sep 2023 18:42:08 +0200 Subject: [PATCH 03/14] linux-user/sh4: Fix crashes on signal delivery sh4 uses gUSA (general UserSpace Atomicity) to provide atomicity on CPUs that don't have atomic instructions. A gUSA region that adds 1 to an atomic variable stored in @R2 looks like this: 4004b6: 03 c7 mova 4004c4 ,r0 4004b8: f3 61 mov r15,r1 4004ba: 09 00 nop 4004bc: fa ef mov #-6,r15 4004be: 22 63 mov.l @r2,r3 4004c0: 01 73 add #1,r3 4004c2: 32 22 mov.l r3,@r2 4004c4: 13 6f mov r1,r15 R0 contains a pointer to the end of the gUSA region R1 contains the saved stack pointer R15 contains negative length of the gUSA region When this region is interrupted by a signal, the kernel detects if R15 >= -128U. If yes, the kernel rolls back PC to the beginning of the region and restores SP by copying R1 to R15. The problem happens if we are interrupted by a signal at address 4004c4. R15 still holds the value -6, but the atomic value was already written by an instruction at address 4004c2. In this situation we can't undo the gUSA. The function unwind_gusa does nothing, the signal handler attempts to push a signal frame to the address -6 and crashes. This patch fixes it, so that if we are interrupted at the last instruction in a gUSA region, we copy R1 to R15 to restore the correct stack pointer and avoid crashing. There's another bug: if we are interrupted in a delay slot, we save the address of the instruction in the delay slot. We must save the address of the previous instruction. Cc: qemu-stable@nongnu.org Signed-off-by: Mikulas Patocka Reviewed-by: Yoshinori Sato Message-Id: Reviewed-by: Richard Henderson Signed-off-by: Richard Henderson --- linux-user/sh4/signal.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/linux-user/sh4/signal.c b/linux-user/sh4/signal.c index c4ba962708..c16c2c2d57 100644 --- a/linux-user/sh4/signal.c +++ b/linux-user/sh4/signal.c @@ -104,6 +104,14 @@ static void unwind_gusa(CPUSH4State *regs) /* Reset the SP to the saved version in R1. */ regs->gregs[15] = regs->gregs[1]; + } else if (regs->gregs[15] >= -128u && regs->pc == regs->gregs[0]) { + /* If we are on the last instruction of a gUSA region, we must reset + the SP, otherwise we would be pushing the signal context to + invalid memory. */ + regs->gregs[15] = regs->gregs[1]; + } else if (regs->flags & TB_FLAG_DELAY_SLOT) { + /* If we are in a delay slot, push the previous instruction. */ + regs->pc -= 2; } } From a9f6004f677923f620e0927a626d1bdaa1eb2166 Mon Sep 17 00:00:00 2001 From: Jiajie Chen Date: Sun, 1 Oct 2023 16:53:05 +0800 Subject: [PATCH 04/14] linux-user/elfload: Enable LSX/LASX in HWCAP for LoongArch Since support for LSX and LASX is landed in QEMU recently, we can update HWCAPS accordingly. Signed-off-by: Jiajie Chen Reviewed-by: Richard Henderson Message-Id: <20231001085315.1692667-1-c@jia.je> Signed-off-by: Richard Henderson --- linux-user/elfload.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 213fd3e584..2e3809f03c 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -1237,6 +1237,14 @@ static uint32_t get_elf_hwcap(void) hwcaps |= HWCAP_LOONGARCH_LAM; } + if (FIELD_EX32(cpu->env.cpucfg[2], CPUCFG2, LSX)) { + hwcaps |= HWCAP_LOONGARCH_LSX; + } + + if (FIELD_EX32(cpu->env.cpucfg[2], CPUCFG2, LASX)) { + hwcaps |= HWCAP_LOONGARCH_LASX; + } + return hwcaps; } From 912ff698cae8879d981157fd8cca1354248fddcc Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 3 Oct 2023 13:59:55 -0700 Subject: [PATCH 05/14] linux-user: Propagate failure in mmap_reserve_or_unmap back to target_munmap Do not assert success, but return any failure received. Additionally, fix the method of earlier error return in target_munmap. Reported-by: Andreas Schwab Signed-off-by: Richard Henderson --- linux-user/mmap.c | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/linux-user/mmap.c b/linux-user/mmap.c index 8ccaab7859..7b44b9ff49 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -778,7 +778,7 @@ fail: return -1; } -static void mmap_reserve_or_unmap(abi_ulong start, abi_ulong len) +static int mmap_reserve_or_unmap(abi_ulong start, abi_ulong len) { abi_ulong real_start; abi_ulong real_last; @@ -807,7 +807,7 @@ static void mmap_reserve_or_unmap(abi_ulong start, abi_ulong len) prot |= page_get_flags(a + 1); } if (prot != 0) { - return; + return 0; } } else { for (prot = 0, a = real_start; a < start; a += TARGET_PAGE_SIZE) { @@ -825,7 +825,7 @@ static void mmap_reserve_or_unmap(abi_ulong start, abi_ulong len) } if (real_last < real_start) { - return; + return 0; } } @@ -836,32 +836,36 @@ static void mmap_reserve_or_unmap(abi_ulong start, abi_ulong len) void *ptr = mmap(host_start, real_len, PROT_NONE, MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0); - assert(ptr == host_start); - } else { - int ret = munmap(host_start, real_len); - assert(ret == 0); + return ptr == host_start ? 0 : -1; } + return munmap(host_start, real_len); } int target_munmap(abi_ulong start, abi_ulong len) { + int ret; + trace_target_munmap(start, len); if (start & ~TARGET_PAGE_MASK) { - return -TARGET_EINVAL; + errno = EINVAL; + return -1; } len = TARGET_PAGE_ALIGN(len); if (len == 0 || !guest_range_valid_untagged(start, len)) { - return -TARGET_EINVAL; + errno = EINVAL; + return -1; } mmap_lock(); - mmap_reserve_or_unmap(start, len); - page_set_flags(start, start + len - 1, 0); - shm_region_rm_complete(start, start + len - 1); + ret = mmap_reserve_or_unmap(start, len); + if (likely(ret == 0)) { + page_set_flags(start, start + len - 1, 0); + shm_region_rm_complete(start, start + len - 1); + } mmap_unlock(); - return 0; + return ret; } abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size, From b8b50f1e9ac8007831c2df1bd0d728f420d5818c Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 22 Aug 2023 12:53:19 -0700 Subject: [PATCH 06/14] linux-user: Split out die_with_signal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Because we trap so many signals for use by the guest, we have to take extra steps to exit properly. Acked-by: Helge Deller Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- linux-user/signal.c | 52 ++++++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/linux-user/signal.c b/linux-user/signal.c index a67ab47d30..b7a2c47837 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -689,13 +689,39 @@ void cpu_loop_exit_sigbus(CPUState *cpu, target_ulong addr, } /* abort execution with signal */ +static G_NORETURN +void die_with_signal(int host_sig) +{ + struct sigaction act = { + .sa_handler = SIG_DFL, + }; + + /* + * The proper exit code for dying from an uncaught signal is -. + * The kernel doesn't allow exit() or _exit() to pass a negative value. + * To get the proper exit code we need to actually die from an uncaught + * signal. Here the default signal handler is installed, we send + * the signal and we wait for it to arrive. + */ + sigfillset(&act.sa_mask); + sigaction(host_sig, &act, NULL); + + kill(getpid(), host_sig); + + /* Make sure the signal isn't masked (reusing the mask inside of act). */ + sigdelset(&act.sa_mask, host_sig); + sigsuspend(&act.sa_mask); + + /* unreachable */ + abort(); +} + static G_NORETURN void dump_core_and_abort(CPUArchState *env, int target_sig) { CPUState *cpu = env_cpu(env); TaskState *ts = (TaskState *)cpu->opaque; int host_sig, core_dumped = 0; - struct sigaction act; host_sig = target_to_host_signal(target_sig); trace_user_dump_core_and_abort(env, target_sig, host_sig); @@ -719,29 +745,7 @@ void dump_core_and_abort(CPUArchState *env, int target_sig) } preexit_cleanup(env, 128 + target_sig); - - /* The proper exit code for dying from an uncaught signal is - * -. The kernel doesn't allow exit() or _exit() to pass - * a negative value. To get the proper exit code we need to - * actually die from an uncaught signal. Here the default signal - * handler is installed, we send ourself a signal and we wait for - * it to arrive. */ - sigfillset(&act.sa_mask); - act.sa_handler = SIG_DFL; - act.sa_flags = 0; - sigaction(host_sig, &act, NULL); - - /* For some reason raise(host_sig) doesn't send the signal when - * statically linked on x86-64. */ - kill(getpid(), host_sig); - - /* Make sure the signal isn't masked (just reuse the mask inside - of act) */ - sigdelset(&act.sa_mask, host_sig); - sigsuspend(&act.sa_mask); - - /* unreachable */ - abort(); + die_with_signal(host_sig); } /* queue a signal so that it will be send to the virtual CPU as soon From ee72c47eebe119575b2bb684912bbb8c9efc9ba7 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 22 Aug 2023 13:08:11 -0700 Subject: [PATCH 07/14] linux-user: Exit not abort in die_with_backtrace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This line is supposed to be unreachable, but if we're going to have it at all, SIGABRT via abort() is subject to the same signal peril that created this function in the first place. We can _exit immediately without peril. Acked-by: Helge Deller Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- linux-user/signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linux-user/signal.c b/linux-user/signal.c index b7a2c47837..84a56b76cc 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -713,7 +713,7 @@ void die_with_signal(int host_sig) sigsuspend(&act.sa_mask); /* unreachable */ - abort(); + _exit(EXIT_FAILURE); } static G_NORETURN From 7dfd3ca8d95f9962cdd2ebdfcdd699279b98fa18 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 12 Aug 2023 18:43:14 +0200 Subject: [PATCH 08/14] linux-user: Detect and report host crashes If there is an internal program error in the qemu source code which raises SIGSEGV or SIGBUS, we currently assume the signal belongs to the guest. With an artificial error introduced, we will now print QEMU internal SIGSEGV {code=MAPERR, addr=(nil)} Signed-off-by: Helge Deller Message-Id: <20230812164314.352131-1-deller@gmx.de> [rth: Use in_code_gen_buffer and die_with_signal; drop backtrace] Signed-off-by: Richard Henderson --- linux-user/signal.c | 69 +++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 63 insertions(+), 6 deletions(-) diff --git a/linux-user/signal.c b/linux-user/signal.c index 84a56b76cc..9fadc51347 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -32,6 +32,7 @@ #include "signal-common.h" #include "host-signal.h" #include "user/safe-syscall.h" +#include "tcg/tcg.h" static struct target_sigaction sigact_table[TARGET_NSIG]; @@ -779,6 +780,50 @@ static inline void rewind_if_in_safe_syscall(void *puc) } } +static G_NORETURN +void die_from_signal(siginfo_t *info) +{ + char sigbuf[4], codebuf[12]; + const char *sig, *code = NULL; + + switch (info->si_signo) { + case SIGSEGV: + sig = "SEGV"; + switch (info->si_code) { + case SEGV_MAPERR: + code = "MAPERR"; + break; + case SEGV_ACCERR: + code = "ACCERR"; + break; + } + break; + case SIGBUS: + sig = "BUS"; + switch (info->si_code) { + case BUS_ADRALN: + code = "ADRALN"; + break; + case BUS_ADRERR: + code = "ADRERR"; + break; + } + break; + default: + snprintf(sigbuf, sizeof(sigbuf), "%d", info->si_signo); + sig = sigbuf; + break; + } + if (code == NULL) { + snprintf(codebuf, sizeof(sigbuf), "%d", info->si_code); + code = codebuf; + } + + error_report("QEMU internal SIG%s {code=%s, addr=%p}", + sig, code, info->si_addr); + die_with_signal(info->si_signo); +} + static void host_signal_handler(int host_sig, siginfo_t *info, void *puc) { CPUState *cpu = thread_cpu; @@ -814,16 +859,28 @@ static void host_signal_handler(int host_sig, siginfo_t *info, void *puc) is_write = host_signal_write(info, uc); access_type = adjust_signal_pc(&pc, is_write); + /* If this was a write to a TB protected page, restart. */ + if (is_write + && host_sig == SIGSEGV + && info->si_code == SEGV_ACCERR + && h2g_valid(host_addr) + && handle_sigsegv_accerr_write(cpu, sigmask, pc, guest_addr)) { + return; + } + + /* + * If the access was not on behalf of the guest, within the executable + * mapping of the generated code buffer, then it is a host bug. + */ + if (access_type != MMU_INST_FETCH + && !in_code_gen_buffer((void *)(pc - tcg_splitwx_diff))) { + die_from_signal(info); + } + if (host_sig == SIGSEGV) { bool maperr = true; if (info->si_code == SEGV_ACCERR && h2g_valid(host_addr)) { - /* If this was a write to a TB protected page, restart. */ - if (is_write && - handle_sigsegv_accerr_write(cpu, sigmask, pc, guest_addr)) { - return; - } - /* * With reserved_va, the whole address space is PROT_NONE, * which means that we may get ACCERR when we want MAPERR. From dbde2c0c0eec5f9d6228fd418a127def561ea516 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 22 Aug 2023 14:55:03 -0700 Subject: [PATCH 09/14] linux-user: Only register handlers for core_dump_signal by default The set of fatal signals is really immaterial. If one arrives, and is unhandled, then the qemu process dies and the parent gets the correct signal. It is only for those signals which we would like to perform a guest core dump instead of a host core dump that we need to catch. Acked-by: Helge Deller Signed-off-by: Richard Henderson --- linux-user/signal.c | 43 ++++++++++++++----------------------------- 1 file changed, 14 insertions(+), 29 deletions(-) diff --git a/linux-user/signal.c b/linux-user/signal.c index 9fadc51347..aab05f8eec 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -488,26 +488,6 @@ void target_to_host_siginfo(siginfo_t *info, const target_siginfo_t *tinfo) info->si_value.sival_ptr = (void *)(long)sival_ptr; } -static int fatal_signal (int sig) -{ - switch (sig) { - case TARGET_SIGCHLD: - case TARGET_SIGURG: - case TARGET_SIGWINCH: - /* Ignored by default. */ - return 0; - case TARGET_SIGCONT: - case TARGET_SIGSTOP: - case TARGET_SIGTSTP: - case TARGET_SIGTTIN: - case TARGET_SIGTTOU: - /* Job control signals. */ - return 0; - default: - return 1; - } -} - /* returns 1 if given signal should dump core if not handled */ static int core_dump_signal(int sig) { @@ -602,8 +582,9 @@ void signal_init(void) SIGSEGV and SIGBUS, to detect exceptions. We can not just trap all signals because it affects syscall interrupt behavior. But do trap all default-fatal signals. */ - if (fatal_signal (i)) + if (core_dump_signal(i)) { sigaction(host_sig, &act, NULL); + } } } @@ -997,7 +978,6 @@ int do_sigaction(int sig, const struct target_sigaction *act, struct target_sigaction *oact, abi_ulong ka_restorer) { struct target_sigaction *k; - struct sigaction act1; int host_sig; int ret = 0; @@ -1057,22 +1037,27 @@ int do_sigaction(int sig, const struct target_sigaction *act, return 0; } if (host_sig != SIGSEGV && host_sig != SIGBUS) { + struct sigaction act1; + sigfillset(&act1.sa_mask); act1.sa_flags = SA_SIGINFO; - if (k->sa_flags & TARGET_SA_RESTART) - act1.sa_flags |= SA_RESTART; - /* NOTE: it is important to update the host kernel signal - ignore state to avoid getting unexpected interrupted - syscalls */ if (k->_sa_handler == TARGET_SIG_IGN) { + /* + * It is important to update the host kernel signal ignore + * state to avoid getting unexpected interrupted syscalls. + */ act1.sa_sigaction = (void *)SIG_IGN; } else if (k->_sa_handler == TARGET_SIG_DFL) { - if (fatal_signal (sig)) + if (core_dump_signal(sig)) { act1.sa_sigaction = host_signal_handler; - else + } else { act1.sa_sigaction = (void *)SIG_DFL; + } } else { act1.sa_sigaction = host_signal_handler; + if (k->sa_flags & TARGET_SA_RESTART) { + act1.sa_flags |= SA_RESTART; + } } ret = sigaction(host_sig, &act1, NULL); } From b60b91aa8f3f053064ab3c6be4b2e388a82ff6b4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 22 Aug 2023 20:45:38 -0700 Subject: [PATCH 10/14] linux-user: Map unsupported signals to an out-of-bounds value Do not return a valid signal number in one domain when given an invalid signal number in the other domain. Acked-by: Helge Deller Signed-off-by: Richard Henderson --- linux-user/signal.c | 72 ++++++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 34 deletions(-) diff --git a/linux-user/signal.c b/linux-user/signal.c index aab05f8eec..653fd2f9fd 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -44,9 +44,8 @@ abi_ulong default_sigreturn; abi_ulong default_rt_sigreturn; /* - * System includes define _NSIG as SIGRTMAX + 1, - * but qemu (like the kernel) defines TARGET_NSIG as TARGET_SIGRTMAX - * and the first signal is SIGHUP defined as 1 + * System includes define _NSIG as SIGRTMAX + 1, but qemu (like the kernel) + * defines TARGET_NSIG as TARGET_SIGRTMAX and the first signal is 1. * Signal number 0 is reserved for use as kill(pid, 0), to test whether * a process exists without sending it a signal. */ @@ -57,7 +56,6 @@ static uint8_t host_to_target_signal_table[_NSIG] = { #define MAKE_SIG_ENTRY(sig) [sig] = TARGET_##sig, MAKE_SIGNAL_LIST #undef MAKE_SIG_ENTRY - /* next signals stay the same */ }; static uint8_t target_to_host_signal_table[TARGET_NSIG + 1]; @@ -65,18 +63,24 @@ static uint8_t target_to_host_signal_table[TARGET_NSIG + 1]; /* valid sig is between 1 and _NSIG - 1 */ int host_to_target_signal(int sig) { - if (sig < 1 || sig >= _NSIG) { + if (sig < 1) { return sig; } + if (sig >= _NSIG) { + return TARGET_NSIG + 1; + } return host_to_target_signal_table[sig]; } /* valid sig is between 1 and TARGET_NSIG */ int target_to_host_signal(int sig) { - if (sig < 1 || sig > TARGET_NSIG) { + if (sig < 1) { return sig; } + if (sig > TARGET_NSIG) { + return _NSIG; + } return target_to_host_signal_table[sig]; } @@ -507,48 +511,48 @@ static int core_dump_signal(int sig) static void signal_table_init(void) { - int host_sig, target_sig, count; + int hsig, tsig, count; /* * Signals are supported starting from TARGET_SIGRTMIN and going up - * until we run out of host realtime signals. - * glibc at least uses only the lower 2 rt signals and probably - * nobody's using the upper ones. - * it's why SIGRTMIN (34) is generally greater than __SIGRTMIN (32) - * To fix this properly we need to do manual signal delivery multiplexed - * over a single host signal. + * until we run out of host realtime signals. Glibc uses the lower 2 + * RT signals and (hopefully) nobody uses the upper ones. + * This is why SIGRTMIN (34) is generally greater than __SIGRTMIN (32). + * To fix this properly we would need to do manual signal delivery + * multiplexed over a single host signal. * Attempts for configure "missing" signals via sigaction will be * silently ignored. */ - for (host_sig = SIGRTMIN; host_sig <= SIGRTMAX; host_sig++) { - target_sig = host_sig - SIGRTMIN + TARGET_SIGRTMIN; - if (target_sig <= TARGET_NSIG) { - host_to_target_signal_table[host_sig] = target_sig; + for (hsig = SIGRTMIN; hsig <= SIGRTMAX; hsig++) { + tsig = hsig - SIGRTMIN + TARGET_SIGRTMIN; + if (tsig <= TARGET_NSIG) { + host_to_target_signal_table[hsig] = tsig; } } - /* generate signal conversion tables */ - for (target_sig = 1; target_sig <= TARGET_NSIG; target_sig++) { - target_to_host_signal_table[target_sig] = _NSIG; /* poison */ - } - for (host_sig = 1; host_sig < _NSIG; host_sig++) { - if (host_to_target_signal_table[host_sig] == 0) { - host_to_target_signal_table[host_sig] = host_sig; - } - target_sig = host_to_target_signal_table[host_sig]; - if (target_sig <= TARGET_NSIG) { - target_to_host_signal_table[target_sig] = host_sig; + /* Invert the mapping that has already been assigned. */ + for (hsig = 1; hsig < _NSIG; hsig++) { + tsig = host_to_target_signal_table[hsig]; + if (tsig) { + assert(target_to_host_signal_table[tsig] == 0); + target_to_host_signal_table[tsig] = hsig; } } - if (trace_event_get_state_backends(TRACE_SIGNAL_TABLE_INIT)) { - for (target_sig = 1, count = 0; target_sig <= TARGET_NSIG; target_sig++) { - if (target_to_host_signal_table[target_sig] == _NSIG) { - count++; - } + /* Map everything else out-of-bounds. */ + for (hsig = 1; hsig < _NSIG; hsig++) { + if (host_to_target_signal_table[hsig] == 0) { + host_to_target_signal_table[hsig] = TARGET_NSIG + 1; } - trace_signal_table_init(count); } + for (count = 0, tsig = 1; tsig <= TARGET_NSIG; tsig++) { + if (target_to_host_signal_table[tsig] == 0) { + target_to_host_signal_table[tsig] = _NSIG; + count++; + } + } + + trace_signal_table_init(count); } void signal_init(void) From 58c4e36c4e7350d2e7c8ba2d72b74a347d687b68 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 22 Aug 2023 21:20:47 -0700 Subject: [PATCH 11/14] linux-user: Simplify signal_init Install the host signal handler at the same time we are probing the target signals for SIG_IGN/SIG_DFL. Ignore unmapped target signals. Acked-by: Helge Deller Signed-off-by: Richard Henderson --- linux-user/signal.c | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/linux-user/signal.c b/linux-user/signal.c index 653fd2f9fd..09840b0eb0 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -558,10 +558,7 @@ static void signal_table_init(void) void signal_init(void) { TaskState *ts = (TaskState *)thread_cpu->opaque; - struct sigaction act; - struct sigaction oact; - int i; - int host_sig; + struct sigaction act, oact; /* initialize signal conversion tables */ signal_table_init(); @@ -572,23 +569,28 @@ void signal_init(void) sigfillset(&act.sa_mask); act.sa_flags = SA_SIGINFO; act.sa_sigaction = host_signal_handler; - for(i = 1; i <= TARGET_NSIG; i++) { - host_sig = target_to_host_signal(i); - sigaction(host_sig, NULL, &oact); - if (oact.sa_sigaction == (void *)SIG_IGN) { - sigact_table[i - 1]._sa_handler = TARGET_SIG_IGN; - } else if (oact.sa_sigaction == (void *)SIG_DFL) { - sigact_table[i - 1]._sa_handler = TARGET_SIG_DFL; - } - /* If there's already a handler installed then something has - gone horribly wrong, so don't even try to handle that case. */ - /* Install some handlers for our own use. We need at least - SIGSEGV and SIGBUS, to detect exceptions. We can not just - trap all signals because it affects syscall interrupt - behavior. But do trap all default-fatal signals. */ - if (core_dump_signal(i)) { - sigaction(host_sig, &act, NULL); + + /* + * A parent process may configure ignored signals, but all other + * signals are default. For any target signals that have no host + * mapping, set to ignore. For all core_dump_signal, install our + * host signal handler so that we may invoke dump_core_and_abort. + * This includes SIGSEGV and SIGBUS, which are also need our signal + * handler for paging and exceptions. + */ + for (int tsig = 1; tsig <= TARGET_NSIG; tsig++) { + int hsig = target_to_host_signal(tsig); + abi_ptr thand = TARGET_SIG_IGN; + + if (hsig < _NSIG) { + struct sigaction *iact = core_dump_signal(tsig) ? &act : NULL; + + sigaction(hsig, iact, &oact); + if (oact.sa_sigaction != (void *)SIG_IGN) { + thand = TARGET_SIG_DFL; + } } + sigact_table[tsig - 1]._sa_handler = thand; } } From f4e1168198a6306c3e337f2b91b1213f5bef52af Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 22 Aug 2023 21:56:10 -0700 Subject: [PATCH 12/14] linux-user: Split out host_sig{segv,bus}_handler Make host_signal_handler slightly easier to read. Acked-by: Helge Deller Signed-off-by: Richard Henderson --- linux-user/signal.c | 145 ++++++++++++++++++++++++++------------------ 1 file changed, 85 insertions(+), 60 deletions(-) diff --git a/linux-user/signal.c b/linux-user/signal.c index 09840b0eb0..706b8ac7a7 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -811,6 +811,80 @@ void die_from_signal(siginfo_t *info) die_with_signal(info->si_signo); } +static void host_sigsegv_handler(CPUState *cpu, siginfo_t *info, + host_sigcontext *uc) +{ + uintptr_t host_addr = (uintptr_t)info->si_addr; + /* + * Convert forcefully to guest address space: addresses outside + * reserved_va are still valid to report via SEGV_MAPERR. + */ + bool is_valid = h2g_valid(host_addr); + abi_ptr guest_addr = h2g_nocheck(host_addr); + uintptr_t pc = host_signal_pc(uc); + bool is_write = host_signal_write(info, uc); + MMUAccessType access_type = adjust_signal_pc(&pc, is_write); + bool maperr; + + /* If this was a write to a TB protected page, restart. */ + if (is_write + && is_valid + && info->si_code == SEGV_ACCERR + && handle_sigsegv_accerr_write(cpu, host_signal_mask(uc), + pc, guest_addr)) { + return; + } + + /* + * If the access was not on behalf of the guest, within the executable + * mapping of the generated code buffer, then it is a host bug. + */ + if (access_type != MMU_INST_FETCH + && !in_code_gen_buffer((void *)(pc - tcg_splitwx_diff))) { + die_from_signal(info); + } + + maperr = true; + if (is_valid && info->si_code == SEGV_ACCERR) { + /* + * With reserved_va, the whole address space is PROT_NONE, + * which means that we may get ACCERR when we want MAPERR. + */ + if (page_get_flags(guest_addr) & PAGE_VALID) { + maperr = false; + } else { + info->si_code = SEGV_MAPERR; + } + } + + sigprocmask(SIG_SETMASK, host_signal_mask(uc), NULL); + cpu_loop_exit_sigsegv(cpu, guest_addr, access_type, maperr, pc); +} + +static void host_sigbus_handler(CPUState *cpu, siginfo_t *info, + host_sigcontext *uc) +{ + uintptr_t pc = host_signal_pc(uc); + bool is_write = host_signal_write(info, uc); + MMUAccessType access_type = adjust_signal_pc(&pc, is_write); + + /* + * If the access was not on behalf of the guest, within the executable + * mapping of the generated code buffer, then it is a host bug. + */ + if (!in_code_gen_buffer((void *)(pc - tcg_splitwx_diff))) { + die_from_signal(info); + } + + if (info->si_code == BUS_ADRALN) { + uintptr_t host_addr = (uintptr_t)info->si_addr; + abi_ptr guest_addr = h2g_nocheck(host_addr); + + sigprocmask(SIG_SETMASK, host_signal_mask(uc), NULL); + cpu_loop_exit_sigbus(cpu, guest_addr, access_type, pc); + } +} + static void host_signal_handler(int host_sig, siginfo_t *info, void *puc) { CPUState *cpu = thread_cpu; @@ -822,73 +896,23 @@ static void host_signal_handler(int host_sig, siginfo_t *info, void *puc) int guest_sig; uintptr_t pc = 0; bool sync_sig = false; - void *sigmask = host_signal_mask(uc); + void *sigmask; /* * Non-spoofed SIGSEGV and SIGBUS are synchronous, and need special * handling wrt signal blocking and unwinding. */ - if ((host_sig == SIGSEGV || host_sig == SIGBUS) && info->si_code > 0) { - MMUAccessType access_type; - uintptr_t host_addr; - abi_ptr guest_addr; - bool is_write; - - host_addr = (uintptr_t)info->si_addr; - - /* - * Convert forcefully to guest address space: addresses outside - * reserved_va are still valid to report via SEGV_MAPERR. - */ - guest_addr = h2g_nocheck(host_addr); - - pc = host_signal_pc(uc); - is_write = host_signal_write(info, uc); - access_type = adjust_signal_pc(&pc, is_write); - - /* If this was a write to a TB protected page, restart. */ - if (is_write - && host_sig == SIGSEGV - && info->si_code == SEGV_ACCERR - && h2g_valid(host_addr) - && handle_sigsegv_accerr_write(cpu, sigmask, pc, guest_addr)) { + if (info->si_code > 0) { + switch (host_sig) { + case SIGSEGV: + /* Only returns on handle_sigsegv_accerr_write success. */ + host_sigsegv_handler(cpu, info, uc); return; + case SIGBUS: + host_sigbus_handler(cpu, info, uc); + sync_sig = true; + break; } - - /* - * If the access was not on behalf of the guest, within the executable - * mapping of the generated code buffer, then it is a host bug. - */ - if (access_type != MMU_INST_FETCH - && !in_code_gen_buffer((void *)(pc - tcg_splitwx_diff))) { - die_from_signal(info); - } - - if (host_sig == SIGSEGV) { - bool maperr = true; - - if (info->si_code == SEGV_ACCERR && h2g_valid(host_addr)) { - /* - * With reserved_va, the whole address space is PROT_NONE, - * which means that we may get ACCERR when we want MAPERR. - */ - if (page_get_flags(guest_addr) & PAGE_VALID) { - maperr = false; - } else { - info->si_code = SEGV_MAPERR; - } - } - - sigprocmask(SIG_SETMASK, sigmask, NULL); - cpu_loop_exit_sigsegv(cpu, guest_addr, access_type, maperr, pc); - } else { - sigprocmask(SIG_SETMASK, sigmask, NULL); - if (info->si_code == BUS_ADRALN) { - cpu_loop_exit_sigbus(cpu, guest_addr, access_type, pc); - } - } - - sync_sig = true; } /* get target signal number */ @@ -929,6 +953,7 @@ static void host_signal_handler(int host_sig, siginfo_t *info, void *puc) * would write 0xff bytes off the end of the structure and trash * data on the struct. */ + sigmask = host_signal_mask(uc); memset(sigmask, 0xff, SIGSET_T_SIZE); sigdelset(sigmask, SIGSEGV); sigdelset(sigmask, SIGBUS); From 4a6ebc19a7e2d013a0e2ca79452ea733b1d5b686 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Tue, 22 Aug 2023 22:07:41 -0700 Subject: [PATCH 13/14] linux-user: Detect and report host SIGILL, SIGFPE, SIGTRAP These signals, when not spoofed via kill(), are always bugs. Use die_from_signal to report this sensibly. Acked-by: Helge Deller Signed-off-by: Richard Henderson --- linux-user/signal.c | 44 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/linux-user/signal.c b/linux-user/signal.c index 706b8ac7a7..b67077f320 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -796,6 +796,43 @@ void die_from_signal(siginfo_t *info) break; } break; + case SIGILL: + sig = "ILL"; + switch (info->si_code) { + case ILL_ILLOPC: + code = "ILLOPC"; + break; + case ILL_ILLOPN: + code = "ILLOPN"; + break; + case ILL_ILLADR: + code = "ILLADR"; + break; + case ILL_PRVOPC: + code = "PRVOPC"; + break; + case ILL_PRVREG: + code = "PRVREG"; + break; + case ILL_COPROC: + code = "COPROC"; + break; + } + break; + case SIGFPE: + sig = "FPE"; + switch (info->si_code) { + case FPE_INTDIV: + code = "INTDIV"; + break; + case FPE_INTOVF: + code = "INTOVF"; + break; + } + break; + case SIGTRAP: + sig = "TRAP"; + break; default: snprintf(sigbuf, sizeof(sigbuf), "%d", info->si_signo); sig = sigbuf; @@ -900,7 +937,8 @@ static void host_signal_handler(int host_sig, siginfo_t *info, void *puc) /* * Non-spoofed SIGSEGV and SIGBUS are synchronous, and need special - * handling wrt signal blocking and unwinding. + * handling wrt signal blocking and unwinding. Non-spoofed SIGILL, + * SIGFPE, SIGTRAP are always host bugs. */ if (info->si_code > 0) { switch (host_sig) { @@ -912,6 +950,10 @@ static void host_signal_handler(int host_sig, siginfo_t *info, void *puc) host_sigbus_handler(cpu, info, uc); sync_sig = true; break; + case SIGILL: + case SIGFPE: + case SIGTRAP: + die_from_signal(info); } } From 38ee0a7dfb4b15407678df26448e4a18fd9a51d4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 30 Sep 2023 12:05:11 -0700 Subject: [PATCH 14/14] linux-user: Remap guest SIGABRT Distinguish host SIGABRT from guest SIGABRT by mapping the guest signal onto one of the host RT signals. This prevents a cycle by which a host assertion failure is caught and handled by host_signal_handler, queued for the guest, and then we attempt to continue past the host abort. What happens next depends on the host libc, but is neither good nor helpful. Acked-by: Helge Deller Signed-off-by: Richard Henderson --- linux-user/signal.c | 42 +++++++++++++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/linux-user/signal.c b/linux-user/signal.c index b67077f320..3b8efec89f 100644 --- a/linux-user/signal.c +++ b/linux-user/signal.c @@ -522,8 +522,21 @@ static void signal_table_init(void) * multiplexed over a single host signal. * Attempts for configure "missing" signals via sigaction will be * silently ignored. + * + * Remap the target SIGABRT, so that we can distinguish host abort + * from guest abort. When the guest registers a signal handler or + * calls raise(SIGABRT), the host will raise SIG_RTn. If the guest + * arrives at dump_core_and_abort(), we will map back to host SIGABRT + * so that the parent (native or emulated) sees the correct signal. + * Finally, also map host to guest SIGABRT so that the emulated + * parent sees the correct mapping from wait status. */ - for (hsig = SIGRTMIN; hsig <= SIGRTMAX; hsig++) { + + hsig = SIGRTMIN; + host_to_target_signal_table[SIGABRT] = 0; + host_to_target_signal_table[hsig++] = TARGET_SIGABRT; + + for (; hsig <= SIGRTMAX; hsig++) { tsig = hsig - SIGRTMIN + TARGET_SIGRTMIN; if (tsig <= TARGET_NSIG) { host_to_target_signal_table[hsig] = tsig; @@ -539,6 +552,8 @@ static void signal_table_init(void) } } + host_to_target_signal_table[SIGABRT] = TARGET_SIGABRT; + /* Map everything else out-of-bounds. */ for (hsig = 1; hsig < _NSIG; hsig++) { if (host_to_target_signal_table[hsig] == 0) { @@ -582,13 +597,21 @@ void signal_init(void) int hsig = target_to_host_signal(tsig); abi_ptr thand = TARGET_SIG_IGN; - if (hsig < _NSIG) { - struct sigaction *iact = core_dump_signal(tsig) ? &act : NULL; + if (hsig >= _NSIG) { + continue; + } + /* As we force remap SIGABRT, cannot probe and install in one step. */ + if (tsig == TARGET_SIGABRT) { + sigaction(SIGABRT, NULL, &oact); + sigaction(hsig, &act, NULL); + } else { + struct sigaction *iact = core_dump_signal(tsig) ? &act : NULL; sigaction(hsig, iact, &oact); - if (oact.sa_sigaction != (void *)SIG_IGN) { - thand = TARGET_SIG_DFL; - } + } + + if (oact.sa_sigaction != (void *)SIG_IGN) { + thand = TARGET_SIG_DFL; } sigact_table[tsig - 1]._sa_handler = thand; } @@ -711,7 +734,12 @@ void dump_core_and_abort(CPUArchState *env, int target_sig) TaskState *ts = (TaskState *)cpu->opaque; int host_sig, core_dumped = 0; - host_sig = target_to_host_signal(target_sig); + /* On exit, undo the remapping of SIGABRT. */ + if (target_sig == TARGET_SIGABRT) { + host_sig = SIGABRT; + } else { + host_sig = target_to_host_signal(target_sig); + } trace_user_dump_core_and_abort(env, target_sig, host_sig); gdb_signalled(env, target_sig);