From 5a558be93ad628e5bed6e0ee062870f49251725c Mon Sep 17 00:00:00 2001 From: Marco Palumbi Date: Thu, 1 Aug 2024 10:15:02 +0100 Subject: [PATCH 1/4] hw/arm/mps2-tz.c: fix RX/TX interrupts order The order of the RX and TX interrupts are swapped. This commit fixes the order as per the following documents: * https://developer.arm.com/documentation/dai0505/latest/ * https://developer.arm.com/documentation/dai0521/latest/ * https://developer.arm.com/documentation/dai0524/latest/ * https://developer.arm.com/documentation/dai0547/latest/ Cc: qemu-stable@nongnu.org Signed-off-by: Marco Palumbi Message-id: 20240730073123.72992-1-marco@palumbi.it Reviewed-by: Peter Maydell Signed-off-by: Peter Maydell --- hw/arm/mps2-tz.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/arm/mps2-tz.c b/hw/arm/mps2-tz.c index a2d18afd79..aec57c0d68 100644 --- a/hw/arm/mps2-tz.c +++ b/hw/arm/mps2-tz.c @@ -435,7 +435,7 @@ static MemoryRegion *make_uart(MPS2TZMachineState *mms, void *opaque, const char *name, hwaddr size, const int *irqs, const PPCExtraData *extradata) { - /* The irq[] array is tx, rx, combined, in that order */ + /* The irq[] array is rx, tx, combined, in that order */ MPS2TZMachineClass *mmc = MPS2TZ_MACHINE_GET_CLASS(mms); CMSDKAPBUART *uart = opaque; int i = uart - &mms->uart[0]; @@ -447,8 +447,8 @@ static MemoryRegion *make_uart(MPS2TZMachineState *mms, void *opaque, qdev_prop_set_uint32(DEVICE(uart), "pclk-frq", mmc->apb_periph_frq); sysbus_realize(SYS_BUS_DEVICE(uart), &error_fatal); s = SYS_BUS_DEVICE(uart); - sysbus_connect_irq(s, 0, get_sse_irq_in(mms, irqs[0])); - sysbus_connect_irq(s, 1, get_sse_irq_in(mms, irqs[1])); + sysbus_connect_irq(s, 0, get_sse_irq_in(mms, irqs[1])); + sysbus_connect_irq(s, 1, get_sse_irq_in(mms, irqs[0])); sysbus_connect_irq(s, 2, qdev_get_gpio_in(orgate_dev, i * 2)); sysbus_connect_irq(s, 3, qdev_get_gpio_in(orgate_dev, i * 2 + 1)); sysbus_connect_irq(s, 4, get_sse_irq_in(mms, irqs[2])); From 036144cff27ef2f97de7ffc0c1150f3779b94d58 Mon Sep 17 00:00:00 2001 From: Salil Mehta Date: Thu, 1 Aug 2024 10:15:03 +0100 Subject: [PATCH 2/4] accel/kvm/kvm-all: Fixes the missing break in vCPU unpark logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Loop should exit prematurely on successfully finding out the parked vCPU (struct KVMParkedVcpu) in the 'struct KVMState' maintained 'kvm_parked_vcpus' list of parked vCPUs. Fixes: Coverity CID 1558552 Fixes: 08c3286822 ("accel/kvm: Extract common KVM vCPU {creation,parking} code") Reported-by: Peter Maydell Signed-off-by: Salil Mehta Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Gavin Shan Reviewed-by: Zhao Liu Reviewed-by: Igor Mammedov Message-id: 20240725145132.99355-1-salil.mehta@huawei.com Suggested-by: Peter Maydell Message-ID: Signed-off-by: Salil Mehta Signed-off-by: Peter Maydell --- accel/kvm/kvm-all.c | 1 + 1 file changed, 1 insertion(+) diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c index e1d1386306..75d11a07b2 100644 --- a/accel/kvm/kvm-all.c +++ b/accel/kvm/kvm-all.c @@ -362,6 +362,7 @@ int kvm_unpark_vcpu(KVMState *s, unsigned long vcpu_id) QLIST_REMOVE(cpu, node); kvm_fd = cpu->kvm_fd; g_free(cpu); + break; } } From 55f9f4ee018c5ccea81d8c8c586756d7711ae46f Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 1 Aug 2024 10:15:03 +0100 Subject: [PATCH 3/4] target/arm: Handle denormals correctly for FMOPA (widening) The FMOPA (widening) SME instruction takes pairs of half-precision floating point values, widens them to single-precision, does a two-way dot product and accumulates the results into a single-precision destination. We don't quite correctly handle the FPCR bits FZ and FZ16 which control flushing of denormal inputs and outputs. This is because at the moment we pass a single float_status value to the helper function, which then uses that configuration for all the fp operations it does. However, because the inputs to this operation are float16 and the outputs are float32 we need to use the fp_status_f16 for the float16 input widening but the normal fp_status for everything else. Otherwise we will apply the flushing control FPCR.FZ16 to the 32-bit output rather than the FPCR.FZ control, and incorrectly flush a denormal output to zero when we should not (or vice-versa). (In commit 207d30b5fdb5b we tried to fix the FZ handling but didn't get it right, switching from "use FPCR.FZ for everything" to "use FPCR.FZ16 for everything".) Pass the CPU env to the sme_fmopa_h helper instead of an fp_status pointer, and have the helper pass an extra fp_status into the f16_dotadd() function so that we can use the right status for the right parts of this operation. Cc: qemu-stable@nongnu.org Fixes: 207d30b5fdb5 ("target/arm: Use FPST_F16 for SME FMOPA (widening)") Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2373 Signed-off-by: Peter Maydell Reviewed-by: Richard Henderson --- target/arm/tcg/helper-sme.h | 2 +- target/arm/tcg/sme_helper.c | 39 +++++++++++++++++++++++----------- target/arm/tcg/translate-sme.c | 25 ++++++++++++++++++++-- 3 files changed, 51 insertions(+), 15 deletions(-) diff --git a/target/arm/tcg/helper-sme.h b/target/arm/tcg/helper-sme.h index 27eef49a11..d22bf9d21b 100644 --- a/target/arm/tcg/helper-sme.h +++ b/target/arm/tcg/helper-sme.h @@ -121,7 +121,7 @@ DEF_HELPER_FLAGS_5(sme_addha_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_5(sme_addva_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_7(sme_fmopa_h, TCG_CALL_NO_RWG, - void, ptr, ptr, ptr, ptr, ptr, ptr, i32) + void, ptr, ptr, ptr, ptr, ptr, env, i32) DEF_HELPER_FLAGS_7(sme_fmopa_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, ptr, ptr, i32) DEF_HELPER_FLAGS_7(sme_fmopa_d, TCG_CALL_NO_RWG, diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c index 3ba826a6ce..02106809ce 100644 --- a/target/arm/tcg/sme_helper.c +++ b/target/arm/tcg/sme_helper.c @@ -992,12 +992,23 @@ static inline uint32_t f16mop_adj_pair(uint32_t pair, uint32_t pg, uint32_t neg) } static float32 f16_dotadd(float32 sum, uint32_t e1, uint32_t e2, - float_status *s_std, float_status *s_odd) + float_status *s_f16, float_status *s_std, + float_status *s_odd) { - float64 e1r = float16_to_float64(e1 & 0xffff, true, s_std); - float64 e1c = float16_to_float64(e1 >> 16, true, s_std); - float64 e2r = float16_to_float64(e2 & 0xffff, true, s_std); - float64 e2c = float16_to_float64(e2 >> 16, true, s_std); + /* + * We need three different float_status for different parts of this + * operation: + * - the input conversion of the float16 values must use the + * f16-specific float_status, so that the FPCR.FZ16 control is applied + * - operations on float32 including the final accumulation must use + * the normal float_status, so that FPCR.FZ is applied + * - we have pre-set-up copy of s_std which is set to round-to-odd, + * for the multiply (see below) + */ + float64 e1r = float16_to_float64(e1 & 0xffff, true, s_f16); + float64 e1c = float16_to_float64(e1 >> 16, true, s_f16); + float64 e2r = float16_to_float64(e2 & 0xffff, true, s_f16); + float64 e2c = float16_to_float64(e2 >> 16, true, s_f16); float64 t64; float32 t32; @@ -1019,20 +1030,23 @@ static float32 f16_dotadd(float32 sum, uint32_t e1, uint32_t e2, } void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn, - void *vpm, void *vst, uint32_t desc) + void *vpm, CPUARMState *env, uint32_t desc) { intptr_t row, col, oprsz = simd_maxsz(desc); uint32_t neg = simd_data(desc) * 0x80008000u; uint16_t *pn = vpn, *pm = vpm; - float_status fpst_odd, fpst_std; + float_status fpst_odd, fpst_std, fpst_f16; /* - * Make a copy of float_status because this operation does not - * update the cumulative fp exception status. It also produces - * default nans. Make a second copy with round-to-odd -- see above. + * Make copies of fp_status and fp_status_f16, because this operation + * does not update the cumulative fp exception status. It also + * produces default NaNs. We also need a second copy of fp_status with + * round-to-odd -- see above. */ - fpst_std = *(float_status *)vst; + fpst_f16 = env->vfp.fp_status_f16; + fpst_std = env->vfp.fp_status; set_default_nan_mode(true, &fpst_std); + set_default_nan_mode(true, &fpst_f16); fpst_odd = fpst_std; set_float_rounding_mode(float_round_to_odd, &fpst_odd); @@ -1052,7 +1066,8 @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn, uint32_t m = *(uint32_t *)(vzm + H1_4(col)); m = f16mop_adj_pair(m, pcol, 0); - *a = f16_dotadd(*a, n, m, &fpst_std, &fpst_odd); + *a = f16_dotadd(*a, n, m, + &fpst_f16, &fpst_std, &fpst_odd); } col += 4; pcol >>= 4; diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c index a50a419af2..ae42ddef7b 100644 --- a/target/arm/tcg/translate-sme.c +++ b/target/arm/tcg/translate-sme.c @@ -334,8 +334,29 @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz, return true; } -TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a, - MO_32, FPST_FPCR_F16, gen_helper_sme_fmopa_h) +static bool do_outprod_env(DisasContext *s, arg_op *a, MemOp esz, + gen_helper_gvec_5_ptr *fn) +{ + int svl = streaming_vec_reg_size(s); + uint32_t desc = simd_desc(svl, svl, a->sub); + TCGv_ptr za, zn, zm, pn, pm; + + if (!sme_smza_enabled_check(s)) { + return true; + } + + za = get_tile(s, esz, a->zad); + zn = vec_full_reg_ptr(s, a->zn); + zm = vec_full_reg_ptr(s, a->zm); + pn = pred_full_reg_ptr(s, a->pn); + pm = pred_full_reg_ptr(s, a->pm); + + fn(za, zn, zm, pn, pm, tcg_env, tcg_constant_i32(desc)); + return true; +} + +TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_env, a, + MO_32, gen_helper_sme_fmopa_h) TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, FPST_FPCR, gen_helper_sme_fmopa_s) TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, From 5e8e4f098d872818aa9a138a171200068b81c8d1 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Wed, 31 Jul 2024 18:22:46 +0100 Subject: [PATCH 4/4] target/xtensa: Correct assert condition in handle_interrupt() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In commit ad18376b90c8101 we added an assert that the level value was in-bounds for the array we're about to index into. However, the assert condition is wrong -- env->config->interrupt_vector is an array of uint32_t, so we should bounds check the index against ARRAY_SIZE(...), not against sizeof(). Resolves: Coverity CID 1507131 Fixes: ad18376b90c8101 ("target/xtensa: Assert that interrupt level is within bounds") Signed-off-by: Peter Maydell Acked-by: Max Filippov Reviewed-by: Philippe Mathieu-Daudé Message-id: 20240731172246.3682311-1-peter.maydell@linaro.org --- target/xtensa/exc_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/xtensa/exc_helper.c b/target/xtensa/exc_helper.c index 0514c2c1f3..ca629f071d 100644 --- a/target/xtensa/exc_helper.c +++ b/target/xtensa/exc_helper.c @@ -171,7 +171,7 @@ static void handle_interrupt(CPUXtensaState *env) if (level > 1) { /* env->config->nlevel check should have ensured this */ - assert(level < sizeof(env->config->interrupt_vector)); + assert(level < ARRAY_SIZE(env->config->interrupt_vector)); env->sregs[EPC1 + level - 1] = env->pc; env->sregs[EPS2 + level - 2] = env->sregs[PS];