diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c index ed5e4a4599..32f0647ca4 100644 --- a/target/arm/tcg/helper-a64.c +++ b/target/arm/tcg/helper-a64.c @@ -38,6 +38,7 @@ #ifdef CONFIG_USER_ONLY #include "user/page-protection.h" #endif +#include "vec_internal.h" /* C2.4.7 Multiply and divide */ /* special cases for 0 and LLONG_MIN are mandated by the standard */ @@ -208,88 +209,52 @@ uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, float_status *fpst) return -float64_lt(b, a, fpst); } -/* Reciprocal step and sqrt step. Note that unlike the A32/T32 +/* + * Reciprocal step and sqrt step. Note that unlike the A32/T32 * versions, these do a fully fused multiply-add or * multiply-add-and-halve. + * The FPCR.AH == 1 versions need to avoid flipping the sign of NaN. */ - -uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, float_status *fpst) -{ - a = float16_squash_input_denormal(a, fpst); - b = float16_squash_input_denormal(b, fpst); - - a = float16_chs(a); - if ((float16_is_infinity(a) && float16_is_zero(b)) || - (float16_is_infinity(b) && float16_is_zero(a))) { - return float16_two; +#define DO_RECPS(NAME, CTYPE, FLOATTYPE, CHSFN) \ + CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \ + { \ + a = FLOATTYPE ## _squash_input_denormal(a, fpst); \ + b = FLOATTYPE ## _squash_input_denormal(b, fpst); \ + a = FLOATTYPE ## _ ## CHSFN(a); \ + if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \ + (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \ + return FLOATTYPE ## _two; \ + } \ + return FLOATTYPE ## _muladd(a, b, FLOATTYPE ## _two, 0, fpst); \ } - return float16_muladd(a, b, float16_two, 0, fpst); -} -float32 HELPER(recpsf_f32)(float32 a, float32 b, float_status *fpst) -{ - a = float32_squash_input_denormal(a, fpst); - b = float32_squash_input_denormal(b, fpst); +DO_RECPS(recpsf_f16, uint32_t, float16, chs) +DO_RECPS(recpsf_f32, float32, float32, chs) +DO_RECPS(recpsf_f64, float64, float64, chs) +DO_RECPS(recpsf_ah_f16, uint32_t, float16, ah_chs) +DO_RECPS(recpsf_ah_f32, float32, float32, ah_chs) +DO_RECPS(recpsf_ah_f64, float64, float64, ah_chs) - a = float32_chs(a); - if ((float32_is_infinity(a) && float32_is_zero(b)) || - (float32_is_infinity(b) && float32_is_zero(a))) { - return float32_two; - } - return float32_muladd(a, b, float32_two, 0, fpst); -} +#define DO_RSQRTSF(NAME, CTYPE, FLOATTYPE, CHSFN) \ + CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \ + { \ + a = FLOATTYPE ## _squash_input_denormal(a, fpst); \ + b = FLOATTYPE ## _squash_input_denormal(b, fpst); \ + a = FLOATTYPE ## _ ## CHSFN(a); \ + if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \ + (FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \ + return FLOATTYPE ## _one_point_five; \ + } \ + return FLOATTYPE ## _muladd_scalbn(a, b, FLOATTYPE ## _three, \ + -1, 0, fpst); \ + } \ -float64 HELPER(recpsf_f64)(float64 a, float64 b, float_status *fpst) -{ - a = float64_squash_input_denormal(a, fpst); - b = float64_squash_input_denormal(b, fpst); - - a = float64_chs(a); - if ((float64_is_infinity(a) && float64_is_zero(b)) || - (float64_is_infinity(b) && float64_is_zero(a))) { - return float64_two; - } - return float64_muladd(a, b, float64_two, 0, fpst); -} - -uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, float_status *fpst) -{ - a = float16_squash_input_denormal(a, fpst); - b = float16_squash_input_denormal(b, fpst); - - a = float16_chs(a); - if ((float16_is_infinity(a) && float16_is_zero(b)) || - (float16_is_infinity(b) && float16_is_zero(a))) { - return float16_one_point_five; - } - return float16_muladd_scalbn(a, b, float16_three, -1, 0, fpst); -} - -float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, float_status *fpst) -{ - a = float32_squash_input_denormal(a, fpst); - b = float32_squash_input_denormal(b, fpst); - - a = float32_chs(a); - if ((float32_is_infinity(a) && float32_is_zero(b)) || - (float32_is_infinity(b) && float32_is_zero(a))) { - return float32_one_point_five; - } - return float32_muladd_scalbn(a, b, float32_three, -1, 0, fpst); -} - -float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, float_status *fpst) -{ - a = float64_squash_input_denormal(a, fpst); - b = float64_squash_input_denormal(b, fpst); - - a = float64_chs(a); - if ((float64_is_infinity(a) && float64_is_zero(b)) || - (float64_is_infinity(b) && float64_is_zero(a))) { - return float64_one_point_five; - } - return float64_muladd_scalbn(a, b, float64_three, -1, 0, fpst); -} +DO_RSQRTSF(rsqrtsf_f16, uint32_t, float16, chs) +DO_RSQRTSF(rsqrtsf_f32, float32, float32, chs) +DO_RSQRTSF(rsqrtsf_f64, float64, float64, chs) +DO_RSQRTSF(rsqrtsf_ah_f16, uint32_t, float16, ah_chs) +DO_RSQRTSF(rsqrtsf_ah_f32, float32, float32, ah_chs) +DO_RSQRTSF(rsqrtsf_ah_f64, float64, float64, ah_chs) /* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */ uint32_t HELPER(frecpx_f16)(uint32_t a, float_status *fpst) diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h index ae0424f6de..85023465b7 100644 --- a/target/arm/tcg/helper-a64.h +++ b/target/arm/tcg/helper-a64.h @@ -38,9 +38,15 @@ DEF_HELPER_FLAGS_3(neon_cgt_f64, TCG_CALL_NO_RWG, i64, i64, i64, fpst) DEF_HELPER_FLAGS_3(recpsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst) DEF_HELPER_FLAGS_3(recpsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst) DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst) +DEF_HELPER_FLAGS_3(recpsf_ah_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst) +DEF_HELPER_FLAGS_3(recpsf_ah_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst) +DEF_HELPER_FLAGS_3(recpsf_ah_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst) DEF_HELPER_FLAGS_3(rsqrtsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst) DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst) DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst) +DEF_HELPER_FLAGS_3(rsqrtsf_ah_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst) +DEF_HELPER_FLAGS_3(rsqrtsf_ah_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst) +DEF_HELPER_FLAGS_3(rsqrtsf_ah_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst) DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, fpst) DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, fpst) DEF_HELPER_FLAGS_2(frecpx_f16, TCG_CALL_NO_RWG, f16, f16, fpst) diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c index e8db9f39b1..1b6d8598b8 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -5239,11 +5239,12 @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f, FPST_A64_F16 : FPST_A64); } -static bool do_fp3_scalar_ah(DisasContext *s, arg_rrr_e *a, const FPScalar *f, - int mergereg) +static bool do_fp3_scalar_ah_2fn(DisasContext *s, arg_rrr_e *a, + const FPScalar *fnormal, const FPScalar *fah, + int mergereg) { - return do_fp3_scalar_with_fpsttype(s, a, f, mergereg, - select_ah_fpst(s, a->esz)); + return do_fp3_scalar_with_fpsttype(s, a, s->fpcr_ah ? fah : fnormal, + mergereg, select_ah_fpst(s, a->esz)); } /* Some insns need to call different helpers when FPCR.AH == 1 */ @@ -5464,14 +5465,26 @@ static const FPScalar f_scalar_frecps = { gen_helper_recpsf_f32, gen_helper_recpsf_f64, }; -TRANS(FRECPS_s, do_fp3_scalar_ah, a, &f_scalar_frecps, a->rn) +static const FPScalar f_scalar_ah_frecps = { + gen_helper_recpsf_ah_f16, + gen_helper_recpsf_ah_f32, + gen_helper_recpsf_ah_f64, +}; +TRANS(FRECPS_s, do_fp3_scalar_ah_2fn, a, + &f_scalar_frecps, &f_scalar_ah_frecps, a->rn) static const FPScalar f_scalar_frsqrts = { gen_helper_rsqrtsf_f16, gen_helper_rsqrtsf_f32, gen_helper_rsqrtsf_f64, }; -TRANS(FRSQRTS_s, do_fp3_scalar_ah, a, &f_scalar_frsqrts, a->rn) +static const FPScalar f_scalar_ah_frsqrts = { + gen_helper_rsqrtsf_ah_f16, + gen_helper_rsqrtsf_ah_f32, + gen_helper_rsqrtsf_ah_f64, +}; +TRANS(FRSQRTS_s, do_fp3_scalar_ah_2fn, a, + &f_scalar_frsqrts, &f_scalar_ah_frsqrts, a->rn) static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a, const FPScalar *f, bool swap) diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h index 826791523a..6b93b5aeb9 100644 --- a/target/arm/tcg/vec_internal.h +++ b/target/arm/tcg/vec_internal.h @@ -267,6 +267,24 @@ float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2, */ bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp); +/* + * Negate as for FPCR.AH=1 -- do not negate NaNs. + */ +static inline float16 float16_ah_chs(float16 a) +{ + return float16_is_any_nan(a) ? a : float16_chs(a); +} + +static inline float32 float32_ah_chs(float32 a) +{ + return float32_is_any_nan(a) ? a : float32_chs(a); +} + +static inline float64 float64_ah_chs(float64 a) +{ + return float64_is_any_nan(a) ? a : float64_chs(a); +} + static inline float16 float16_maybe_ah_chs(float16 a, bool fpcr_ah) { return fpcr_ah && float16_is_any_nan(a) ? a : float16_chs(a);