target/arm: Implement FPCR.AH handling of negation of NaN

FPCR.AH == 1 mandates that negation of a NaN value should not flip
its sign bit.  This means we can no longer use gen_vfp_neg*()
everywhere but must instead generate slightly more complex code when
FPCR.AH is set.

Make this change for the scalar FNEG and for those places in
translate-a64.c which were previously directly calling
gen_vfp_neg*().

This change in semantics also affects any other instruction whose
pseudocode calls FPNeg(); in following commits we extend this
change to the other affected instructions.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Peter Maydell 2025-02-01 16:39:34 +00:00
parent 4ba5383bc5
commit e76df44d2d

View File

@ -828,6 +828,74 @@ static void gen_gvec_op4_fpst(DisasContext *s, bool is_q, int rd, int rn,
is_q ? 16 : 8, vec_full_reg_size(s), data, fn);
}
/*
* When FPCR.AH == 1, NEG and ABS do not flip the sign bit of a NaN.
* These functions implement
* d = floatN_is_any_nan(s) ? s : floatN_chs(s)
* which for float32 is
* d = ((s & ~(1 << 31)) > 0x7f800000UL) ? s : (s ^ (1 << 31))
* and similarly for the other float sizes.
*/
/*
 * FPCR.AH == 1 negation for half precision:
 *   d = is_any_nan(s) ? s : chs(s)
 * A value whose magnitude (as produced by gen_vfp_absh) is above
 * 0x7c00 is treated as a NaN and passed through unchanged.
 */
static void gen_vfp_ah_negh(TCGv_i32 d, TCGv_i32 s)
{
    TCGv_i32 magnitude = tcg_temp_new_i32();
    TCGv_i32 flipped = tcg_temp_new_i32();

    /*
     * Both intermediate results go into fresh temporaries, so s is
     * still intact for the final select even when d and s are the
     * same TCG value.
     */
    gen_vfp_negh(flipped, s);
    gen_vfp_absh(magnitude, s);

    /* d = (magnitude >u 0x7c00) ? s : flipped */
    tcg_gen_movcond_i32(TCG_COND_GTU, d,
                        magnitude, tcg_constant_i32(0x7c00),
                        s, flipped);
}
/*
 * FPCR.AH == 1 negation for single precision:
 *   d = is_any_nan(s) ? s : chs(s)
 * A value whose magnitude (as produced by gen_vfp_abss) is above
 * 0x7f800000 is treated as a NaN and passed through unchanged.
 */
static void gen_vfp_ah_negs(TCGv_i32 d, TCGv_i32 s)
{
    TCGv_i32 magnitude = tcg_temp_new_i32();
    TCGv_i32 flipped = tcg_temp_new_i32();

    /*
     * Both intermediate results go into fresh temporaries, so s is
     * still intact for the final select even when d and s are the
     * same TCG value.
     */
    gen_vfp_negs(flipped, s);
    gen_vfp_abss(magnitude, s);

    /* d = (magnitude >u 0x7f800000) ? s : flipped */
    tcg_gen_movcond_i32(TCG_COND_GTU, d,
                        magnitude, tcg_constant_i32(0x7f800000UL),
                        s, flipped);
}
/*
 * FPCR.AH == 1 negation for double precision:
 *   d = is_any_nan(s) ? s : chs(s)
 * A value whose magnitude (as produced by gen_vfp_absd) is above
 * 0x7ff0000000000000 is treated as a NaN and passed through unchanged.
 */
static void gen_vfp_ah_negd(TCGv_i64 d, TCGv_i64 s)
{
    TCGv_i64 magnitude = tcg_temp_new_i64();
    TCGv_i64 flipped = tcg_temp_new_i64();

    /*
     * Both intermediate results go into fresh temporaries, so s is
     * still intact for the final select even when d and s are the
     * same TCG value.
     */
    gen_vfp_negd(flipped, s);
    gen_vfp_absd(magnitude, s);

    /* d = (magnitude >u 0x7ff0000000000000) ? s : flipped */
    tcg_gen_movcond_i64(TCG_COND_GTU, d,
                        magnitude, tcg_constant_i64(0x7ff0000000000000ULL),
                        s, flipped);
}
/*
 * Half-precision negate of s into d, honouring the translation-time
 * FPCR.AH setting: the NaN-preserving form is emitted when dc->fpcr_ah
 * is set, the plain gen_vfp_negh() form otherwise.
 */
static void gen_vfp_maybe_ah_negh(DisasContext *dc, TCGv_i32 d, TCGv_i32 s)
{
    if (!dc->fpcr_ah) {
        gen_vfp_negh(d, s);
        return;
    }
    gen_vfp_ah_negh(d, s);
}
/*
 * Single-precision negate of s into d, honouring the translation-time
 * FPCR.AH setting: the NaN-preserving form is emitted when dc->fpcr_ah
 * is set, the plain gen_vfp_negs() form otherwise.
 */
static void gen_vfp_maybe_ah_negs(DisasContext *dc, TCGv_i32 d, TCGv_i32 s)
{
    if (!dc->fpcr_ah) {
        gen_vfp_negs(d, s);
        return;
    }
    gen_vfp_ah_negs(d, s);
}
/*
 * Double-precision negate of s into d, honouring the translation-time
 * FPCR.AH setting: the NaN-preserving form is emitted when dc->fpcr_ah
 * is set, the plain gen_vfp_negd() form otherwise.
 */
static void gen_vfp_maybe_ah_negd(DisasContext *dc, TCGv_i64 d, TCGv_i64 s)
{
    if (!dc->fpcr_ah) {
        gen_vfp_negd(d, s);
        return;
    }
    gen_vfp_ah_negd(d, s);
}
/* Set ZF and NF based on a 64 bit result. This is alas fiddlier
* than the 32 bit equivalent.
*/
@ -5241,12 +5309,35 @@ static void gen_fnmul_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
gen_vfp_negd(d, d);
}
/*
 * Half-precision FNMUL for FPCR.AH == 1: d = n * m via the vfp_mulh
 * helper (s is passed through to the helper), then negated in place
 * with the variant that leaves a NaN result's sign bit alone.
 */
static void gen_fnmul_ah_h(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
{
    /* Product first ... */
    gen_helper_vfp_mulh(d, n, m, s);

    /* ... then the NaN-preserving sign flip, with d as source and dest. */
    gen_vfp_ah_negh(d, d);
}
/*
 * Single-precision FNMUL for FPCR.AH == 1: d = n * m via the vfp_muls
 * helper (s is passed through to the helper), then negated in place
 * with the variant that leaves a NaN result's sign bit alone.
 */
static void gen_fnmul_ah_s(TCGv_i32 d, TCGv_i32 n, TCGv_i32 m, TCGv_ptr s)
{
    /* Product first ... */
    gen_helper_vfp_muls(d, n, m, s);

    /* ... then the NaN-preserving sign flip, with d as source and dest. */
    gen_vfp_ah_negs(d, d);
}
/*
 * Double-precision FNMUL for FPCR.AH == 1: d = n * m via the vfp_muld
 * helper (s is passed through to the helper), then negated in place
 * with the variant that leaves a NaN result's sign bit alone.
 */
static void gen_fnmul_ah_d(TCGv_i64 d, TCGv_i64 n, TCGv_i64 m, TCGv_ptr s)
{
    /* Product first ... */
    gen_helper_vfp_muld(d, n, m, s);

    /* ... then the NaN-preserving sign flip, with d as source and dest. */
    gen_vfp_ah_negd(d, d);
}
/*
 * Scalar FNMUL code generators using the plain gen_vfp_neg*() negation,
 * listed as the _h, _s and _d variants in that order.
 * NOTE(review): presumably the table selected when FPCR.AH is clear --
 * confirm against do_fp3_scalar_2fn().
 */
static const FPScalar f_scalar_fnmul = {
gen_fnmul_h,
gen_fnmul_s,
gen_fnmul_d,
};
TRANS(FNMUL_s, do_fp3_scalar, a, &f_scalar_fnmul, a->rn)
/*
 * Scalar FNMUL code generators whose negation step does not flip the
 * sign bit of a NaN, listed as the _h, _s and _d variants in that order.
 * NOTE(review): presumably the table selected when FPCR.AH is set --
 * confirm against do_fp3_scalar_2fn().
 */
static const FPScalar f_scalar_ah_fnmul = {
gen_fnmul_ah_h,
gen_fnmul_ah_s,
gen_fnmul_ah_d,
};
TRANS(FNMUL_s, do_fp3_scalar_2fn, a, &f_scalar_fnmul, &f_scalar_ah_fnmul, a->rn)
static const FPScalar f_scalar_fcmeq = {
gen_helper_advsimd_ceq_f16,
@ -6388,7 +6479,7 @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
read_vec_element(s, t2, a->rm, a->idx, MO_64);
if (neg) {
gen_vfp_negd(t1, t1);
gen_vfp_maybe_ah_negd(s, t1, t1);
}
gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
write_fp_dreg_merging(s, a->rd, a->rd, t0);
@ -6402,7 +6493,7 @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
read_vec_element_i32(s, t2, a->rm, a->idx, MO_32);
if (neg) {
gen_vfp_negs(t1, t1);
gen_vfp_maybe_ah_negs(s, t1, t1);
}
gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
write_fp_sreg_merging(s, a->rd, a->rd, t0);
@ -6419,7 +6510,7 @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
read_vec_element_i32(s, t2, a->rm, a->idx, MO_16);
if (neg) {
gen_vfp_negh(t1, t1);
gen_vfp_maybe_ah_negh(s, t1, t1);
}
gen_helper_advsimd_muladdh(t0, t1, t2, t0,
fpstatus_ptr(FPST_A64_F16));
@ -6902,10 +6993,10 @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
TCGv_i64 ta = read_fp_dreg(s, a->ra);
if (neg_a) {
gen_vfp_negd(ta, ta);
gen_vfp_maybe_ah_negd(s, ta, ta);
}
if (neg_n) {
gen_vfp_negd(tn, tn);
gen_vfp_maybe_ah_negd(s, tn, tn);
}
fpst = fpstatus_ptr(FPST_A64);
gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst);
@ -6920,10 +7011,10 @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
TCGv_i32 ta = read_fp_sreg(s, a->ra);
if (neg_a) {
gen_vfp_negs(ta, ta);
gen_vfp_maybe_ah_negs(s, ta, ta);
}
if (neg_n) {
gen_vfp_negs(tn, tn);
gen_vfp_maybe_ah_negs(s, tn, tn);
}
fpst = fpstatus_ptr(FPST_A64);
gen_helper_vfp_muladds(ta, tn, tm, ta, fpst);
@ -6941,10 +7032,10 @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
TCGv_i32 ta = read_fp_hreg(s, a->ra);
if (neg_a) {
gen_vfp_negh(ta, ta);
gen_vfp_maybe_ah_negh(s, ta, ta);
}
if (neg_n) {
gen_vfp_negh(tn, tn);
gen_vfp_maybe_ah_negh(s, tn, tn);
}
fpst = fpstatus_ptr(FPST_A64_F16);
gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst);
@ -8533,6 +8624,13 @@ static bool do_fp1_scalar_int(DisasContext *s, arg_rr_e *a,
return true;
}
/*
 * Wrapper around do_fp1_scalar_int() for instructions that need a
 * different set of generators depending on FPCR.AH: fah is used when
 * s->fpcr_ah is set, fnormal otherwise.  The final argument to
 * do_fp1_scalar_int() is always true here, and its return value is
 * passed straight back to the caller.
 */
static bool do_fp1_scalar_int_2fn(DisasContext *s, arg_rr_e *a,
                                  const FPScalar1Int *fnormal,
                                  const FPScalar1Int *fah)
{
    const FPScalar1Int *chosen = fnormal;

    if (s->fpcr_ah) {
        chosen = fah;
    }
    return do_fp1_scalar_int(s, a, chosen, true);
}
static const FPScalar1Int f_scalar_fmov = {
tcg_gen_mov_i32,
tcg_gen_mov_i32,
@ -8552,7 +8650,12 @@ static const FPScalar1Int f_scalar_fneg = {
gen_vfp_negs,
gen_vfp_negd,
};
TRANS(FNEG_s, do_fp1_scalar_int, a, &f_scalar_fneg, true)
/*
 * Scalar FNEG code generators for FPCR.AH == 1, where a NaN input keeps
 * its sign bit; listed as the _h, _s and _d variants in that order.
 * Passed as the "fah" table to do_fp1_scalar_int_2fn() by the FNEG_s
 * TRANS() entry.
 */
static const FPScalar1Int f_scalar_ah_fneg = {
gen_vfp_ah_negh,
gen_vfp_ah_negs,
gen_vfp_ah_negd,
};
TRANS(FNEG_s, do_fp1_scalar_int_2fn, a, &f_scalar_fneg, &f_scalar_ah_fneg)
typedef struct FPScalar1 {
void (*gen_h)(TCGv_i32, TCGv_i32, TCGv_ptr);