target/arm: Handle FPCR.AH in negation step in FMLS (indexed)
Handle the FPCR.AH "don't negate the sign of a NaN" semantics in FMLS (indexed). We do this by creating 6 new helpers, which allow us to do the negation either by XOR (for AH=0) or by muladd flags (for AH=1).

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
[PMM: Mostly from RTH's patch; error in index order into fns[][] fixed]
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
parent
fdf89638dc
commit
b85d8684c5
@ -813,6 +813,20 @@ DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG,
|
|||||||
DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG,
|
||||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
|
||||||
|
DEF_HELPER_FLAGS_6(gvec_fmls_idx_h, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_6(gvec_fmls_idx_s, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_6(gvec_fmls_idx_d, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
|
||||||
|
DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_h, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_s, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_d, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
|
||||||
DEF_HELPER_FLAGS_5(gvec_uqadd_b, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_5(gvec_uqadd_b, TCG_CALL_NO_RWG,
|
||||||
void, ptr, ptr, ptr, ptr, i32)
|
void, ptr, ptr, ptr, ptr, i32)
|
||||||
DEF_HELPER_FLAGS_5(gvec_uqadd_h, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_5(gvec_uqadd_h, TCG_CALL_NO_RWG,
|
||||||
|
@ -6726,10 +6726,16 @@ TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx)
|
|||||||
|
|
||||||
static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
|
static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
|
||||||
{
|
{
|
||||||
static gen_helper_gvec_4_ptr * const fns[3] = {
|
static gen_helper_gvec_4_ptr * const fns[3][3] = {
|
||||||
gen_helper_gvec_fmla_idx_h,
|
{ gen_helper_gvec_fmla_idx_h,
|
||||||
gen_helper_gvec_fmla_idx_s,
|
gen_helper_gvec_fmla_idx_s,
|
||||||
gen_helper_gvec_fmla_idx_d,
|
gen_helper_gvec_fmla_idx_d },
|
||||||
|
{ gen_helper_gvec_fmls_idx_h,
|
||||||
|
gen_helper_gvec_fmls_idx_s,
|
||||||
|
gen_helper_gvec_fmls_idx_d },
|
||||||
|
{ gen_helper_gvec_ah_fmls_idx_h,
|
||||||
|
gen_helper_gvec_ah_fmls_idx_s,
|
||||||
|
gen_helper_gvec_ah_fmls_idx_d },
|
||||||
};
|
};
|
||||||
MemOp esz = a->esz;
|
MemOp esz = a->esz;
|
||||||
int check = fp_access_check_vector_hsd(s, a->q, esz);
|
int check = fp_access_check_vector_hsd(s, a->q, esz);
|
||||||
@ -6740,8 +6746,7 @@ static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
|
|||||||
|
|
||||||
gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
|
gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
|
||||||
esz == MO_16 ? FPST_A64_F16 : FPST_A64,
|
esz == MO_16 ? FPST_A64_F16 : FPST_A64,
|
||||||
(a->idx << 1) | neg,
|
a->idx, fns[neg ? 1 + s->fpcr_ah : 0][esz - 1]);
|
||||||
fns[esz - 1]);
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3524,21 +3524,24 @@ DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)
|
|||||||
*** SVE Floating Point Multiply-Add Indexed Group
|
*** SVE Floating Point Multiply-Add Indexed Group
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
|
static gen_helper_gvec_4_ptr * const fmla_idx_fns[4] = {
|
||||||
{
|
NULL, gen_helper_gvec_fmla_idx_h,
|
||||||
static gen_helper_gvec_4_ptr * const fns[4] = {
|
gen_helper_gvec_fmla_idx_s, gen_helper_gvec_fmla_idx_d
|
||||||
NULL,
|
};
|
||||||
gen_helper_gvec_fmla_idx_h,
|
TRANS_FEAT(FMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz,
|
||||||
gen_helper_gvec_fmla_idx_s,
|
fmla_idx_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->index,
|
||||||
gen_helper_gvec_fmla_idx_d,
|
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
|
||||||
};
|
|
||||||
return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra,
|
|
||||||
(a->index << 1) | sub,
|
|
||||||
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
|
|
||||||
}
|
|
||||||
|
|
||||||
TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false)
|
static gen_helper_gvec_4_ptr * const fmls_idx_fns[4][2] = {
|
||||||
TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true)
|
{ NULL, NULL },
|
||||||
|
{ gen_helper_gvec_fmls_idx_h, gen_helper_gvec_ah_fmls_idx_h },
|
||||||
|
{ gen_helper_gvec_fmls_idx_s, gen_helper_gvec_ah_fmls_idx_s },
|
||||||
|
{ gen_helper_gvec_fmls_idx_d, gen_helper_gvec_ah_fmls_idx_d },
|
||||||
|
};
|
||||||
|
TRANS_FEAT(FMLS_zzxz, aa64_sve, gen_gvec_fpst_zzzz,
|
||||||
|
fmls_idx_fns[a->esz][s->fpcr_ah],
|
||||||
|
a->rd, a->rn, a->rm, a->ra, a->index,
|
||||||
|
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
*** SVE Floating Point Multiply Indexed Group
|
*** SVE Floating Point Multiply Indexed Group
|
||||||
|
@ -1680,29 +1680,35 @@ DO_FMUL_IDX(gvec_fmls_nf_idx_s, float32_sub, float32_mul, float32, H4)
|
|||||||
|
|
||||||
#undef DO_FMUL_IDX
|
#undef DO_FMUL_IDX
|
||||||
|
|
||||||
#define DO_FMLA_IDX(NAME, TYPE, H) \
|
#define DO_FMLA_IDX(NAME, TYPE, H, NEGX, NEGF) \
|
||||||
void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \
|
void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \
|
||||||
float_status *stat, uint32_t desc) \
|
float_status *stat, uint32_t desc) \
|
||||||
{ \
|
{ \
|
||||||
intptr_t i, j, oprsz = simd_oprsz(desc); \
|
intptr_t i, j, oprsz = simd_oprsz(desc); \
|
||||||
intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \
|
intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \
|
||||||
TYPE op1_neg = extract32(desc, SIMD_DATA_SHIFT, 1); \
|
intptr_t idx = simd_data(desc); \
|
||||||
intptr_t idx = desc >> (SIMD_DATA_SHIFT + 1); \
|
|
||||||
TYPE *d = vd, *n = vn, *m = vm, *a = va; \
|
TYPE *d = vd, *n = vn, *m = vm, *a = va; \
|
||||||
op1_neg <<= (8 * sizeof(TYPE) - 1); \
|
|
||||||
for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
|
for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
|
||||||
TYPE mm = m[H(i + idx)]; \
|
TYPE mm = m[H(i + idx)]; \
|
||||||
for (j = 0; j < segment; j++) { \
|
for (j = 0; j < segment; j++) { \
|
||||||
d[i + j] = TYPE##_muladd(n[i + j] ^ op1_neg, \
|
d[i + j] = TYPE##_muladd(n[i + j] ^ NEGX, mm, \
|
||||||
mm, a[i + j], 0, stat); \
|
a[i + j], NEGF, stat); \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
clear_tail(d, oprsz, simd_maxsz(desc)); \
|
clear_tail(d, oprsz, simd_maxsz(desc)); \
|
||||||
}
|
}
|
||||||
|
|
||||||
DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2)
|
DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2, 0, 0)
|
||||||
DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4)
|
DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4, 0, 0)
|
||||||
DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8)
|
DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8, 0, 0)
|
||||||
|
|
||||||
|
DO_FMLA_IDX(gvec_fmls_idx_h, float16, H2, INT16_MIN, 0)
|
||||||
|
DO_FMLA_IDX(gvec_fmls_idx_s, float32, H4, INT32_MIN, 0)
|
||||||
|
DO_FMLA_IDX(gvec_fmls_idx_d, float64, H8, INT64_MIN, 0)
|
||||||
|
|
||||||
|
DO_FMLA_IDX(gvec_ah_fmls_idx_h, float16, H2, 0, float_muladd_negate_product)
|
||||||
|
DO_FMLA_IDX(gvec_ah_fmls_idx_s, float32, H4, 0, float_muladd_negate_product)
|
||||||
|
DO_FMLA_IDX(gvec_ah_fmls_idx_d, float64, H8, 0, float_muladd_negate_product)
|
||||||
|
|
||||||
#undef DO_FMLA_IDX
|
#undef DO_FMLA_IDX
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user