target/arm: Handle FPCR.AH in FCMLA by index

The negation step in FCMLA by index mustn't negate a NaN when
FPCR.AH is set. Use the same approach as vector FCMLA: pass in
FPCR.AH and use it to select whether to negate by XOR or by the
muladd negate_product flag.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20250129013857.135256-27-richard.henderson@linaro.org
[PMM: Expanded commit message]
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
commit 6d5ccfd44f (parent a7868aaa30)
Richard Henderson, 2025-02-01 16:39:50 +00:00; committed by Peter Maydell
2 changed files with 27 additions and 19 deletions
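
As a hedged illustration of the selection described in the commit message (not the committed code; the single-element helper below is hypothetical and assumes QEMU's softfloat API): with AH=0 the negation is an XOR of the sign bit, while with AH=1 it is deferred to the fused multiply-add via float_muladd_negate_product so a NaN operand is not rewritten.

#include "qemu/osdep.h"
#include "fpu/softfloat.h"  /* float16_muladd(), float_muladd_negate_product */

/* Hypothetical single-element FCMLA step, for illustration only. */
static float16 fcmla_elt_sketch(float16 n, float16 m, float16 acc,
                                bool negate, bool fpcr_ah, float_status *fpst)
{
    /* AH=0: flip the multiplicand's sign bit; this would also flip a NaN. */
    float16 negx = (negate && !fpcr_ah) ? 0x8000 : 0;
    /* AH=1: let the muladd negate the product, leaving NaN operands alone. */
    int negf = (negate && fpcr_ah) ? float_muladd_negate_product : 0;

    return float16_muladd(m ^ negx, n, acc, negf, fpst);
}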

@@ -6916,7 +6916,7 @@ static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a)
     if (fp_access_check(s)) {
         gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
                           a->esz == MO_16 ? FPST_A64_F16 : FPST_A64,
-                          (a->idx << 2) | a->rot, fn);
+                          (s->fpcr_ah << 4) | (a->idx << 2) | a->rot, fn);
     }
     return true;
 }
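
For reference, the fpcr_ah bit added above reaches the helpers through the gvec simd_data immediate; a minimal sketch of the encoding (the function name is made up, the layout mirrors the line above and the extract32() calls in the helpers below):

/* Hypothetical encoder: rot in bits [1:0], idx in bits [3:2], AH in bit [4]. */
static uint32_t fcmla_idx_data_sketch(unsigned rot, unsigned idx, bool fpcr_ah)
{
    return (fpcr_ah << 4) | (idx << 2) | rot;
}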

@@ -995,29 +995,33 @@ void HELPER(gvec_fcmlah_idx)(void *vd, void *vn, void *vm, void *va,
     uintptr_t opr_sz = simd_oprsz(desc);
     float16 *d = vd, *n = vn, *m = vm, *a = va;
     intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
-    uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+    uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
     intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2);
-    uint32_t neg_real = flip ^ neg_imag;
+    uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 4, 1);
+    uint32_t negf_real = flip ^ negf_imag;
     intptr_t elements = opr_sz / sizeof(float16);
     intptr_t eltspersegment = MIN(16 / sizeof(float16), elements);
+    float16 negx_imag, negx_real;
     intptr_t i, j;
 
-    /* Shift boolean to the sign bit so we can xor to negate. */
-    neg_real <<= 15;
-    neg_imag <<= 15;
+    /* With AH=0, use negx; with AH=1 use negf. */
+    negx_real = (negf_real & ~fpcr_ah) << 15;
+    negx_imag = (negf_imag & ~fpcr_ah) << 15;
+    negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
+    negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
 
     for (i = 0; i < elements; i += eltspersegment) {
         float16 mr = m[H2(i + 2 * index + 0)];
         float16 mi = m[H2(i + 2 * index + 1)];
-        float16 e1 = neg_real ^ (flip ? mi : mr);
-        float16 e3 = neg_imag ^ (flip ? mr : mi);
+        float16 e1 = negx_real ^ (flip ? mi : mr);
+        float16 e3 = negx_imag ^ (flip ? mr : mi);
 
         for (j = i; j < i + eltspersegment; j += 2) {
             float16 e2 = n[H2(j + flip)];
             float16 e4 = e2;
 
-            d[H2(j)] = float16_muladd(e2, e1, a[H2(j)], 0, fpst);
-            d[H2(j + 1)] = float16_muladd(e4, e3, a[H2(j + 1)], 0, fpst);
+            d[H2(j)] = float16_muladd(e2, e1, a[H2(j)], negf_real, fpst);
+            d[H2(j + 1)] = float16_muladd(e4, e3, a[H2(j + 1)], negf_imag, fpst);
         }
     }
     clear_tail(d, opr_sz, simd_maxsz(desc));
@@ -1059,29 +1063,33 @@ void HELPER(gvec_fcmlas_idx)(void *vd, void *vn, void *vm, void *va,
     uintptr_t opr_sz = simd_oprsz(desc);
     float32 *d = vd, *n = vn, *m = vm, *a = va;
     intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
-    uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+    uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
     intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2);
-    uint32_t neg_real = flip ^ neg_imag;
+    uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 4, 1);
+    uint32_t negf_real = flip ^ negf_imag;
     intptr_t elements = opr_sz / sizeof(float32);
     intptr_t eltspersegment = MIN(16 / sizeof(float32), elements);
+    float32 negx_imag, negx_real;
     intptr_t i, j;
 
-    /* Shift boolean to the sign bit so we can xor to negate. */
-    neg_real <<= 31;
-    neg_imag <<= 31;
+    /* With AH=0, use negx; with AH=1 use negf. */
+    negx_real = (negf_real & ~fpcr_ah) << 31;
+    negx_imag = (negf_imag & ~fpcr_ah) << 31;
+    negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
+    negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
 
     for (i = 0; i < elements; i += eltspersegment) {
         float32 mr = m[H4(i + 2 * index + 0)];
         float32 mi = m[H4(i + 2 * index + 1)];
-        float32 e1 = neg_real ^ (flip ? mi : mr);
-        float32 e3 = neg_imag ^ (flip ? mr : mi);
+        float32 e1 = negx_real ^ (flip ? mi : mr);
+        float32 e3 = negx_imag ^ (flip ? mr : mi);
 
         for (j = i; j < i + eltspersegment; j += 2) {
             float32 e2 = n[H4(j + flip)];
             float32 e4 = e2;
 
-            d[H4(j)] = float32_muladd(e2, e1, a[H4(j)], 0, fpst);
-            d[H4(j + 1)] = float32_muladd(e4, e3, a[H4(j + 1)], 0, fpst);
+            d[H4(j)] = float32_muladd(e2, e1, a[H4(j)], negf_real, fpst);
+            d[H4(j + 1)] = float32_muladd(e4, e3, a[H4(j + 1)], negf_imag, fpst);
         }
     }
     clear_tail(d, opr_sz, simd_maxsz(desc));