target-arm queue:
* target/alpha: Don't corrupt error_code with unknown softfloat flags * target/arm: Implement FEAT_AFP and FEAT_RPRES -----BEGIN PGP SIGNATURE----- iQJNBAABCAA3FiEE4aXFk81BneKOgxXPPCUl7RQ2DN4FAmereaQZHHBldGVyLm1h eWRlbGxAbGluYXJvLm9yZwAKCRA8JSXtFDYM3gyLEACglOM4E0j1hRl/JZlWD384 nZL01Hayp9xwSNn28hkXaajCxkErTWLuCZax1g1fBvt/Yqn+E3oFan8gIybMEVgK 9ei6/m45fuICSQQhifvYTtYhAMd5uclr0anjRp9gN7FH6aaNPan/ZQYcKYxFq6cp RDTF5qiHIgTeXAlU+WiioxravL3A/D+jcQMYLEI5L+Vt5nYNM589PSNFWNLQ6W9e Gtmvp0uzrRSZgWxR3nOvhsn1NS/xXK90Zil+GPBo4jf82QVumqKYMsAcireOlxfk zTlHXH3PuonGj/ZPLxmiVKYhLb1RglQ9kIs/FHVel18QTz4dJ3DaJp8QXCNHbrKz 3aUwSiIh5Y/s3Q/X2Qy3jUHQ5tSjayhIhGFbn6zPdZ+2JZbIEu1Czeparddu/Zlq OR0CMVo2Lj/C6OakEU1/YRTKBKiNBaN1eVHi7gjzTDBdbMMC7ZlNuimpFAbthmSC szHzkgX8LXHzJqe4vip27yOMFBRPxvst/CXcEoPnjsLEQhLlKjOeFiHuEI+DUvaI 24AJ5b0FDdSOEcaFkxFD6gxW8E77MiNtBncfxDxTMKHs/4yFGiDihSPnOCANn3Kk zpQIwl0KJAPTA6Cldck9lY7MsKgGPTUNhEThadZlInbp4Uc6T1bvNDtB9b7osDfy FeposcM1+GBeuSde0yD6oQ== =P3wv -----END PGP SIGNATURE----- Merge tag 'pull-target-arm-20250211' of https://git.linaro.org/people/pmaydell/qemu-arm into staging target-arm queue: * target/alpha: Don't corrupt error_code with unknown softfloat flags * target/arm: Implement FEAT_AFP and FEAT_RPRES # -----BEGIN PGP SIGNATURE----- # # iQJNBAABCAA3FiEE4aXFk81BneKOgxXPPCUl7RQ2DN4FAmereaQZHHBldGVyLm1h # eWRlbGxAbGluYXJvLm9yZwAKCRA8JSXtFDYM3gyLEACglOM4E0j1hRl/JZlWD384 # nZL01Hayp9xwSNn28hkXaajCxkErTWLuCZax1g1fBvt/Yqn+E3oFan8gIybMEVgK # 9ei6/m45fuICSQQhifvYTtYhAMd5uclr0anjRp9gN7FH6aaNPan/ZQYcKYxFq6cp # RDTF5qiHIgTeXAlU+WiioxravL3A/D+jcQMYLEI5L+Vt5nYNM589PSNFWNLQ6W9e # Gtmvp0uzrRSZgWxR3nOvhsn1NS/xXK90Zil+GPBo4jf82QVumqKYMsAcireOlxfk # zTlHXH3PuonGj/ZPLxmiVKYhLb1RglQ9kIs/FHVel18QTz4dJ3DaJp8QXCNHbrKz # 3aUwSiIh5Y/s3Q/X2Qy3jUHQ5tSjayhIhGFbn6zPdZ+2JZbIEu1Czeparddu/Zlq # OR0CMVo2Lj/C6OakEU1/YRTKBKiNBaN1eVHi7gjzTDBdbMMC7ZlNuimpFAbthmSC # szHzkgX8LXHzJqe4vip27yOMFBRPxvst/CXcEoPnjsLEQhLlKjOeFiHuEI+DUvaI # 24AJ5b0FDdSOEcaFkxFD6gxW8E77MiNtBncfxDxTMKHs/4yFGiDihSPnOCANn3Kk # 
zpQIwl0KJAPTA6Cldck9lY7MsKgGPTUNhEThadZlInbp4Uc6T1bvNDtB9b7osDfy # FeposcM1+GBeuSde0yD6oQ== # =P3wv # -----END PGP SIGNATURE----- # gpg: Signature made Tue 11 Feb 2025 11:24:04 EST # gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE # gpg: issuer "peter.maydell@linaro.org" # gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [full] # gpg: aka "Peter Maydell <pmaydell@gmail.com>" [full] # gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [full] # gpg: aka "Peter Maydell <peter@archaic.org.uk>" [unknown] # Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE * tag 'pull-target-arm-20250211' of https://git.linaro.org/people/pmaydell/qemu-arm: (68 commits) target/arm: Sink fp_status and fpcr access into do_fmlal* target/arm: Read fz16 from env->vfp.fpcr target/arm: Simplify DO_VFP_cmp in vfp_helper.c target/arm: Simplify fp_status indexing in mve_helper.c target/arm: Remove fp_status_a32 target/arm: Remove fp_status_a64 target/arm: Remove fp_status_f16_a32 target/arm: Remove fp_status_f16_a64 target/arm: Remove ah_fp_status target/arm: Remove ah_fp_status_f16 target/arm: Remove standard_fp_status target/arm: Remove standard_fp_status_f16 target/arm: Introduce CPUARMState.vfp.fp_status[] target/arm: Enable FEAT_RPRES for -cpu max target/arm: Implement increased precision FRSQRTE target/arm: Implement increased precision FRECPE target/arm: Plumb FEAT_RPRES frecpe and frsqrte through to new helper target/arm: Enable FEAT_AFP for '-cpu max' target/arm: Handle FPCR.AH in SVE FMLSLB, FMLSLT (vectors) target/arm: Handle FPCR.AH in SVE FMLSL (indexed) ... Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
This commit is contained in:
commit
afbcca0ea4
@ -20,6 +20,7 @@ the following architecture extensions:
|
|||||||
- FEAT_AA64EL3 (Support for AArch64 at EL3)
|
- FEAT_AA64EL3 (Support for AArch64 at EL3)
|
||||||
- FEAT_AdvSIMD (Advanced SIMD Extension)
|
- FEAT_AdvSIMD (Advanced SIMD Extension)
|
||||||
- FEAT_AES (AESD and AESE instructions)
|
- FEAT_AES (AESD and AESE instructions)
|
||||||
|
- FEAT_AFP (Alternate floating-point behavior)
|
||||||
- FEAT_Armv9_Crypto (Armv9 Cryptographic Extension)
|
- FEAT_Armv9_Crypto (Armv9 Cryptographic Extension)
|
||||||
- FEAT_ASID16 (16 bit ASID)
|
- FEAT_ASID16 (16 bit ASID)
|
||||||
- FEAT_BBM at level 2 (Translation table break-before-make levels)
|
- FEAT_BBM at level 2 (Translation table break-before-make levels)
|
||||||
@ -117,6 +118,7 @@ the following architecture extensions:
|
|||||||
- FEAT_RDM (Advanced SIMD rounding double multiply accumulate instructions)
|
- FEAT_RDM (Advanced SIMD rounding double multiply accumulate instructions)
|
||||||
- FEAT_RME (Realm Management Extension) (NB: support status in QEMU is experimental)
|
- FEAT_RME (Realm Management Extension) (NB: support status in QEMU is experimental)
|
||||||
- FEAT_RNG (Random number generator)
|
- FEAT_RNG (Random number generator)
|
||||||
|
- FEAT_RPRES (Increased precision of FRECPE and FRSQRTE)
|
||||||
- FEAT_S2FWB (Stage 2 forced Write-Back)
|
- FEAT_S2FWB (Stage 2 forced Write-Back)
|
||||||
- FEAT_SB (Speculation Barrier)
|
- FEAT_SB (Speculation Barrier)
|
||||||
- FEAT_SEL2 (Secure EL2)
|
- FEAT_SEL2 (Secure EL2)
|
||||||
|
@ -204,7 +204,7 @@ static void partsN(canonicalize)(FloatPartsN *p, float_status *status,
|
|||||||
frac_clear(p);
|
frac_clear(p);
|
||||||
} else {
|
} else {
|
||||||
int shift = frac_normalize(p);
|
int shift = frac_normalize(p);
|
||||||
p->cls = float_class_normal;
|
p->cls = float_class_denormal;
|
||||||
p->exp = fmt->frac_shift - fmt->exp_bias
|
p->exp = fmt->frac_shift - fmt->exp_bias
|
||||||
- shift + !fmt->m68k_denormal;
|
- shift + !fmt->m68k_denormal;
|
||||||
}
|
}
|
||||||
@ -334,7 +334,8 @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
|
|||||||
p->frac_lo &= ~round_mask;
|
p->frac_lo &= ~round_mask;
|
||||||
}
|
}
|
||||||
frac_shr(p, frac_shift);
|
frac_shr(p, frac_shift);
|
||||||
} else if (s->flush_to_zero) {
|
} else if (s->flush_to_zero &&
|
||||||
|
s->ftz_detection == float_ftz_before_rounding) {
|
||||||
flags |= float_flag_output_denormal_flushed;
|
flags |= float_flag_output_denormal_flushed;
|
||||||
p->cls = float_class_zero;
|
p->cls = float_class_zero;
|
||||||
exp = 0;
|
exp = 0;
|
||||||
@ -381,11 +382,19 @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
|
|||||||
exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !fmt->m68k_denormal;
|
exp = (p->frac_hi & DECOMPOSED_IMPLICIT_BIT) && !fmt->m68k_denormal;
|
||||||
frac_shr(p, frac_shift);
|
frac_shr(p, frac_shift);
|
||||||
|
|
||||||
if (is_tiny && (flags & float_flag_inexact)) {
|
if (is_tiny) {
|
||||||
flags |= float_flag_underflow;
|
if (s->flush_to_zero) {
|
||||||
}
|
assert(s->ftz_detection == float_ftz_after_rounding);
|
||||||
if (exp == 0 && frac_eqz(p)) {
|
flags |= float_flag_output_denormal_flushed;
|
||||||
p->cls = float_class_zero;
|
p->cls = float_class_zero;
|
||||||
|
exp = 0;
|
||||||
|
frac_clear(p);
|
||||||
|
} else if (flags & float_flag_inexact) {
|
||||||
|
flags |= float_flag_underflow;
|
||||||
|
}
|
||||||
|
if (exp == 0 && frac_eqz(p)) {
|
||||||
|
p->cls = float_class_zero;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
p->exp = exp;
|
p->exp = exp;
|
||||||
@ -395,7 +404,7 @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
|
|||||||
static void partsN(uncanon)(FloatPartsN *p, float_status *s,
|
static void partsN(uncanon)(FloatPartsN *p, float_status *s,
|
||||||
const FloatFmt *fmt)
|
const FloatFmt *fmt)
|
||||||
{
|
{
|
||||||
if (likely(p->cls == float_class_normal)) {
|
if (likely(is_anynorm(p->cls))) {
|
||||||
parts_uncanon_normal(p, s, fmt);
|
parts_uncanon_normal(p, s, fmt);
|
||||||
} else {
|
} else {
|
||||||
switch (p->cls) {
|
switch (p->cls) {
|
||||||
@ -433,9 +442,18 @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
|
|||||||
bool b_sign = b->sign ^ subtract;
|
bool b_sign = b->sign ^ subtract;
|
||||||
int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
|
int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* For addition and subtraction, we will consume an
|
||||||
|
* input denormal unless the other input is a NaN.
|
||||||
|
*/
|
||||||
|
if ((ab_mask & (float_cmask_denormal | float_cmask_anynan)) ==
|
||||||
|
float_cmask_denormal) {
|
||||||
|
float_raise(float_flag_input_denormal_used, s);
|
||||||
|
}
|
||||||
|
|
||||||
if (a->sign != b_sign) {
|
if (a->sign != b_sign) {
|
||||||
/* Subtraction */
|
/* Subtraction */
|
||||||
if (likely(ab_mask == float_cmask_normal)) {
|
if (likely(cmask_is_only_normals(ab_mask))) {
|
||||||
if (parts_sub_normal(a, b)) {
|
if (parts_sub_normal(a, b)) {
|
||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
@ -468,7 +486,7 @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
/* Addition */
|
/* Addition */
|
||||||
if (likely(ab_mask == float_cmask_normal)) {
|
if (likely(cmask_is_only_normals(ab_mask))) {
|
||||||
parts_add_normal(a, b);
|
parts_add_normal(a, b);
|
||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
@ -488,12 +506,12 @@ static FloatPartsN *partsN(addsub)(FloatPartsN *a, FloatPartsN *b,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (b->cls == float_class_zero) {
|
if (b->cls == float_class_zero) {
|
||||||
g_assert(a->cls == float_class_normal);
|
g_assert(is_anynorm(a->cls));
|
||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
g_assert(a->cls == float_class_zero);
|
g_assert(a->cls == float_class_zero);
|
||||||
g_assert(b->cls == float_class_normal);
|
g_assert(is_anynorm(b->cls));
|
||||||
return_b:
|
return_b:
|
||||||
b->sign = b_sign;
|
b->sign = b_sign;
|
||||||
return b;
|
return b;
|
||||||
@ -513,9 +531,13 @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
|
|||||||
int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
|
int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
|
||||||
bool sign = a->sign ^ b->sign;
|
bool sign = a->sign ^ b->sign;
|
||||||
|
|
||||||
if (likely(ab_mask == float_cmask_normal)) {
|
if (likely(cmask_is_only_normals(ab_mask))) {
|
||||||
FloatPartsW tmp;
|
FloatPartsW tmp;
|
||||||
|
|
||||||
|
if (ab_mask & float_cmask_denormal) {
|
||||||
|
float_raise(float_flag_input_denormal_used, s);
|
||||||
|
}
|
||||||
|
|
||||||
frac_mulw(&tmp, a, b);
|
frac_mulw(&tmp, a, b);
|
||||||
frac_truncjam(a, &tmp);
|
frac_truncjam(a, &tmp);
|
||||||
|
|
||||||
@ -541,6 +563,10 @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Multiply by 0 or Inf */
|
/* Multiply by 0 or Inf */
|
||||||
|
if (ab_mask & float_cmask_denormal) {
|
||||||
|
float_raise(float_flag_input_denormal_used, s);
|
||||||
|
}
|
||||||
|
|
||||||
if (ab_mask & float_cmask_inf) {
|
if (ab_mask & float_cmask_inf) {
|
||||||
a->cls = float_class_inf;
|
a->cls = float_class_inf;
|
||||||
a->sign = sign;
|
a->sign = sign;
|
||||||
@ -596,7 +622,7 @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
|
|||||||
a->sign ^= 1;
|
a->sign ^= 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (unlikely(ab_mask != float_cmask_normal)) {
|
if (unlikely(!cmask_is_only_normals(ab_mask))) {
|
||||||
if (unlikely(ab_mask == float_cmask_infzero)) {
|
if (unlikely(ab_mask == float_cmask_infzero)) {
|
||||||
float_raise(float_flag_invalid | float_flag_invalid_imz, s);
|
float_raise(float_flag_invalid | float_flag_invalid_imz, s);
|
||||||
goto d_nan;
|
goto d_nan;
|
||||||
@ -611,7 +637,7 @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
|
|||||||
}
|
}
|
||||||
|
|
||||||
g_assert(ab_mask & float_cmask_zero);
|
g_assert(ab_mask & float_cmask_zero);
|
||||||
if (c->cls == float_class_normal) {
|
if (is_anynorm(c->cls)) {
|
||||||
*a = *c;
|
*a = *c;
|
||||||
goto return_normal;
|
goto return_normal;
|
||||||
}
|
}
|
||||||
@ -664,6 +690,16 @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
|
|||||||
if (flags & float_muladd_negate_result) {
|
if (flags & float_muladd_negate_result) {
|
||||||
a->sign ^= 1;
|
a->sign ^= 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* All result types except for "return the default NaN
|
||||||
|
* because this is an Invalid Operation" go through here;
|
||||||
|
* this matches the set of cases where we consumed a
|
||||||
|
* denormal input.
|
||||||
|
*/
|
||||||
|
if (abc_mask & float_cmask_denormal) {
|
||||||
|
float_raise(float_flag_input_denormal_used, s);
|
||||||
|
}
|
||||||
return a;
|
return a;
|
||||||
|
|
||||||
return_sub_zero:
|
return_sub_zero:
|
||||||
@ -692,7 +728,10 @@ static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
|
|||||||
int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
|
int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
|
||||||
bool sign = a->sign ^ b->sign;
|
bool sign = a->sign ^ b->sign;
|
||||||
|
|
||||||
if (likely(ab_mask == float_cmask_normal)) {
|
if (likely(cmask_is_only_normals(ab_mask))) {
|
||||||
|
if (ab_mask & float_cmask_denormal) {
|
||||||
|
float_raise(float_flag_input_denormal_used, s);
|
||||||
|
}
|
||||||
a->sign = sign;
|
a->sign = sign;
|
||||||
a->exp -= b->exp + frac_div(a, b);
|
a->exp -= b->exp + frac_div(a, b);
|
||||||
return a;
|
return a;
|
||||||
@ -713,6 +752,10 @@ static FloatPartsN *partsN(div)(FloatPartsN *a, FloatPartsN *b,
|
|||||||
return parts_pick_nan(a, b, s);
|
return parts_pick_nan(a, b, s);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((ab_mask & float_cmask_denormal) && b->cls != float_class_zero) {
|
||||||
|
float_raise(float_flag_input_denormal_used, s);
|
||||||
|
}
|
||||||
|
|
||||||
a->sign = sign;
|
a->sign = sign;
|
||||||
|
|
||||||
/* Inf / X */
|
/* Inf / X */
|
||||||
@ -750,7 +793,10 @@ static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b,
|
|||||||
{
|
{
|
||||||
int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
|
int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
|
||||||
|
|
||||||
if (likely(ab_mask == float_cmask_normal)) {
|
if (likely(cmask_is_only_normals(ab_mask))) {
|
||||||
|
if (ab_mask & float_cmask_denormal) {
|
||||||
|
float_raise(float_flag_input_denormal_used, s);
|
||||||
|
}
|
||||||
frac_modrem(a, b, mod_quot);
|
frac_modrem(a, b, mod_quot);
|
||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
@ -771,6 +817,10 @@ static FloatPartsN *partsN(modrem)(FloatPartsN *a, FloatPartsN *b,
|
|||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (ab_mask & float_cmask_denormal) {
|
||||||
|
float_raise(float_flag_input_denormal_used, s);
|
||||||
|
}
|
||||||
|
|
||||||
/* N % Inf; 0 % N */
|
/* N % Inf; 0 % N */
|
||||||
g_assert(b->cls == float_class_inf || a->cls == float_class_zero);
|
g_assert(b->cls == float_class_inf || a->cls == float_class_zero);
|
||||||
return a;
|
return a;
|
||||||
@ -800,6 +850,12 @@ static void partsN(sqrt)(FloatPartsN *a, float_status *status,
|
|||||||
|
|
||||||
if (unlikely(a->cls != float_class_normal)) {
|
if (unlikely(a->cls != float_class_normal)) {
|
||||||
switch (a->cls) {
|
switch (a->cls) {
|
||||||
|
case float_class_denormal:
|
||||||
|
if (!a->sign) {
|
||||||
|
/* -ve denormal will be InvalidOperation */
|
||||||
|
float_raise(float_flag_input_denormal_used, status);
|
||||||
|
}
|
||||||
|
break;
|
||||||
case float_class_snan:
|
case float_class_snan:
|
||||||
case float_class_qnan:
|
case float_class_qnan:
|
||||||
parts_return_nan(a, status);
|
parts_return_nan(a, status);
|
||||||
@ -1130,6 +1186,7 @@ static void partsN(round_to_int)(FloatPartsN *a, FloatRoundMode rmode,
|
|||||||
case float_class_inf:
|
case float_class_inf:
|
||||||
break;
|
break;
|
||||||
case float_class_normal:
|
case float_class_normal:
|
||||||
|
case float_class_denormal:
|
||||||
if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
|
if (parts_round_to_int_normal(a, rmode, scale, fmt->frac_size)) {
|
||||||
float_raise(float_flag_inexact, s);
|
float_raise(float_flag_inexact, s);
|
||||||
}
|
}
|
||||||
@ -1174,6 +1231,7 @@ static int64_t partsN(float_to_sint)(FloatPartsN *p, FloatRoundMode rmode,
|
|||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
case float_class_normal:
|
case float_class_normal:
|
||||||
|
case float_class_denormal:
|
||||||
/* TODO: N - 2 is frac_size for rounding; could use input fmt. */
|
/* TODO: N - 2 is frac_size for rounding; could use input fmt. */
|
||||||
if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
|
if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
|
||||||
flags = float_flag_inexact;
|
flags = float_flag_inexact;
|
||||||
@ -1241,6 +1299,7 @@ static uint64_t partsN(float_to_uint)(FloatPartsN *p, FloatRoundMode rmode,
|
|||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
case float_class_normal:
|
case float_class_normal:
|
||||||
|
case float_class_denormal:
|
||||||
/* TODO: N - 2 is frac_size for rounding; could use input fmt. */
|
/* TODO: N - 2 is frac_size for rounding; could use input fmt. */
|
||||||
if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
|
if (parts_round_to_int_normal(p, rmode, scale, N - 2)) {
|
||||||
flags = float_flag_inexact;
|
flags = float_flag_inexact;
|
||||||
@ -1304,6 +1363,7 @@ static int64_t partsN(float_to_sint_modulo)(FloatPartsN *p,
|
|||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
case float_class_normal:
|
case float_class_normal:
|
||||||
|
case float_class_denormal:
|
||||||
/* TODO: N - 2 is frac_size for rounding; could use input fmt. */
|
/* TODO: N - 2 is frac_size for rounding; could use input fmt. */
|
||||||
if (parts_round_to_int_normal(p, rmode, 0, N - 2)) {
|
if (parts_round_to_int_normal(p, rmode, 0, N - 2)) {
|
||||||
flags = float_flag_inexact;
|
flags = float_flag_inexact;
|
||||||
@ -1425,6 +1485,9 @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
|
|||||||
if ((flags & (minmax_isnum | minmax_isnumber))
|
if ((flags & (minmax_isnum | minmax_isnumber))
|
||||||
&& !(ab_mask & float_cmask_snan)
|
&& !(ab_mask & float_cmask_snan)
|
||||||
&& (ab_mask & ~float_cmask_qnan)) {
|
&& (ab_mask & ~float_cmask_qnan)) {
|
||||||
|
if (ab_mask & float_cmask_denormal) {
|
||||||
|
float_raise(float_flag_input_denormal_used, s);
|
||||||
|
}
|
||||||
return is_nan(a->cls) ? b : a;
|
return is_nan(a->cls) ? b : a;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1449,12 +1512,17 @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
|
|||||||
return parts_pick_nan(a, b, s);
|
return parts_pick_nan(a, b, s);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (ab_mask & float_cmask_denormal) {
|
||||||
|
float_raise(float_flag_input_denormal_used, s);
|
||||||
|
}
|
||||||
|
|
||||||
a_exp = a->exp;
|
a_exp = a->exp;
|
||||||
b_exp = b->exp;
|
b_exp = b->exp;
|
||||||
|
|
||||||
if (unlikely(ab_mask != float_cmask_normal)) {
|
if (unlikely(!cmask_is_only_normals(ab_mask))) {
|
||||||
switch (a->cls) {
|
switch (a->cls) {
|
||||||
case float_class_normal:
|
case float_class_normal:
|
||||||
|
case float_class_denormal:
|
||||||
break;
|
break;
|
||||||
case float_class_inf:
|
case float_class_inf:
|
||||||
a_exp = INT16_MAX;
|
a_exp = INT16_MAX;
|
||||||
@ -1467,6 +1535,7 @@ static FloatPartsN *partsN(minmax)(FloatPartsN *a, FloatPartsN *b,
|
|||||||
}
|
}
|
||||||
switch (b->cls) {
|
switch (b->cls) {
|
||||||
case float_class_normal:
|
case float_class_normal:
|
||||||
|
case float_class_denormal:
|
||||||
break;
|
break;
|
||||||
case float_class_inf:
|
case float_class_inf:
|
||||||
b_exp = INT16_MAX;
|
b_exp = INT16_MAX;
|
||||||
@ -1513,9 +1582,13 @@ static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b,
|
|||||||
{
|
{
|
||||||
int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
|
int ab_mask = float_cmask(a->cls) | float_cmask(b->cls);
|
||||||
|
|
||||||
if (likely(ab_mask == float_cmask_normal)) {
|
if (likely(cmask_is_only_normals(ab_mask))) {
|
||||||
FloatRelation cmp;
|
FloatRelation cmp;
|
||||||
|
|
||||||
|
if (ab_mask & float_cmask_denormal) {
|
||||||
|
float_raise(float_flag_input_denormal_used, s);
|
||||||
|
}
|
||||||
|
|
||||||
if (a->sign != b->sign) {
|
if (a->sign != b->sign) {
|
||||||
goto a_sign;
|
goto a_sign;
|
||||||
}
|
}
|
||||||
@ -1541,6 +1614,10 @@ static FloatRelation partsN(compare)(FloatPartsN *a, FloatPartsN *b,
|
|||||||
return float_relation_unordered;
|
return float_relation_unordered;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (ab_mask & float_cmask_denormal) {
|
||||||
|
float_raise(float_flag_input_denormal_used, s);
|
||||||
|
}
|
||||||
|
|
||||||
if (ab_mask & float_cmask_zero) {
|
if (ab_mask & float_cmask_zero) {
|
||||||
if (ab_mask == float_cmask_zero) {
|
if (ab_mask == float_cmask_zero) {
|
||||||
return float_relation_equal;
|
return float_relation_equal;
|
||||||
@ -1580,6 +1657,9 @@ static void partsN(scalbn)(FloatPartsN *a, int n, float_status *s)
|
|||||||
case float_class_zero:
|
case float_class_zero:
|
||||||
case float_class_inf:
|
case float_class_inf:
|
||||||
break;
|
break;
|
||||||
|
case float_class_denormal:
|
||||||
|
float_raise(float_flag_input_denormal_used, s);
|
||||||
|
/* fall through */
|
||||||
case float_class_normal:
|
case float_class_normal:
|
||||||
a->exp += MIN(MAX(n, -0x10000), 0x10000);
|
a->exp += MIN(MAX(n, -0x10000), 0x10000);
|
||||||
break;
|
break;
|
||||||
@ -1599,6 +1679,12 @@ static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
|
|||||||
|
|
||||||
if (unlikely(a->cls != float_class_normal)) {
|
if (unlikely(a->cls != float_class_normal)) {
|
||||||
switch (a->cls) {
|
switch (a->cls) {
|
||||||
|
case float_class_denormal:
|
||||||
|
if (!a->sign) {
|
||||||
|
/* -ve denormal will be InvalidOperation */
|
||||||
|
float_raise(float_flag_input_denormal_used, s);
|
||||||
|
}
|
||||||
|
break;
|
||||||
case float_class_snan:
|
case float_class_snan:
|
||||||
case float_class_qnan:
|
case float_class_qnan:
|
||||||
parts_return_nan(a, s);
|
parts_return_nan(a, s);
|
||||||
@ -1615,9 +1701,8 @@ static void partsN(log2)(FloatPartsN *a, float_status *s, const FloatFmt *fmt)
|
|||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
default:
|
default:
|
||||||
break;
|
g_assert_not_reached();
|
||||||
}
|
}
|
||||||
g_assert_not_reached();
|
|
||||||
}
|
}
|
||||||
if (unlikely(a->sign)) {
|
if (unlikely(a->sign)) {
|
||||||
goto d_nan;
|
goto d_nan;
|
||||||
|
@ -404,12 +404,16 @@ float64_gen2(float64 xa, float64 xb, float_status *s,
|
|||||||
/*
|
/*
|
||||||
* Classify a floating point number. Everything above float_class_qnan
|
* Classify a floating point number. Everything above float_class_qnan
|
||||||
* is a NaN so cls >= float_class_qnan is any NaN.
|
* is a NaN so cls >= float_class_qnan is any NaN.
|
||||||
|
*
|
||||||
|
* Note that we canonicalize denormals, so most code should treat
|
||||||
|
* class_normal and class_denormal identically.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
typedef enum __attribute__ ((__packed__)) {
|
typedef enum __attribute__ ((__packed__)) {
|
||||||
float_class_unclassified,
|
float_class_unclassified,
|
||||||
float_class_zero,
|
float_class_zero,
|
||||||
float_class_normal,
|
float_class_normal,
|
||||||
|
float_class_denormal, /* input was a non-squashed denormal */
|
||||||
float_class_inf,
|
float_class_inf,
|
||||||
float_class_qnan, /* all NaNs from here */
|
float_class_qnan, /* all NaNs from here */
|
||||||
float_class_snan,
|
float_class_snan,
|
||||||
@ -420,12 +424,14 @@ typedef enum __attribute__ ((__packed__)) {
|
|||||||
enum {
|
enum {
|
||||||
float_cmask_zero = float_cmask(float_class_zero),
|
float_cmask_zero = float_cmask(float_class_zero),
|
||||||
float_cmask_normal = float_cmask(float_class_normal),
|
float_cmask_normal = float_cmask(float_class_normal),
|
||||||
|
float_cmask_denormal = float_cmask(float_class_denormal),
|
||||||
float_cmask_inf = float_cmask(float_class_inf),
|
float_cmask_inf = float_cmask(float_class_inf),
|
||||||
float_cmask_qnan = float_cmask(float_class_qnan),
|
float_cmask_qnan = float_cmask(float_class_qnan),
|
||||||
float_cmask_snan = float_cmask(float_class_snan),
|
float_cmask_snan = float_cmask(float_class_snan),
|
||||||
|
|
||||||
float_cmask_infzero = float_cmask_zero | float_cmask_inf,
|
float_cmask_infzero = float_cmask_zero | float_cmask_inf,
|
||||||
float_cmask_anynan = float_cmask_qnan | float_cmask_snan,
|
float_cmask_anynan = float_cmask_qnan | float_cmask_snan,
|
||||||
|
float_cmask_anynorm = float_cmask_normal | float_cmask_denormal,
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Flags for parts_minmax. */
|
/* Flags for parts_minmax. */
|
||||||
@ -459,6 +465,20 @@ static inline __attribute__((unused)) bool is_qnan(FloatClass c)
|
|||||||
return c == float_class_qnan;
|
return c == float_class_qnan;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Return true if the float_cmask has only normals in it
|
||||||
|
* (including input denormals that were canonicalized)
|
||||||
|
*/
|
||||||
|
static inline bool cmask_is_only_normals(int cmask)
|
||||||
|
{
|
||||||
|
return !(cmask & ~float_cmask_anynorm);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool is_anynorm(FloatClass c)
|
||||||
|
{
|
||||||
|
return float_cmask(c) & float_cmask_anynorm;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Structure holding all of the decomposed parts of a float.
|
* Structure holding all of the decomposed parts of a float.
|
||||||
* The exponent is unbiased and the fraction is normalized.
|
* The exponent is unbiased and the fraction is normalized.
|
||||||
@ -1729,6 +1749,7 @@ static float64 float64r32_round_pack_canonical(FloatParts64 *p,
|
|||||||
*/
|
*/
|
||||||
switch (p->cls) {
|
switch (p->cls) {
|
||||||
case float_class_normal:
|
case float_class_normal:
|
||||||
|
case float_class_denormal:
|
||||||
if (unlikely(p->exp == 0)) {
|
if (unlikely(p->exp == 0)) {
|
||||||
/*
|
/*
|
||||||
* The result is denormal for float32, but can be represented
|
* The result is denormal for float32, but can be represented
|
||||||
@ -1817,6 +1838,7 @@ static floatx80 floatx80_round_pack_canonical(FloatParts128 *p,
|
|||||||
|
|
||||||
switch (p->cls) {
|
switch (p->cls) {
|
||||||
case float_class_normal:
|
case float_class_normal:
|
||||||
|
case float_class_denormal:
|
||||||
if (s->floatx80_rounding_precision == floatx80_precision_x) {
|
if (s->floatx80_rounding_precision == floatx80_precision_x) {
|
||||||
parts_uncanon_normal(p, s, fmt);
|
parts_uncanon_normal(p, s, fmt);
|
||||||
frac = p->frac_hi;
|
frac = p->frac_hi;
|
||||||
@ -2696,6 +2718,9 @@ static void parts_float_to_ahp(FloatParts64 *a, float_status *s)
|
|||||||
float16_params_ahp.frac_size + 1);
|
float16_params_ahp.frac_size + 1);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case float_class_denormal:
|
||||||
|
float_raise(float_flag_input_denormal_used, s);
|
||||||
|
break;
|
||||||
case float_class_normal:
|
case float_class_normal:
|
||||||
case float_class_zero:
|
case float_class_zero:
|
||||||
break;
|
break;
|
||||||
@ -2710,6 +2735,9 @@ static void parts64_float_to_float(FloatParts64 *a, float_status *s)
|
|||||||
if (is_nan(a->cls)) {
|
if (is_nan(a->cls)) {
|
||||||
parts_return_nan(a, s);
|
parts_return_nan(a, s);
|
||||||
}
|
}
|
||||||
|
if (a->cls == float_class_denormal) {
|
||||||
|
float_raise(float_flag_input_denormal_used, s);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void parts128_float_to_float(FloatParts128 *a, float_status *s)
|
static void parts128_float_to_float(FloatParts128 *a, float_status *s)
|
||||||
@ -2717,6 +2745,9 @@ static void parts128_float_to_float(FloatParts128 *a, float_status *s)
|
|||||||
if (is_nan(a->cls)) {
|
if (is_nan(a->cls)) {
|
||||||
parts_return_nan(a, s);
|
parts_return_nan(a, s);
|
||||||
}
|
}
|
||||||
|
if (a->cls == float_class_denormal) {
|
||||||
|
float_raise(float_flag_input_denormal_used, s);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#define parts_float_to_float(P, S) \
|
#define parts_float_to_float(P, S) \
|
||||||
@ -2729,12 +2760,21 @@ static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
|
|||||||
a->sign = b->sign;
|
a->sign = b->sign;
|
||||||
a->exp = b->exp;
|
a->exp = b->exp;
|
||||||
|
|
||||||
if (a->cls == float_class_normal) {
|
switch (a->cls) {
|
||||||
|
case float_class_denormal:
|
||||||
|
float_raise(float_flag_input_denormal_used, s);
|
||||||
|
/* fall through */
|
||||||
|
case float_class_normal:
|
||||||
frac_truncjam(a, b);
|
frac_truncjam(a, b);
|
||||||
} else if (is_nan(a->cls)) {
|
break;
|
||||||
|
case float_class_snan:
|
||||||
|
case float_class_qnan:
|
||||||
/* Discard the low bits of the NaN. */
|
/* Discard the low bits of the NaN. */
|
||||||
a->frac = b->frac_hi;
|
a->frac = b->frac_hi;
|
||||||
parts_return_nan(a, s);
|
parts_return_nan(a, s);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2749,6 +2789,9 @@ static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
|
|||||||
if (is_nan(a->cls)) {
|
if (is_nan(a->cls)) {
|
||||||
parts_return_nan(a, s);
|
parts_return_nan(a, s);
|
||||||
}
|
}
|
||||||
|
if (a->cls == float_class_denormal) {
|
||||||
|
float_raise(float_flag_input_denormal_used, s);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
float32 float16_to_float32(float16 a, bool ieee, float_status *s)
|
float32 float16_to_float32(float16 a, bool ieee, float_status *s)
|
||||||
@ -3218,6 +3261,7 @@ static Int128 float128_to_int128_scalbn(float128 a, FloatRoundMode rmode,
|
|||||||
return int128_zero();
|
return int128_zero();
|
||||||
|
|
||||||
case float_class_normal:
|
case float_class_normal:
|
||||||
|
case float_class_denormal:
|
||||||
if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
|
if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
|
||||||
flags = float_flag_inexact;
|
flags = float_flag_inexact;
|
||||||
}
|
}
|
||||||
@ -3645,6 +3689,7 @@ static Int128 float128_to_uint128_scalbn(float128 a, FloatRoundMode rmode,
|
|||||||
return int128_zero();
|
return int128_zero();
|
||||||
|
|
||||||
case float_class_normal:
|
case float_class_normal:
|
||||||
|
case float_class_denormal:
|
||||||
if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
|
if (parts_round_to_int_normal(&p, rmode, scale, 128 - 2)) {
|
||||||
flags = float_flag_inexact;
|
flags = float_flag_inexact;
|
||||||
if (p.cls == float_class_zero) {
|
if (p.cls == float_class_zero) {
|
||||||
@ -4386,7 +4431,11 @@ float32_hs_compare(float32 xa, float32 xb, float_status *s, bool is_quiet)
|
|||||||
goto soft;
|
goto soft;
|
||||||
}
|
}
|
||||||
|
|
||||||
float32_input_flush2(&ua.s, &ub.s, s);
|
if (unlikely(float32_is_denormal(ua.s) || float32_is_denormal(ub.s))) {
|
||||||
|
/* We may need to set the input_denormal_used flag */
|
||||||
|
goto soft;
|
||||||
|
}
|
||||||
|
|
||||||
if (isgreaterequal(ua.h, ub.h)) {
|
if (isgreaterequal(ua.h, ub.h)) {
|
||||||
if (isgreater(ua.h, ub.h)) {
|
if (isgreater(ua.h, ub.h)) {
|
||||||
return float_relation_greater;
|
return float_relation_greater;
|
||||||
@ -4436,7 +4485,11 @@ float64_hs_compare(float64 xa, float64 xb, float_status *s, bool is_quiet)
|
|||||||
goto soft;
|
goto soft;
|
||||||
}
|
}
|
||||||
|
|
||||||
float64_input_flush2(&ua.s, &ub.s, s);
|
if (unlikely(float64_is_denormal(ua.s) || float64_is_denormal(ub.s))) {
|
||||||
|
/* We may need to set the input_denormal_used flag */
|
||||||
|
goto soft;
|
||||||
|
}
|
||||||
|
|
||||||
if (isgreaterequal(ua.h, ub.h)) {
|
if (isgreaterequal(ua.h, ub.h)) {
|
||||||
if (isgreater(ua.h, ub.h)) {
|
if (isgreater(ua.h, ub.h)) {
|
||||||
return float_relation_greater;
|
return float_relation_greater;
|
||||||
@ -5231,6 +5284,8 @@ float32 float32_exp2(float32 a, float_status *status)
|
|||||||
float32_unpack_canonical(&xp, a, status);
|
float32_unpack_canonical(&xp, a, status);
|
||||||
if (unlikely(xp.cls != float_class_normal)) {
|
if (unlikely(xp.cls != float_class_normal)) {
|
||||||
switch (xp.cls) {
|
switch (xp.cls) {
|
||||||
|
case float_class_denormal:
|
||||||
|
break;
|
||||||
case float_class_snan:
|
case float_class_snan:
|
||||||
case float_class_qnan:
|
case float_class_qnan:
|
||||||
parts_return_nan(&xp, status);
|
parts_return_nan(&xp, status);
|
||||||
@ -5240,9 +5295,8 @@ float32 float32_exp2(float32 a, float_status *status)
|
|||||||
case float_class_zero:
|
case float_class_zero:
|
||||||
return float32_one;
|
return float32_one;
|
||||||
default:
|
default:
|
||||||
break;
|
g_assert_not_reached();
|
||||||
}
|
}
|
||||||
g_assert_not_reached();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
float_raise(float_flag_inexact, status);
|
float_raise(float_flag_inexact, status);
|
||||||
|
@ -109,6 +109,12 @@ static inline void set_flush_inputs_to_zero(bool val, float_status *status)
|
|||||||
status->flush_inputs_to_zero = val;
|
status->flush_inputs_to_zero = val;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void set_float_ftz_detection(FloatFTZDetection d,
|
||||||
|
float_status *status)
|
||||||
|
{
|
||||||
|
status->ftz_detection = d;
|
||||||
|
}
|
||||||
|
|
||||||
static inline void set_default_nan_mode(bool val, float_status *status)
|
static inline void set_default_nan_mode(bool val, float_status *status)
|
||||||
{
|
{
|
||||||
status->default_nan_mode = val;
|
status->default_nan_mode = val;
|
||||||
@ -183,4 +189,9 @@ static inline bool get_default_nan_mode(const float_status *status)
|
|||||||
return status->default_nan_mode;
|
return status->default_nan_mode;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline FloatFTZDetection get_float_ftz_detection(const float_status *status)
|
||||||
|
{
|
||||||
|
return status->ftz_detection;
|
||||||
|
}
|
||||||
|
|
||||||
#endif /* SOFTFLOAT_HELPERS_H */
|
#endif /* SOFTFLOAT_HELPERS_H */
|
||||||
|
@ -165,6 +165,13 @@ enum {
|
|||||||
float_flag_invalid_sqrt = 0x0800, /* sqrt(-x) */
|
float_flag_invalid_sqrt = 0x0800, /* sqrt(-x) */
|
||||||
float_flag_invalid_cvti = 0x1000, /* non-nan to integer */
|
float_flag_invalid_cvti = 0x1000, /* non-nan to integer */
|
||||||
float_flag_invalid_snan = 0x2000, /* any operand was snan */
|
float_flag_invalid_snan = 0x2000, /* any operand was snan */
|
||||||
|
/*
|
||||||
|
* An input was denormal and we used it (without flushing it to zero).
|
||||||
|
* Not set if we do not actually use the denormal input (e.g.
|
||||||
|
* because some other input was a NaN, or because the operation
|
||||||
|
* wasn't actually carried out (divide-by-zero; invalid))
|
||||||
|
*/
|
||||||
|
float_flag_input_denormal_used = 0x4000,
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -297,6 +304,22 @@ typedef enum __attribute__((__packed__)) {
|
|||||||
float_infzeronan_suppress_invalid = (1 << 7),
|
float_infzeronan_suppress_invalid = (1 << 7),
|
||||||
} FloatInfZeroNaNRule;
|
} FloatInfZeroNaNRule;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* When flush_to_zero is set, should we detect denormal results to
|
||||||
|
* be flushed before or after rounding? For most architectures this
|
||||||
|
* should be set to match the tininess_before_rounding setting,
|
||||||
|
* but a few architectures, e.g. MIPS MSA, detect FTZ before
|
||||||
|
* rounding but tininess after rounding.
|
||||||
|
*
|
||||||
|
* This enum is arranged so that the default if the target doesn't
|
||||||
|
* configure it matches the default for tininess_before_rounding
|
||||||
|
* (i.e. "after rounding").
|
||||||
|
*/
|
||||||
|
typedef enum __attribute__((__packed__)) {
|
||||||
|
float_ftz_after_rounding = 0,
|
||||||
|
float_ftz_before_rounding = 1,
|
||||||
|
} FloatFTZDetection;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Floating Point Status. Individual architectures may maintain
|
* Floating Point Status. Individual architectures may maintain
|
||||||
* several versions of float_status for different functions. The
|
* several versions of float_status for different functions. The
|
||||||
@ -314,6 +337,8 @@ typedef struct float_status {
|
|||||||
bool tininess_before_rounding;
|
bool tininess_before_rounding;
|
||||||
/* should denormalised results go to zero and set output_denormal_flushed? */
|
/* should denormalised results go to zero and set output_denormal_flushed? */
|
||||||
bool flush_to_zero;
|
bool flush_to_zero;
|
||||||
|
/* do we detect and flush denormal results before or after rounding? */
|
||||||
|
FloatFTZDetection ftz_detection;
|
||||||
/* should denormalised inputs go to zero and set input_denormal_flushed? */
|
/* should denormalised inputs go to zero and set input_denormal_flushed? */
|
||||||
bool flush_inputs_to_zero;
|
bool flush_inputs_to_zero;
|
||||||
bool default_nan_mode;
|
bool default_nan_mode;
|
||||||
|
@ -202,6 +202,13 @@ static void alpha_cpu_initfn(Object *obj)
|
|||||||
set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status);
|
set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status);
|
||||||
/* Default NaN: sign bit clear, msb frac bit set */
|
/* Default NaN: sign bit clear, msb frac bit set */
|
||||||
set_float_default_nan_pattern(0b01000000, &env->fp_status);
|
set_float_default_nan_pattern(0b01000000, &env->fp_status);
|
||||||
|
/*
|
||||||
|
* TODO: this is incorrect. The Alpha Architecture Handbook version 4
|
||||||
|
* section 4.7.7.11 says that we flush to zero for underflow cases, so
|
||||||
|
* this should be float_ftz_after_rounding to match the
|
||||||
|
* tininess_after_rounding (which is specified in section 4.7.5).
|
||||||
|
*/
|
||||||
|
set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status);
|
||||||
#if defined(CONFIG_USER_ONLY)
|
#if defined(CONFIG_USER_ONLY)
|
||||||
env->flags = ENV_FLAG_PS_USER | ENV_FLAG_FEN;
|
env->flags = ENV_FLAG_PS_USER | ENV_FLAG_FEN;
|
||||||
cpu_alpha_store_fpcr(env, (uint64_t)(FPCR_INVD | FPCR_DZED | FPCR_OVFD
|
cpu_alpha_store_fpcr(env, (uint64_t)(FPCR_INVD | FPCR_DZED | FPCR_OVFD
|
||||||
|
@ -476,6 +476,8 @@ static uint64_t do_cvttq(CPUAlphaState *env, uint64_t a, int roundmode)
|
|||||||
exc = FPCR_INV;
|
exc = FPCR_INV;
|
||||||
} else if (exc & float_flag_inexact) {
|
} else if (exc & float_flag_inexact) {
|
||||||
exc = FPCR_INE;
|
exc = FPCR_INE;
|
||||||
|
} else {
|
||||||
|
exc = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
env->error_code = exc;
|
env->error_code = exc;
|
||||||
|
@ -597,6 +597,11 @@ static inline bool isar_feature_aa64_mops(const ARMISARegisters *id)
|
|||||||
return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, MOPS);
|
return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, MOPS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline bool isar_feature_aa64_rpres(const ARMISARegisters *id)
|
||||||
|
{
|
||||||
|
return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, RPRES);
|
||||||
|
}
|
||||||
|
|
||||||
static inline bool isar_feature_aa64_fp_simd(const ARMISARegisters *id)
|
static inline bool isar_feature_aa64_fp_simd(const ARMISARegisters *id)
|
||||||
{
|
{
|
||||||
/* We always set the AdvSIMD and FP fields identically. */
|
/* We always set the AdvSIMD and FP fields identically. */
|
||||||
@ -802,6 +807,11 @@ static inline bool isar_feature_aa64_hcx(const ARMISARegisters *id)
|
|||||||
return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HCX) != 0;
|
return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, HCX) != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline bool isar_feature_aa64_afp(const ARMISARegisters *id)
|
||||||
|
{
|
||||||
|
return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, AFP) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
static inline bool isar_feature_aa64_tidcp1(const ARMISARegisters *id)
|
static inline bool isar_feature_aa64_tidcp1(const ARMISARegisters *id)
|
||||||
{
|
{
|
||||||
return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, TIDCP1) != 0;
|
return FIELD_EX64(id->id_aa64mmfr1, ID_AA64MMFR1, TIDCP1) != 0;
|
||||||
|
@ -169,28 +169,6 @@ void arm_register_el_change_hook(ARMCPU *cpu, ARMELChangeHookFn *hook,
|
|||||||
QLIST_INSERT_HEAD(&cpu->el_change_hooks, entry, node);
|
QLIST_INSERT_HEAD(&cpu->el_change_hooks, entry, node);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Set the float_status behaviour to match the Arm defaults:
|
|
||||||
* * tininess-before-rounding
|
|
||||||
* * 2-input NaN propagation prefers SNaN over QNaN, and then
|
|
||||||
* operand A over operand B (see FPProcessNaNs() pseudocode)
|
|
||||||
* * 3-input NaN propagation prefers SNaN over QNaN, and then
|
|
||||||
* operand C over A over B (see FPProcessNaNs3() pseudocode,
|
|
||||||
* but note that for QEMU muladd is a * b + c, whereas for
|
|
||||||
* the pseudocode function the arguments are in the order c, a, b.
|
|
||||||
* * 0 * Inf + NaN returns the default NaN if the input NaN is quiet,
|
|
||||||
* and the input NaN if it is signalling
|
|
||||||
* * Default NaN has sign bit clear, msb frac bit set
|
|
||||||
*/
|
|
||||||
static void arm_set_default_fp_behaviours(float_status *s)
|
|
||||||
{
|
|
||||||
set_float_detect_tininess(float_tininess_before_rounding, s);
|
|
||||||
set_float_2nan_prop_rule(float_2nan_prop_s_ab, s);
|
|
||||||
set_float_3nan_prop_rule(float_3nan_prop_s_cab, s);
|
|
||||||
set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s);
|
|
||||||
set_float_default_nan_pattern(0b01000000, s);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void cp_reg_reset(gpointer key, gpointer value, gpointer opaque)
|
static void cp_reg_reset(gpointer key, gpointer value, gpointer opaque)
|
||||||
{
|
{
|
||||||
/* Reset a single ARMCPRegInfo register */
|
/* Reset a single ARMCPRegInfo register */
|
||||||
@ -568,16 +546,20 @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
|
|||||||
env->sau.ctrl = 0;
|
env->sau.ctrl = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
set_flush_to_zero(1, &env->vfp.standard_fp_status);
|
set_flush_to_zero(1, &env->vfp.fp_status[FPST_STD]);
|
||||||
set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status);
|
set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_STD]);
|
||||||
set_default_nan_mode(1, &env->vfp.standard_fp_status);
|
set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]);
|
||||||
set_default_nan_mode(1, &env->vfp.standard_fp_status_f16);
|
set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]);
|
||||||
arm_set_default_fp_behaviours(&env->vfp.fp_status_a32);
|
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32]);
|
||||||
arm_set_default_fp_behaviours(&env->vfp.fp_status_a64);
|
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]);
|
||||||
arm_set_default_fp_behaviours(&env->vfp.standard_fp_status);
|
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]);
|
||||||
arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32);
|
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]);
|
||||||
arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64);
|
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
|
||||||
arm_set_default_fp_behaviours(&env->vfp.standard_fp_status_f16);
|
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]);
|
||||||
|
arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]);
|
||||||
|
set_flush_to_zero(1, &env->vfp.fp_status[FPST_AH]);
|
||||||
|
set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_AH]);
|
||||||
|
arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH_F16]);
|
||||||
|
|
||||||
#ifndef CONFIG_USER_ONLY
|
#ifndef CONFIG_USER_ONLY
|
||||||
if (kvm_enabled()) {
|
if (kvm_enabled()) {
|
||||||
|
@ -202,6 +202,61 @@ typedef struct ARMMMUFaultInfo ARMMMUFaultInfo;
|
|||||||
|
|
||||||
typedef struct NVICState NVICState;
|
typedef struct NVICState NVICState;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Enum for indexing vfp.fp_status[].
|
||||||
|
*
|
||||||
|
* FPST_A32: is the "normal" fp status for AArch32 insns
|
||||||
|
* FPST_A64: is the "normal" fp status for AArch64 insns
|
||||||
|
* FPST_A32_F16: used for AArch32 half-precision calculations
|
||||||
|
* FPST_A64_F16: used for AArch64 half-precision calculations
|
||||||
|
* FPST_STD: the ARM "Standard FPSCR Value"
|
||||||
|
* FPST_STD_F16: used for half-precision
|
||||||
|
* calculations with the ARM "Standard FPSCR Value"
|
||||||
|
* FPST_AH: used for the A64 insns which change behaviour
|
||||||
|
* when FPCR.AH == 1 (bfloat16 conversions and multiplies,
|
||||||
|
* and the reciprocal and square root estimate/step insns)
|
||||||
|
* FPST_AH_F16: used for the A64 insns which change behaviour
|
||||||
|
* when FPCR.AH == 1 (bfloat16 conversions and multiplies,
|
||||||
|
* and the reciprocal and square root estimate/step insns);
|
||||||
|
* for half-precision
|
||||||
|
*
|
||||||
|
* Half-precision operations are governed by a separate
|
||||||
|
* flush-to-zero control bit in FPSCR:FZ16. We pass a separate
|
||||||
|
* status structure to control this.
|
||||||
|
*
|
||||||
|
* The "Standard FPSCR", ie default-NaN, flush-to-zero,
|
||||||
|
* round-to-nearest and is used by any operations (generally
|
||||||
|
* Neon) which the architecture defines as controlled by the
|
||||||
|
* standard FPSCR value rather than the FPSCR.
|
||||||
|
*
|
||||||
|
* The "standard FPSCR but for fp16 ops" is needed because
|
||||||
|
* the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than
|
||||||
|
* using a fixed value for it.
|
||||||
|
*
|
||||||
|
* FPST_AH is needed because some insns have different
|
||||||
|
* behaviour when FPCR.AH == 1: they don't update cumulative
|
||||||
|
* exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and
|
||||||
|
* they ignore FPCR.RMode. But they don't ignore FPCR.FZ16,
|
||||||
|
* which means we need an FPST_AH_F16 as well.
|
||||||
|
*
|
||||||
|
* To avoid having to transfer exception bits around, we simply
|
||||||
|
* say that the FPSCR cumulative exception flags are the logical
|
||||||
|
* OR of the flags in the four fp statuses. This relies on the
|
||||||
|
* only thing which needs to read the exception flags being
|
||||||
|
* an explicit FPSCR read.
|
||||||
|
*/
|
||||||
|
typedef enum ARMFPStatusFlavour {
|
||||||
|
FPST_A32,
|
||||||
|
FPST_A64,
|
||||||
|
FPST_A32_F16,
|
||||||
|
FPST_A64_F16,
|
||||||
|
FPST_AH,
|
||||||
|
FPST_AH_F16,
|
||||||
|
FPST_STD,
|
||||||
|
FPST_STD_F16,
|
||||||
|
} ARMFPStatusFlavour;
|
||||||
|
#define FPST_COUNT 8
|
||||||
|
|
||||||
typedef struct CPUArchState {
|
typedef struct CPUArchState {
|
||||||
/* Regs for current mode. */
|
/* Regs for current mode. */
|
||||||
uint32_t regs[16];
|
uint32_t regs[16];
|
||||||
@ -631,41 +686,8 @@ typedef struct CPUArchState {
|
|||||||
/* Scratch space for aa32 neon expansion. */
|
/* Scratch space for aa32 neon expansion. */
|
||||||
uint32_t scratch[8];
|
uint32_t scratch[8];
|
||||||
|
|
||||||
/* There are a number of distinct float control structures:
|
/* There are a number of distinct float control structures. */
|
||||||
*
|
float_status fp_status[FPST_COUNT];
|
||||||
* fp_status_a32: is the "normal" fp status for AArch32 insns
|
|
||||||
* fp_status_a64: is the "normal" fp status for AArch64 insns
|
|
||||||
* fp_status_fp16_a32: used for AArch32 half-precision calculations
|
|
||||||
* fp_status_fp16_a64: used for AArch64 half-precision calculations
|
|
||||||
* standard_fp_status : the ARM "Standard FPSCR Value"
|
|
||||||
* standard_fp_status_fp16 : used for half-precision
|
|
||||||
* calculations with the ARM "Standard FPSCR Value"
|
|
||||||
*
|
|
||||||
* Half-precision operations are governed by a separate
|
|
||||||
* flush-to-zero control bit in FPSCR:FZ16. We pass a separate
|
|
||||||
* status structure to control this.
|
|
||||||
*
|
|
||||||
* The "Standard FPSCR", ie default-NaN, flush-to-zero,
|
|
||||||
* round-to-nearest and is used by any operations (generally
|
|
||||||
* Neon) which the architecture defines as controlled by the
|
|
||||||
* standard FPSCR value rather than the FPSCR.
|
|
||||||
*
|
|
||||||
* The "standard FPSCR but for fp16 ops" is needed because
|
|
||||||
* the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than
|
|
||||||
* using a fixed value for it.
|
|
||||||
*
|
|
||||||
* To avoid having to transfer exception bits around, we simply
|
|
||||||
* say that the FPSCR cumulative exception flags are the logical
|
|
||||||
* OR of the flags in the four fp statuses. This relies on the
|
|
||||||
* only thing which needs to read the exception flags being
|
|
||||||
* an explicit FPSCR read.
|
|
||||||
*/
|
|
||||||
float_status fp_status_a32;
|
|
||||||
float_status fp_status_a64;
|
|
||||||
float_status fp_status_f16_a32;
|
|
||||||
float_status fp_status_f16_a64;
|
|
||||||
float_status standard_fp_status;
|
|
||||||
float_status standard_fp_status_f16;
|
|
||||||
|
|
||||||
uint64_t zcr_el[4]; /* ZCR_EL[1-3] */
|
uint64_t zcr_el[4]; /* ZCR_EL[1-3] */
|
||||||
uint64_t smcr_el[4]; /* SMCR_EL[1-3] */
|
uint64_t smcr_el[4]; /* SMCR_EL[1-3] */
|
||||||
@ -1714,6 +1736,9 @@ void vfp_set_fpscr(CPUARMState *env, uint32_t val);
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
/* FPCR bits */
|
/* FPCR bits */
|
||||||
|
#define FPCR_FIZ (1 << 0) /* Flush Inputs to Zero (FEAT_AFP) */
|
||||||
|
#define FPCR_AH (1 << 1) /* Alternate Handling (FEAT_AFP) */
|
||||||
|
#define FPCR_NEP (1 << 2) /* SIMD scalar ops preserve elts (FEAT_AFP) */
|
||||||
#define FPCR_IOE (1 << 8) /* Invalid Operation exception trap enable */
|
#define FPCR_IOE (1 << 8) /* Invalid Operation exception trap enable */
|
||||||
#define FPCR_DZE (1 << 9) /* Divide by Zero exception trap enable */
|
#define FPCR_DZE (1 << 9) /* Divide by Zero exception trap enable */
|
||||||
#define FPCR_OFE (1 << 10) /* Overflow exception trap enable */
|
#define FPCR_OFE (1 << 10) /* Overflow exception trap enable */
|
||||||
@ -3195,6 +3220,8 @@ FIELD(TBFLAG_A64, NV2, 34, 1)
|
|||||||
FIELD(TBFLAG_A64, NV2_MEM_E20, 35, 1)
|
FIELD(TBFLAG_A64, NV2_MEM_E20, 35, 1)
|
||||||
/* Set if FEAT_NV2 RAM accesses are big-endian */
|
/* Set if FEAT_NV2 RAM accesses are big-endian */
|
||||||
FIELD(TBFLAG_A64, NV2_MEM_BE, 36, 1)
|
FIELD(TBFLAG_A64, NV2_MEM_BE, 36, 1)
|
||||||
|
FIELD(TBFLAG_A64, AH, 37, 1) /* FPCR.AH */
|
||||||
|
FIELD(TBFLAG_A64, NEP, 38, 1) /* FPCR.NEP */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Helpers for using the above. Note that only the A64 accessors use
|
* Helpers for using the above. Note that only the A64 accessors use
|
||||||
|
@ -4848,7 +4848,7 @@ static const ARMCPRegInfo v8_cp_reginfo[] = {
|
|||||||
.writefn = aa64_daif_write, .resetfn = arm_cp_reset_ignore },
|
.writefn = aa64_daif_write, .resetfn = arm_cp_reset_ignore },
|
||||||
{ .name = "FPCR", .state = ARM_CP_STATE_AA64,
|
{ .name = "FPCR", .state = ARM_CP_STATE_AA64,
|
||||||
.opc0 = 3, .opc1 = 3, .opc2 = 0, .crn = 4, .crm = 4,
|
.opc0 = 3, .opc1 = 3, .opc2 = 0, .crn = 4, .crm = 4,
|
||||||
.access = PL0_RW, .type = ARM_CP_FPU | ARM_CP_SUPPRESS_TB_END,
|
.access = PL0_RW, .type = ARM_CP_FPU,
|
||||||
.readfn = aa64_fpcr_read, .writefn = aa64_fpcr_write },
|
.readfn = aa64_fpcr_read, .writefn = aa64_fpcr_write },
|
||||||
{ .name = "FPSR", .state = ARM_CP_STATE_AA64,
|
{ .name = "FPSR", .state = ARM_CP_STATE_AA64,
|
||||||
.opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 4, .crm = 4,
|
.opc0 = 3, .opc1 = 3, .opc2 = 1, .crn = 4, .crm = 4,
|
||||||
|
@ -245,9 +245,11 @@ DEF_HELPER_4(vfp_muladdh, f16, f16, f16, f16, fpst)
|
|||||||
|
|
||||||
DEF_HELPER_FLAGS_2(recpe_f16, TCG_CALL_NO_RWG, f16, f16, fpst)
|
DEF_HELPER_FLAGS_2(recpe_f16, TCG_CALL_NO_RWG, f16, f16, fpst)
|
||||||
DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, fpst)
|
DEF_HELPER_FLAGS_2(recpe_f32, TCG_CALL_NO_RWG, f32, f32, fpst)
|
||||||
|
DEF_HELPER_FLAGS_2(recpe_rpres_f32, TCG_CALL_NO_RWG, f32, f32, fpst)
|
||||||
DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, fpst)
|
DEF_HELPER_FLAGS_2(recpe_f64, TCG_CALL_NO_RWG, f64, f64, fpst)
|
||||||
DEF_HELPER_FLAGS_2(rsqrte_f16, TCG_CALL_NO_RWG, f16, f16, fpst)
|
DEF_HELPER_FLAGS_2(rsqrte_f16, TCG_CALL_NO_RWG, f16, f16, fpst)
|
||||||
DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, fpst)
|
DEF_HELPER_FLAGS_2(rsqrte_f32, TCG_CALL_NO_RWG, f32, f32, fpst)
|
||||||
|
DEF_HELPER_FLAGS_2(rsqrte_rpres_f32, TCG_CALL_NO_RWG, f32, f32, fpst)
|
||||||
DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, fpst)
|
DEF_HELPER_FLAGS_2(rsqrte_f64, TCG_CALL_NO_RWG, f64, f64, fpst)
|
||||||
DEF_HELPER_FLAGS_1(recpe_u32, TCG_CALL_NO_RWG, i32, i32)
|
DEF_HELPER_FLAGS_1(recpe_u32, TCG_CALL_NO_RWG, i32, i32)
|
||||||
DEF_HELPER_FLAGS_1(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32)
|
DEF_HELPER_FLAGS_1(rsqrte_u32, TCG_CALL_NO_RWG, i32, i32)
|
||||||
@ -680,10 +682,12 @@ DEF_HELPER_FLAGS_4(gvec_vrintx_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
|
|||||||
|
|
||||||
DEF_HELPER_FLAGS_4(gvec_frecpe_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
|
DEF_HELPER_FLAGS_4(gvec_frecpe_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
|
||||||
DEF_HELPER_FLAGS_4(gvec_frecpe_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
|
DEF_HELPER_FLAGS_4(gvec_frecpe_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_4(gvec_frecpe_rpres_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
|
||||||
DEF_HELPER_FLAGS_4(gvec_frecpe_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
|
DEF_HELPER_FLAGS_4(gvec_frecpe_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
|
||||||
|
|
||||||
DEF_HELPER_FLAGS_4(gvec_frsqrte_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
|
DEF_HELPER_FLAGS_4(gvec_frsqrte_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
|
||||||
DEF_HELPER_FLAGS_4(gvec_frsqrte_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
|
DEF_HELPER_FLAGS_4(gvec_frsqrte_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_4(gvec_frsqrte_rpres_s, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
|
||||||
DEF_HELPER_FLAGS_4(gvec_frsqrte_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
|
DEF_HELPER_FLAGS_4(gvec_frsqrte_d, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
|
||||||
|
|
||||||
DEF_HELPER_FLAGS_4(gvec_fcgt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
|
DEF_HELPER_FLAGS_4(gvec_fcgt0_h, TCG_CALL_NO_RWG, void, ptr, ptr, fpst, i32)
|
||||||
@ -722,6 +726,10 @@ DEF_HELPER_FLAGS_5(gvec_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
|||||||
DEF_HELPER_FLAGS_5(gvec_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
DEF_HELPER_FLAGS_5(gvec_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||||
DEF_HELPER_FLAGS_5(gvec_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
DEF_HELPER_FLAGS_5(gvec_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||||
|
|
||||||
|
DEF_HELPER_FLAGS_5(gvec_ah_fabd_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_5(gvec_ah_fabd_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_5(gvec_ah_fabd_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||||
|
|
||||||
DEF_HELPER_FLAGS_5(gvec_fceq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
DEF_HELPER_FLAGS_5(gvec_fceq_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||||
DEF_HELPER_FLAGS_5(gvec_fceq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
DEF_HELPER_FLAGS_5(gvec_fceq_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||||
DEF_HELPER_FLAGS_5(gvec_fceq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
DEF_HELPER_FLAGS_5(gvec_fceq_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||||
@ -778,6 +786,10 @@ DEF_HELPER_FLAGS_5(gvec_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
|||||||
DEF_HELPER_FLAGS_5(gvec_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
DEF_HELPER_FLAGS_5(gvec_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||||
DEF_HELPER_FLAGS_5(gvec_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
DEF_HELPER_FLAGS_5(gvec_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||||
|
|
||||||
|
DEF_HELPER_FLAGS_5(gvec_ah_vfms_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_5(gvec_ah_vfms_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_5(gvec_ah_vfms_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, fpst, i32)
|
||||||
|
|
||||||
DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_5(gvec_ftsmul_h, TCG_CALL_NO_RWG,
|
||||||
void, ptr, ptr, ptr, fpst, i32)
|
void, ptr, ptr, ptr, fpst, i32)
|
||||||
DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_5(gvec_ftsmul_s, TCG_CALL_NO_RWG,
|
||||||
@ -809,6 +821,20 @@ DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG,
|
|||||||
DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG,
|
||||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
|
||||||
|
DEF_HELPER_FLAGS_6(gvec_fmls_idx_h, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_6(gvec_fmls_idx_s, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_6(gvec_fmls_idx_d, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
|
||||||
|
DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_h, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_s, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_d, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
|
||||||
DEF_HELPER_FLAGS_5(gvec_uqadd_b, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_5(gvec_uqadd_b, TCG_CALL_NO_RWG,
|
||||||
void, ptr, ptr, ptr, ptr, i32)
|
void, ptr, ptr, ptr, ptr, i32)
|
||||||
DEF_HELPER_FLAGS_5(gvec_uqadd_h, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_5(gvec_uqadd_h, TCG_CALL_NO_RWG,
|
||||||
|
@ -1828,4 +1828,10 @@ uint64_t gt_virt_cnt_offset(CPUARMState *env);
|
|||||||
* all EL1" scope; this covers stage 1 and stage 2.
|
* all EL1" scope; this covers stage 1 and stage 2.
|
||||||
*/
|
*/
|
||||||
int alle1_tlbmask(CPUARMState *env);
|
int alle1_tlbmask(CPUARMState *env);
|
||||||
|
|
||||||
|
/* Set the float_status behaviour to match the Arm defaults */
|
||||||
|
void arm_set_default_fp_behaviours(float_status *s);
|
||||||
|
/* Set the float_status behaviour to match Arm FPCR.AH=1 behaviour */
|
||||||
|
void arm_set_ah_fp_behaviours(float_status *s);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1167,6 +1167,7 @@ void aarch64_max_tcg_initfn(Object *obj)
|
|||||||
cpu->isar.id_aa64isar1 = t;
|
cpu->isar.id_aa64isar1 = t;
|
||||||
|
|
||||||
t = cpu->isar.id_aa64isar2;
|
t = cpu->isar.id_aa64isar2;
|
||||||
|
t = FIELD_DP64(t, ID_AA64ISAR2, RPRES, 1); /* FEAT_RPRES */
|
||||||
t = FIELD_DP64(t, ID_AA64ISAR2, MOPS, 1); /* FEAT_MOPS */
|
t = FIELD_DP64(t, ID_AA64ISAR2, MOPS, 1); /* FEAT_MOPS */
|
||||||
t = FIELD_DP64(t, ID_AA64ISAR2, BC, 1); /* FEAT_HBC */
|
t = FIELD_DP64(t, ID_AA64ISAR2, BC, 1); /* FEAT_HBC */
|
||||||
t = FIELD_DP64(t, ID_AA64ISAR2, WFXT, 2); /* FEAT_WFxT */
|
t = FIELD_DP64(t, ID_AA64ISAR2, WFXT, 2); /* FEAT_WFxT */
|
||||||
@ -1218,6 +1219,7 @@ void aarch64_max_tcg_initfn(Object *obj)
|
|||||||
t = FIELD_DP64(t, ID_AA64MMFR1, XNX, 1); /* FEAT_XNX */
|
t = FIELD_DP64(t, ID_AA64MMFR1, XNX, 1); /* FEAT_XNX */
|
||||||
t = FIELD_DP64(t, ID_AA64MMFR1, ETS, 2); /* FEAT_ETS2 */
|
t = FIELD_DP64(t, ID_AA64MMFR1, ETS, 2); /* FEAT_ETS2 */
|
||||||
t = FIELD_DP64(t, ID_AA64MMFR1, HCX, 1); /* FEAT_HCX */
|
t = FIELD_DP64(t, ID_AA64MMFR1, HCX, 1); /* FEAT_HCX */
|
||||||
|
t = FIELD_DP64(t, ID_AA64MMFR1, AFP, 1); /* FEAT_AFP */
|
||||||
t = FIELD_DP64(t, ID_AA64MMFR1, TIDCP1, 1); /* FEAT_TIDCP1 */
|
t = FIELD_DP64(t, ID_AA64MMFR1, TIDCP1, 1); /* FEAT_TIDCP1 */
|
||||||
t = FIELD_DP64(t, ID_AA64MMFR1, CMOW, 1); /* FEAT_CMOW */
|
t = FIELD_DP64(t, ID_AA64MMFR1, CMOW, 1); /* FEAT_CMOW */
|
||||||
cpu->isar.id_aa64mmfr1 = t;
|
cpu->isar.id_aa64mmfr1 = t;
|
||||||
|
@ -38,6 +38,7 @@
|
|||||||
#ifdef CONFIG_USER_ONLY
|
#ifdef CONFIG_USER_ONLY
|
||||||
#include "user/page-protection.h"
|
#include "user/page-protection.h"
|
||||||
#endif
|
#endif
|
||||||
|
#include "vec_internal.h"
|
||||||
|
|
||||||
/* C2.4.7 Multiply and divide */
|
/* C2.4.7 Multiply and divide */
|
||||||
/* special cases for 0 and LLONG_MIN are mandated by the standard */
|
/* special cases for 0 and LLONG_MIN are mandated by the standard */
|
||||||
@ -208,88 +209,52 @@ uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, float_status *fpst)
|
|||||||
return -float64_lt(b, a, fpst);
|
return -float64_lt(b, a, fpst);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Reciprocal step and sqrt step. Note that unlike the A32/T32
|
/*
|
||||||
|
* Reciprocal step and sqrt step. Note that unlike the A32/T32
|
||||||
* versions, these do a fully fused multiply-add or
|
* versions, these do a fully fused multiply-add or
|
||||||
* multiply-add-and-halve.
|
* multiply-add-and-halve.
|
||||||
|
* The FPCR.AH == 1 versions need to avoid flipping the sign of NaN.
|
||||||
*/
|
*/
|
||||||
|
#define DO_RECPS(NAME, CTYPE, FLOATTYPE, CHSFN) \
|
||||||
uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, float_status *fpst)
|
CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \
|
||||||
{
|
{ \
|
||||||
a = float16_squash_input_denormal(a, fpst);
|
a = FLOATTYPE ## _squash_input_denormal(a, fpst); \
|
||||||
b = float16_squash_input_denormal(b, fpst);
|
b = FLOATTYPE ## _squash_input_denormal(b, fpst); \
|
||||||
|
a = FLOATTYPE ## _ ## CHSFN(a); \
|
||||||
a = float16_chs(a);
|
if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \
|
||||||
if ((float16_is_infinity(a) && float16_is_zero(b)) ||
|
(FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \
|
||||||
(float16_is_infinity(b) && float16_is_zero(a))) {
|
return FLOATTYPE ## _two; \
|
||||||
return float16_two;
|
} \
|
||||||
|
return FLOATTYPE ## _muladd(a, b, FLOATTYPE ## _two, 0, fpst); \
|
||||||
}
|
}
|
||||||
return float16_muladd(a, b, float16_two, 0, fpst);
|
|
||||||
}
|
|
||||||
|
|
||||||
float32 HELPER(recpsf_f32)(float32 a, float32 b, float_status *fpst)
|
DO_RECPS(recpsf_f16, uint32_t, float16, chs)
|
||||||
{
|
DO_RECPS(recpsf_f32, float32, float32, chs)
|
||||||
a = float32_squash_input_denormal(a, fpst);
|
DO_RECPS(recpsf_f64, float64, float64, chs)
|
||||||
b = float32_squash_input_denormal(b, fpst);
|
DO_RECPS(recpsf_ah_f16, uint32_t, float16, ah_chs)
|
||||||
|
DO_RECPS(recpsf_ah_f32, float32, float32, ah_chs)
|
||||||
|
DO_RECPS(recpsf_ah_f64, float64, float64, ah_chs)
|
||||||
|
|
||||||
a = float32_chs(a);
|
#define DO_RSQRTSF(NAME, CTYPE, FLOATTYPE, CHSFN) \
|
||||||
if ((float32_is_infinity(a) && float32_is_zero(b)) ||
|
CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \
|
||||||
(float32_is_infinity(b) && float32_is_zero(a))) {
|
{ \
|
||||||
return float32_two;
|
a = FLOATTYPE ## _squash_input_denormal(a, fpst); \
|
||||||
}
|
b = FLOATTYPE ## _squash_input_denormal(b, fpst); \
|
||||||
return float32_muladd(a, b, float32_two, 0, fpst);
|
a = FLOATTYPE ## _ ## CHSFN(a); \
|
||||||
}
|
if ((FLOATTYPE ## _is_infinity(a) && FLOATTYPE ## _is_zero(b)) || \
|
||||||
|
(FLOATTYPE ## _is_infinity(b) && FLOATTYPE ## _is_zero(a))) { \
|
||||||
|
return FLOATTYPE ## _one_point_five; \
|
||||||
|
} \
|
||||||
|
return FLOATTYPE ## _muladd_scalbn(a, b, FLOATTYPE ## _three, \
|
||||||
|
-1, 0, fpst); \
|
||||||
|
} \
|
||||||
|
|
||||||
float64 HELPER(recpsf_f64)(float64 a, float64 b, float_status *fpst)
|
DO_RSQRTSF(rsqrtsf_f16, uint32_t, float16, chs)
|
||||||
{
|
DO_RSQRTSF(rsqrtsf_f32, float32, float32, chs)
|
||||||
a = float64_squash_input_denormal(a, fpst);
|
DO_RSQRTSF(rsqrtsf_f64, float64, float64, chs)
|
||||||
b = float64_squash_input_denormal(b, fpst);
|
DO_RSQRTSF(rsqrtsf_ah_f16, uint32_t, float16, ah_chs)
|
||||||
|
DO_RSQRTSF(rsqrtsf_ah_f32, float32, float32, ah_chs)
|
||||||
a = float64_chs(a);
|
DO_RSQRTSF(rsqrtsf_ah_f64, float64, float64, ah_chs)
|
||||||
if ((float64_is_infinity(a) && float64_is_zero(b)) ||
|
|
||||||
(float64_is_infinity(b) && float64_is_zero(a))) {
|
|
||||||
return float64_two;
|
|
||||||
}
|
|
||||||
return float64_muladd(a, b, float64_two, 0, fpst);
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, float_status *fpst)
|
|
||||||
{
|
|
||||||
a = float16_squash_input_denormal(a, fpst);
|
|
||||||
b = float16_squash_input_denormal(b, fpst);
|
|
||||||
|
|
||||||
a = float16_chs(a);
|
|
||||||
if ((float16_is_infinity(a) && float16_is_zero(b)) ||
|
|
||||||
(float16_is_infinity(b) && float16_is_zero(a))) {
|
|
||||||
return float16_one_point_five;
|
|
||||||
}
|
|
||||||
return float16_muladd_scalbn(a, b, float16_three, -1, 0, fpst);
|
|
||||||
}
|
|
||||||
|
|
||||||
float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, float_status *fpst)
|
|
||||||
{
|
|
||||||
a = float32_squash_input_denormal(a, fpst);
|
|
||||||
b = float32_squash_input_denormal(b, fpst);
|
|
||||||
|
|
||||||
a = float32_chs(a);
|
|
||||||
if ((float32_is_infinity(a) && float32_is_zero(b)) ||
|
|
||||||
(float32_is_infinity(b) && float32_is_zero(a))) {
|
|
||||||
return float32_one_point_five;
|
|
||||||
}
|
|
||||||
return float32_muladd_scalbn(a, b, float32_three, -1, 0, fpst);
|
|
||||||
}
|
|
||||||
|
|
||||||
float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, float_status *fpst)
|
|
||||||
{
|
|
||||||
a = float64_squash_input_denormal(a, fpst);
|
|
||||||
b = float64_squash_input_denormal(b, fpst);
|
|
||||||
|
|
||||||
a = float64_chs(a);
|
|
||||||
if ((float64_is_infinity(a) && float64_is_zero(b)) ||
|
|
||||||
(float64_is_infinity(b) && float64_is_zero(a))) {
|
|
||||||
return float64_one_point_five;
|
|
||||||
}
|
|
||||||
return float64_muladd_scalbn(a, b, float64_three, -1, 0, fpst);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
|
/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
|
||||||
uint32_t HELPER(frecpx_f16)(uint32_t a, float_status *fpst)
|
uint32_t HELPER(frecpx_f16)(uint32_t a, float_status *fpst)
|
||||||
@ -399,6 +364,42 @@ float32 HELPER(fcvtx_f64_to_f32)(float64 a, float_status *fpst)
|
|||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* AH=1 min/max have some odd special cases:
|
||||||
|
* comparing two zeroes (regardless of sign), (NaN, anything),
|
||||||
|
* or (anything, NaN) should return the second argument (possibly
|
||||||
|
* squashed to zero).
|
||||||
|
* Also, denormal outputs are not squashed to zero regardless of FZ or FZ16.
|
||||||
|
*/
|
||||||
|
#define AH_MINMAX_HELPER(NAME, CTYPE, FLOATTYPE, MINMAX) \
|
||||||
|
CTYPE HELPER(NAME)(CTYPE a, CTYPE b, float_status *fpst) \
|
||||||
|
{ \
|
||||||
|
bool save; \
|
||||||
|
CTYPE r; \
|
||||||
|
a = FLOATTYPE ## _squash_input_denormal(a, fpst); \
|
||||||
|
b = FLOATTYPE ## _squash_input_denormal(b, fpst); \
|
||||||
|
if (FLOATTYPE ## _is_zero(a) && FLOATTYPE ## _is_zero(b)) { \
|
||||||
|
return b; \
|
||||||
|
} \
|
||||||
|
if (FLOATTYPE ## _is_any_nan(a) || \
|
||||||
|
FLOATTYPE ## _is_any_nan(b)) { \
|
||||||
|
float_raise(float_flag_invalid, fpst); \
|
||||||
|
return b; \
|
||||||
|
} \
|
||||||
|
save = get_flush_to_zero(fpst); \
|
||||||
|
set_flush_to_zero(false, fpst); \
|
||||||
|
r = FLOATTYPE ## _ ## MINMAX(a, b, fpst); \
|
||||||
|
set_flush_to_zero(save, fpst); \
|
||||||
|
return r; \
|
||||||
|
}
|
||||||
|
|
||||||
|
AH_MINMAX_HELPER(vfp_ah_minh, dh_ctype_f16, float16, min)
|
||||||
|
AH_MINMAX_HELPER(vfp_ah_mins, float32, float32, min)
|
||||||
|
AH_MINMAX_HELPER(vfp_ah_mind, float64, float64, min)
|
||||||
|
AH_MINMAX_HELPER(vfp_ah_maxh, dh_ctype_f16, float16, max)
|
||||||
|
AH_MINMAX_HELPER(vfp_ah_maxs, float32, float32, max)
|
||||||
|
AH_MINMAX_HELPER(vfp_ah_maxd, float64, float64, max)
|
||||||
|
|
||||||
/* 64-bit versions of the CRC helpers. Note that although the operation
|
/* 64-bit versions of the CRC helpers. Note that although the operation
|
||||||
* (and the prototypes of crc32c() and crc32() mean that only the bottom
|
* (and the prototypes of crc32c() and crc32() mean that only the bottom
|
||||||
* 32 bits of the accumulator and result are used, we pass and return
|
* 32 bits of the accumulator and result are used, we pass and return
|
||||||
|
@ -38,9 +38,15 @@ DEF_HELPER_FLAGS_3(neon_cgt_f64, TCG_CALL_NO_RWG, i64, i64, i64, fpst)
|
|||||||
DEF_HELPER_FLAGS_3(recpsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst)
|
DEF_HELPER_FLAGS_3(recpsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst)
|
||||||
DEF_HELPER_FLAGS_3(recpsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst)
|
DEF_HELPER_FLAGS_3(recpsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst)
|
||||||
DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst)
|
DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst)
|
||||||
|
DEF_HELPER_FLAGS_3(recpsf_ah_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst)
|
||||||
|
DEF_HELPER_FLAGS_3(recpsf_ah_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst)
|
||||||
|
DEF_HELPER_FLAGS_3(recpsf_ah_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst)
|
||||||
DEF_HELPER_FLAGS_3(rsqrtsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst)
|
DEF_HELPER_FLAGS_3(rsqrtsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst)
|
||||||
DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst)
|
DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst)
|
||||||
DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst)
|
DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst)
|
||||||
|
DEF_HELPER_FLAGS_3(rsqrtsf_ah_f16, TCG_CALL_NO_RWG, f16, f16, f16, fpst)
|
||||||
|
DEF_HELPER_FLAGS_3(rsqrtsf_ah_f32, TCG_CALL_NO_RWG, f32, f32, f32, fpst)
|
||||||
|
DEF_HELPER_FLAGS_3(rsqrtsf_ah_f64, TCG_CALL_NO_RWG, f64, f64, f64, fpst)
|
||||||
DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, fpst)
|
DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, fpst)
|
||||||
DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, fpst)
|
DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, fpst)
|
||||||
DEF_HELPER_FLAGS_2(frecpx_f16, TCG_CALL_NO_RWG, f16, f16, fpst)
|
DEF_HELPER_FLAGS_2(frecpx_f16, TCG_CALL_NO_RWG, f16, f16, fpst)
|
||||||
@ -67,6 +73,13 @@ DEF_HELPER_4(advsimd_muladd2h, i32, i32, i32, i32, fpst)
|
|||||||
DEF_HELPER_2(advsimd_rinth_exact, f16, f16, fpst)
|
DEF_HELPER_2(advsimd_rinth_exact, f16, f16, fpst)
|
||||||
DEF_HELPER_2(advsimd_rinth, f16, f16, fpst)
|
DEF_HELPER_2(advsimd_rinth, f16, f16, fpst)
|
||||||
|
|
||||||
|
DEF_HELPER_3(vfp_ah_minh, f16, f16, f16, fpst)
|
||||||
|
DEF_HELPER_3(vfp_ah_mins, f32, f32, f32, fpst)
|
||||||
|
DEF_HELPER_3(vfp_ah_mind, f64, f64, f64, fpst)
|
||||||
|
DEF_HELPER_3(vfp_ah_maxh, f16, f16, f16, fpst)
|
||||||
|
DEF_HELPER_3(vfp_ah_maxs, f32, f32, f32, fpst)
|
||||||
|
DEF_HELPER_3(vfp_ah_maxd, f64, f64, f64, fpst)
|
||||||
|
|
||||||
DEF_HELPER_2(exception_return, void, env, i64)
|
DEF_HELPER_2(exception_return, void, env, i64)
|
||||||
DEF_HELPER_FLAGS_2(dc_zva, TCG_CALL_NO_WG, void, env, i64)
|
DEF_HELPER_FLAGS_2(dc_zva, TCG_CALL_NO_WG, void, env, i64)
|
||||||
|
|
||||||
|
@ -541,10 +541,18 @@ DEF_HELPER_FLAGS_4(sve_fabs_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
|||||||
DEF_HELPER_FLAGS_4(sve_fabs_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
DEF_HELPER_FLAGS_4(sve_fabs_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||||
DEF_HELPER_FLAGS_4(sve_fabs_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
DEF_HELPER_FLAGS_4(sve_fabs_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||||
|
|
||||||
|
DEF_HELPER_FLAGS_4(sve_ah_fabs_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||||
|
DEF_HELPER_FLAGS_4(sve_ah_fabs_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||||
|
DEF_HELPER_FLAGS_4(sve_ah_fabs_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||||
|
|
||||||
DEF_HELPER_FLAGS_4(sve_fneg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
DEF_HELPER_FLAGS_4(sve_fneg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||||
DEF_HELPER_FLAGS_4(sve_fneg_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
DEF_HELPER_FLAGS_4(sve_fneg_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||||
DEF_HELPER_FLAGS_4(sve_fneg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
DEF_HELPER_FLAGS_4(sve_fneg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||||
|
|
||||||
|
DEF_HELPER_FLAGS_4(sve_ah_fneg_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||||
|
DEF_HELPER_FLAGS_4(sve_ah_fneg_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||||
|
DEF_HELPER_FLAGS_4(sve_ah_fneg_d, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||||
|
|
||||||
DEF_HELPER_FLAGS_4(sve_not_zpz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
DEF_HELPER_FLAGS_4(sve_not_zpz_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||||
DEF_HELPER_FLAGS_4(sve_not_zpz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
DEF_HELPER_FLAGS_4(sve_not_zpz_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||||
DEF_HELPER_FLAGS_4(sve_not_zpz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
DEF_HELPER_FLAGS_4(sve_not_zpz_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
|
||||||
@ -972,6 +980,48 @@ DEF_HELPER_FLAGS_5(gvec_rsqrts_s, TCG_CALL_NO_RWG,
|
|||||||
DEF_HELPER_FLAGS_5(gvec_rsqrts_d, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_5(gvec_rsqrts_d, TCG_CALL_NO_RWG,
|
||||||
void, ptr, ptr, ptr, fpst, i32)
|
void, ptr, ptr, ptr, fpst, i32)
|
||||||
|
|
||||||
|
DEF_HELPER_FLAGS_5(gvec_ah_recps_h, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_5(gvec_ah_recps_s, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_5(gvec_ah_recps_d, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, fpst, i32)
|
||||||
|
|
||||||
|
DEF_HELPER_FLAGS_5(gvec_ah_rsqrts_h, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_5(gvec_ah_rsqrts_s, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_5(gvec_ah_rsqrts_d, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, fpst, i32)
|
||||||
|
|
||||||
|
DEF_HELPER_FLAGS_5(gvec_ah_fmax_h, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_5(gvec_ah_fmax_s, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_5(gvec_ah_fmax_d, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, fpst, i32)
|
||||||
|
|
||||||
|
DEF_HELPER_FLAGS_5(gvec_ah_fmin_h, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_5(gvec_ah_fmin_s, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_5(gvec_ah_fmin_d, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, fpst, i32)
|
||||||
|
|
||||||
|
DEF_HELPER_FLAGS_5(gvec_ah_fmaxp_h, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_5(gvec_ah_fmaxp_s, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_5(gvec_ah_fmaxp_d, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, fpst, i32)
|
||||||
|
|
||||||
|
DEF_HELPER_FLAGS_5(gvec_ah_fminp_h, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_5(gvec_ah_fminp_s, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_5(gvec_ah_fminp_d, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, fpst, i32)
|
||||||
|
|
||||||
DEF_HELPER_FLAGS_4(sve_faddv_h, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_4(sve_faddv_h, TCG_CALL_NO_RWG,
|
||||||
i64, ptr, ptr, fpst, i32)
|
i64, ptr, ptr, fpst, i32)
|
||||||
DEF_HELPER_FLAGS_4(sve_faddv_s, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_4(sve_faddv_s, TCG_CALL_NO_RWG,
|
||||||
@ -1007,6 +1057,20 @@ DEF_HELPER_FLAGS_4(sve_fminv_s, TCG_CALL_NO_RWG,
|
|||||||
DEF_HELPER_FLAGS_4(sve_fminv_d, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_4(sve_fminv_d, TCG_CALL_NO_RWG,
|
||||||
i64, ptr, ptr, fpst, i32)
|
i64, ptr, ptr, fpst, i32)
|
||||||
|
|
||||||
|
DEF_HELPER_FLAGS_4(sve_ah_fmaxv_h, TCG_CALL_NO_RWG,
|
||||||
|
i64, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_4(sve_ah_fmaxv_s, TCG_CALL_NO_RWG,
|
||||||
|
i64, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_4(sve_ah_fmaxv_d, TCG_CALL_NO_RWG,
|
||||||
|
i64, ptr, ptr, fpst, i32)
|
||||||
|
|
||||||
|
DEF_HELPER_FLAGS_4(sve_ah_fminv_h, TCG_CALL_NO_RWG,
|
||||||
|
i64, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_4(sve_ah_fminv_s, TCG_CALL_NO_RWG,
|
||||||
|
i64, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_4(sve_ah_fminv_d, TCG_CALL_NO_RWG,
|
||||||
|
i64, ptr, ptr, fpst, i32)
|
||||||
|
|
||||||
DEF_HELPER_FLAGS_5(sve_fadda_h, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_5(sve_fadda_h, TCG_CALL_NO_RWG,
|
||||||
i64, i64, ptr, ptr, fpst, i32)
|
i64, i64, ptr, ptr, fpst, i32)
|
||||||
DEF_HELPER_FLAGS_5(sve_fadda_s, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_5(sve_fadda_s, TCG_CALL_NO_RWG,
|
||||||
@ -1098,6 +1162,20 @@ DEF_HELPER_FLAGS_6(sve_fmax_s, TCG_CALL_NO_RWG,
|
|||||||
DEF_HELPER_FLAGS_6(sve_fmax_d, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_6(sve_fmax_d, TCG_CALL_NO_RWG,
|
||||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
|
||||||
|
DEF_HELPER_FLAGS_6(sve_ah_fmin_h, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_6(sve_ah_fmin_s, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_6(sve_ah_fmin_d, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
|
||||||
|
DEF_HELPER_FLAGS_6(sve_ah_fmax_h, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_6(sve_ah_fmax_s, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_6(sve_ah_fmax_d, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
|
||||||
DEF_HELPER_FLAGS_6(sve_fminnum_h, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_6(sve_fminnum_h, TCG_CALL_NO_RWG,
|
||||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
DEF_HELPER_FLAGS_6(sve_fminnum_s, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_6(sve_fminnum_s, TCG_CALL_NO_RWG,
|
||||||
@ -1119,6 +1197,13 @@ DEF_HELPER_FLAGS_6(sve_fabd_s, TCG_CALL_NO_RWG,
|
|||||||
DEF_HELPER_FLAGS_6(sve_fabd_d, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_6(sve_fabd_d, TCG_CALL_NO_RWG,
|
||||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
|
||||||
|
DEF_HELPER_FLAGS_6(sve_ah_fabd_h, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_6(sve_ah_fabd_s, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_6(sve_ah_fabd_d, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
|
||||||
DEF_HELPER_FLAGS_6(sve_fscalbn_h, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_6(sve_fscalbn_h, TCG_CALL_NO_RWG,
|
||||||
void, ptr, ptr, ptr, ptr, fpst, i32)
|
void, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
DEF_HELPER_FLAGS_6(sve_fscalbn_s, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_6(sve_fscalbn_s, TCG_CALL_NO_RWG,
|
||||||
@ -1189,6 +1274,20 @@ DEF_HELPER_FLAGS_6(sve_fmins_s, TCG_CALL_NO_RWG,
|
|||||||
DEF_HELPER_FLAGS_6(sve_fmins_d, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_6(sve_fmins_d, TCG_CALL_NO_RWG,
|
||||||
void, ptr, ptr, ptr, i64, fpst, i32)
|
void, ptr, ptr, ptr, i64, fpst, i32)
|
||||||
|
|
||||||
|
DEF_HELPER_FLAGS_6(sve_ah_fmaxs_h, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, i64, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_6(sve_ah_fmaxs_s, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, i64, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_6(sve_ah_fmaxs_d, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, i64, fpst, i32)
|
||||||
|
|
||||||
|
DEF_HELPER_FLAGS_6(sve_ah_fmins_h, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, i64, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_6(sve_ah_fmins_s, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, i64, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_6(sve_ah_fmins_d, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, i64, fpst, i32)
|
||||||
|
|
||||||
DEF_HELPER_FLAGS_5(sve_fcvt_sh, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_5(sve_fcvt_sh, TCG_CALL_NO_RWG,
|
||||||
void, ptr, ptr, ptr, fpst, i32)
|
void, ptr, ptr, ptr, fpst, i32)
|
||||||
DEF_HELPER_FLAGS_5(sve_fcvt_dh, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_5(sve_fcvt_dh, TCG_CALL_NO_RWG,
|
||||||
@ -1376,6 +1475,27 @@ DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG,
|
|||||||
DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG,
|
||||||
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
|
||||||
|
DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_h, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_s, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_d, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
|
||||||
|
DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_h, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_s, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_d, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
|
||||||
|
DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_h, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_s, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_d, TCG_CALL_NO_RWG,
|
||||||
|
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
|
|
||||||
DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_h, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_h, TCG_CALL_NO_RWG,
|
||||||
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
|
||||||
DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_s, TCG_CALL_NO_RWG,
|
DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_s, TCG_CALL_NO_RWG,
|
||||||
|
@ -404,6 +404,19 @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el,
|
|||||||
DP_TBFLAG_A64(flags, TCMA, aa64_va_parameter_tcma(tcr, mmu_idx));
|
DP_TBFLAG_A64(flags, TCMA, aa64_va_parameter_tcma(tcr, mmu_idx));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (env->vfp.fpcr & FPCR_AH) {
|
||||||
|
DP_TBFLAG_A64(flags, AH, 1);
|
||||||
|
}
|
||||||
|
if (env->vfp.fpcr & FPCR_NEP) {
|
||||||
|
/*
|
||||||
|
* In streaming-SVE without FA64, NEP behaves as if zero;
|
||||||
|
* compare pseudocode IsMerging()
|
||||||
|
*/
|
||||||
|
if (!(EX_TBFLAG_A64(flags, PSTATE_SM) && !sme_fa64(env, el))) {
|
||||||
|
DP_TBFLAG_A64(flags, NEP, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return rebuild_hflags_common(env, fp_el, mmu_idx, flags);
|
return rebuild_hflags_common(env, fp_el, mmu_idx, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2814,8 +2814,7 @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN)
|
|||||||
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
|
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
|
||||||
continue; \
|
continue; \
|
||||||
} \
|
} \
|
||||||
fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
|
fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
|
||||||
&env->vfp.standard_fp_status; \
|
|
||||||
if (!(mask & 1)) { \
|
if (!(mask & 1)) { \
|
||||||
/* We need the result but without updating flags */ \
|
/* We need the result but without updating flags */ \
|
||||||
scratch_fpst = *fpst; \
|
scratch_fpst = *fpst; \
|
||||||
@ -2888,8 +2887,7 @@ DO_2OP_FP_ALL(vminnma, minnuma)
|
|||||||
r[e] = 0; \
|
r[e] = 0; \
|
||||||
continue; \
|
continue; \
|
||||||
} \
|
} \
|
||||||
fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
|
fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
|
||||||
&env->vfp.standard_fp_status; \
|
|
||||||
if (!(tm & 1)) { \
|
if (!(tm & 1)) { \
|
||||||
/* We need the result but without updating flags */ \
|
/* We need the result but without updating flags */ \
|
||||||
scratch_fpst = *fpst; \
|
scratch_fpst = *fpst; \
|
||||||
@ -2926,8 +2924,7 @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub)
|
|||||||
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
|
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
|
||||||
continue; \
|
continue; \
|
||||||
} \
|
} \
|
||||||
fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
|
fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
|
||||||
&env->vfp.standard_fp_status; \
|
|
||||||
if (!(mask & 1)) { \
|
if (!(mask & 1)) { \
|
||||||
/* We need the result but without updating flags */ \
|
/* We need the result but without updating flags */ \
|
||||||
scratch_fpst = *fpst; \
|
scratch_fpst = *fpst; \
|
||||||
@ -2964,8 +2961,7 @@ DO_VFMA(vfmss, 4, float32, true)
|
|||||||
if ((mask & MAKE_64BIT_MASK(0, ESIZE * 2)) == 0) { \
|
if ((mask & MAKE_64BIT_MASK(0, ESIZE * 2)) == 0) { \
|
||||||
continue; \
|
continue; \
|
||||||
} \
|
} \
|
||||||
fpst0 = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
|
fpst0 = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
|
||||||
&env->vfp.standard_fp_status; \
|
|
||||||
fpst1 = fpst0; \
|
fpst1 = fpst0; \
|
||||||
if (!(mask & 1)) { \
|
if (!(mask & 1)) { \
|
||||||
scratch_fpst = *fpst0; \
|
scratch_fpst = *fpst0; \
|
||||||
@ -3049,8 +3045,7 @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS)
|
|||||||
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
|
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
|
||||||
continue; \
|
continue; \
|
||||||
} \
|
} \
|
||||||
fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
|
fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
|
||||||
&env->vfp.standard_fp_status; \
|
|
||||||
if (!(mask & 1)) { \
|
if (!(mask & 1)) { \
|
||||||
/* We need the result but without updating flags */ \
|
/* We need the result but without updating flags */ \
|
||||||
scratch_fpst = *fpst; \
|
scratch_fpst = *fpst; \
|
||||||
@ -3084,8 +3079,7 @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul)
|
|||||||
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
|
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
|
||||||
continue; \
|
continue; \
|
||||||
} \
|
} \
|
||||||
fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
|
fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
|
||||||
&env->vfp.standard_fp_status; \
|
|
||||||
if (!(mask & 1)) { \
|
if (!(mask & 1)) { \
|
||||||
/* We need the result but without updating flags */ \
|
/* We need the result but without updating flags */ \
|
||||||
scratch_fpst = *fpst; \
|
scratch_fpst = *fpst; \
|
||||||
@ -3116,9 +3110,8 @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS)
|
|||||||
unsigned e; \
|
unsigned e; \
|
||||||
TYPE *m = vm; \
|
TYPE *m = vm; \
|
||||||
TYPE ra = (TYPE)ra_in; \
|
TYPE ra = (TYPE)ra_in; \
|
||||||
float_status *fpst = (ESIZE == 2) ? \
|
float_status *fpst = \
|
||||||
&env->vfp.standard_fp_status_f16 : \
|
&env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
|
||||||
&env->vfp.standard_fp_status; \
|
|
||||||
for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
|
for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
|
||||||
if (mask & 1) { \
|
if (mask & 1) { \
|
||||||
TYPE v = m[H##ESIZE(e)]; \
|
TYPE v = m[H##ESIZE(e)]; \
|
||||||
@ -3168,8 +3161,7 @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum)
|
|||||||
if ((mask & emask) == 0) { \
|
if ((mask & emask) == 0) { \
|
||||||
continue; \
|
continue; \
|
||||||
} \
|
} \
|
||||||
fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
|
fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
|
||||||
&env->vfp.standard_fp_status; \
|
|
||||||
if (!(mask & (1 << (e * ESIZE)))) { \
|
if (!(mask & (1 << (e * ESIZE)))) { \
|
||||||
/* We need the result but without updating flags */ \
|
/* We need the result but without updating flags */ \
|
||||||
scratch_fpst = *fpst; \
|
scratch_fpst = *fpst; \
|
||||||
@ -3202,8 +3194,7 @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum)
|
|||||||
if ((mask & emask) == 0) { \
|
if ((mask & emask) == 0) { \
|
||||||
continue; \
|
continue; \
|
||||||
} \
|
} \
|
||||||
fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
|
fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
|
||||||
&env->vfp.standard_fp_status; \
|
|
||||||
if (!(mask & (1 << (e * ESIZE)))) { \
|
if (!(mask & (1 << (e * ESIZE)))) { \
|
||||||
/* We need the result but without updating flags */ \
|
/* We need the result but without updating flags */ \
|
||||||
scratch_fpst = *fpst; \
|
scratch_fpst = *fpst; \
|
||||||
@ -3267,8 +3258,7 @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32)
|
|||||||
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
|
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
|
||||||
continue; \
|
continue; \
|
||||||
} \
|
} \
|
||||||
fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
|
fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
|
||||||
&env->vfp.standard_fp_status; \
|
|
||||||
if (!(mask & 1)) { \
|
if (!(mask & 1)) { \
|
||||||
/* We need the result but without updating flags */ \
|
/* We need the result but without updating flags */ \
|
||||||
scratch_fpst = *fpst; \
|
scratch_fpst = *fpst; \
|
||||||
@ -3300,9 +3290,8 @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero)
|
|||||||
unsigned e; \
|
unsigned e; \
|
||||||
float_status *fpst; \
|
float_status *fpst; \
|
||||||
float_status scratch_fpst; \
|
float_status scratch_fpst; \
|
||||||
float_status *base_fpst = (ESIZE == 2) ? \
|
float_status *base_fpst = \
|
||||||
&env->vfp.standard_fp_status_f16 : \
|
&env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
|
||||||
&env->vfp.standard_fp_status; \
|
|
||||||
uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \
|
uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \
|
||||||
set_float_rounding_mode(rmode, base_fpst); \
|
set_float_rounding_mode(rmode, base_fpst); \
|
||||||
for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
|
for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
|
||||||
@ -3347,7 +3336,7 @@ static void do_vcvt_sh(CPUARMState *env, void *vd, void *vm, int top)
|
|||||||
unsigned e;
|
unsigned e;
|
||||||
float_status *fpst;
|
float_status *fpst;
|
||||||
float_status scratch_fpst;
|
float_status scratch_fpst;
|
||||||
float_status *base_fpst = &env->vfp.standard_fp_status;
|
float_status *base_fpst = &env->vfp.fp_status[FPST_STD];
|
||||||
bool old_fz = get_flush_to_zero(base_fpst);
|
bool old_fz = get_flush_to_zero(base_fpst);
|
||||||
set_flush_to_zero(false, base_fpst);
|
set_flush_to_zero(false, base_fpst);
|
||||||
for (e = 0; e < 16 / 4; e++, mask >>= 4) {
|
for (e = 0; e < 16 / 4; e++, mask >>= 4) {
|
||||||
@ -3377,7 +3366,7 @@ static void do_vcvt_hs(CPUARMState *env, void *vd, void *vm, int top)
|
|||||||
unsigned e;
|
unsigned e;
|
||||||
float_status *fpst;
|
float_status *fpst;
|
||||||
float_status scratch_fpst;
|
float_status scratch_fpst;
|
||||||
float_status *base_fpst = &env->vfp.standard_fp_status;
|
float_status *base_fpst = &env->vfp.fp_status[FPST_STD];
|
||||||
bool old_fiz = get_flush_inputs_to_zero(base_fpst);
|
bool old_fiz = get_flush_inputs_to_zero(base_fpst);
|
||||||
set_flush_inputs_to_zero(false, base_fpst);
|
set_flush_inputs_to_zero(false, base_fpst);
|
||||||
for (e = 0; e < 16 / 4; e++, mask >>= 4) {
|
for (e = 0; e < 16 / 4; e++, mask >>= 4) {
|
||||||
@ -3427,8 +3416,7 @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm)
|
|||||||
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
|
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
|
||||||
continue; \
|
continue; \
|
||||||
} \
|
} \
|
||||||
fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
|
fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
|
||||||
&env->vfp.standard_fp_status; \
|
|
||||||
if (!(mask & 1)) { \
|
if (!(mask & 1)) { \
|
||||||
/* We need the result but without updating flags */ \
|
/* We need the result but without updating flags */ \
|
||||||
scratch_fpst = *fpst; \
|
scratch_fpst = *fpst; \
|
||||||
|
@ -1043,8 +1043,8 @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
|
|||||||
* produces default NaNs. We also need a second copy of fp_status with
|
* produces default NaNs. We also need a second copy of fp_status with
|
||||||
* round-to-odd -- see above.
|
* round-to-odd -- see above.
|
||||||
*/
|
*/
|
||||||
fpst_f16 = env->vfp.fp_status_f16_a64;
|
fpst_f16 = env->vfp.fp_status[FPST_A64_F16];
|
||||||
fpst_std = env->vfp.fp_status_a64;
|
fpst_std = env->vfp.fp_status[FPST_A64];
|
||||||
set_default_nan_mode(true, &fpst_std);
|
set_default_nan_mode(true, &fpst_std);
|
||||||
set_default_nan_mode(true, &fpst_f16);
|
set_default_nan_mode(true, &fpst_f16);
|
||||||
fpst_odd = fpst_std;
|
fpst_odd = fpst_std;
|
||||||
|
@ -879,12 +879,28 @@ DO_ZPZ(sve_fabs_h, uint16_t, H1_2, DO_FABS)
|
|||||||
DO_ZPZ(sve_fabs_s, uint32_t, H1_4, DO_FABS)
|
DO_ZPZ(sve_fabs_s, uint32_t, H1_4, DO_FABS)
|
||||||
DO_ZPZ_D(sve_fabs_d, uint64_t, DO_FABS)
|
DO_ZPZ_D(sve_fabs_d, uint64_t, DO_FABS)
|
||||||
|
|
||||||
|
#define DO_AH_FABS_H(N) (float16_is_any_nan(N) ? (N) : DO_FABS(N))
|
||||||
|
#define DO_AH_FABS_S(N) (float32_is_any_nan(N) ? (N) : DO_FABS(N))
|
||||||
|
#define DO_AH_FABS_D(N) (float64_is_any_nan(N) ? (N) : DO_FABS(N))
|
||||||
|
|
||||||
|
DO_ZPZ(sve_ah_fabs_h, uint16_t, H1_2, DO_AH_FABS_H)
|
||||||
|
DO_ZPZ(sve_ah_fabs_s, uint32_t, H1_4, DO_AH_FABS_S)
|
||||||
|
DO_ZPZ_D(sve_ah_fabs_d, uint64_t, DO_AH_FABS_D)
|
||||||
|
|
||||||
#define DO_FNEG(N) (N ^ ~((__typeof(N))-1 >> 1))
|
#define DO_FNEG(N) (N ^ ~((__typeof(N))-1 >> 1))
|
||||||
|
|
||||||
DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG)
|
DO_ZPZ(sve_fneg_h, uint16_t, H1_2, DO_FNEG)
|
||||||
DO_ZPZ(sve_fneg_s, uint32_t, H1_4, DO_FNEG)
|
DO_ZPZ(sve_fneg_s, uint32_t, H1_4, DO_FNEG)
|
||||||
DO_ZPZ_D(sve_fneg_d, uint64_t, DO_FNEG)
|
DO_ZPZ_D(sve_fneg_d, uint64_t, DO_FNEG)
|
||||||
|
|
||||||
|
#define DO_AH_FNEG_H(N) (float16_is_any_nan(N) ? (N) : DO_FNEG(N))
|
||||||
|
#define DO_AH_FNEG_S(N) (float32_is_any_nan(N) ? (N) : DO_FNEG(N))
|
||||||
|
#define DO_AH_FNEG_D(N) (float64_is_any_nan(N) ? (N) : DO_FNEG(N))
|
||||||
|
|
||||||
|
DO_ZPZ(sve_ah_fneg_h, uint16_t, H1_2, DO_AH_FNEG_H)
|
||||||
|
DO_ZPZ(sve_ah_fneg_s, uint32_t, H1_4, DO_AH_FNEG_S)
|
||||||
|
DO_ZPZ_D(sve_ah_fneg_d, uint64_t, DO_AH_FNEG_D)
|
||||||
|
|
||||||
#define DO_NOT(N) (~N)
|
#define DO_NOT(N) (~N)
|
||||||
|
|
||||||
DO_ZPZ(sve_not_zpz_b, uint8_t, H1, DO_NOT)
|
DO_ZPZ(sve_not_zpz_b, uint8_t, H1, DO_NOT)
|
||||||
@ -2539,6 +2555,7 @@ void HELPER(sve_fexpa_d)(void *vd, void *vn, uint32_t desc)
|
|||||||
void HELPER(sve_ftssel_h)(void *vd, void *vn, void *vm, uint32_t desc)
|
void HELPER(sve_ftssel_h)(void *vd, void *vn, void *vm, uint32_t desc)
|
||||||
{
|
{
|
||||||
intptr_t i, opr_sz = simd_oprsz(desc) / 2;
|
intptr_t i, opr_sz = simd_oprsz(desc) / 2;
|
||||||
|
bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1);
|
||||||
uint16_t *d = vd, *n = vn, *m = vm;
|
uint16_t *d = vd, *n = vn, *m = vm;
|
||||||
for (i = 0; i < opr_sz; i += 1) {
|
for (i = 0; i < opr_sz; i += 1) {
|
||||||
uint16_t nn = n[i];
|
uint16_t nn = n[i];
|
||||||
@ -2546,13 +2563,17 @@ void HELPER(sve_ftssel_h)(void *vd, void *vn, void *vm, uint32_t desc)
|
|||||||
if (mm & 1) {
|
if (mm & 1) {
|
||||||
nn = float16_one;
|
nn = float16_one;
|
||||||
}
|
}
|
||||||
d[i] = nn ^ (mm & 2) << 14;
|
if (mm & 2) {
|
||||||
|
nn = float16_maybe_ah_chs(nn, fpcr_ah);
|
||||||
|
}
|
||||||
|
d[i] = nn;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void HELPER(sve_ftssel_s)(void *vd, void *vn, void *vm, uint32_t desc)
|
void HELPER(sve_ftssel_s)(void *vd, void *vn, void *vm, uint32_t desc)
|
||||||
{
|
{
|
||||||
intptr_t i, opr_sz = simd_oprsz(desc) / 4;
|
intptr_t i, opr_sz = simd_oprsz(desc) / 4;
|
||||||
|
bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1);
|
||||||
uint32_t *d = vd, *n = vn, *m = vm;
|
uint32_t *d = vd, *n = vn, *m = vm;
|
||||||
for (i = 0; i < opr_sz; i += 1) {
|
for (i = 0; i < opr_sz; i += 1) {
|
||||||
uint32_t nn = n[i];
|
uint32_t nn = n[i];
|
||||||
@ -2560,13 +2581,17 @@ void HELPER(sve_ftssel_s)(void *vd, void *vn, void *vm, uint32_t desc)
|
|||||||
if (mm & 1) {
|
if (mm & 1) {
|
||||||
nn = float32_one;
|
nn = float32_one;
|
||||||
}
|
}
|
||||||
d[i] = nn ^ (mm & 2) << 30;
|
if (mm & 2) {
|
||||||
|
nn = float32_maybe_ah_chs(nn, fpcr_ah);
|
||||||
|
}
|
||||||
|
d[i] = nn;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void HELPER(sve_ftssel_d)(void *vd, void *vn, void *vm, uint32_t desc)
|
void HELPER(sve_ftssel_d)(void *vd, void *vn, void *vm, uint32_t desc)
|
||||||
{
|
{
|
||||||
intptr_t i, opr_sz = simd_oprsz(desc) / 8;
|
intptr_t i, opr_sz = simd_oprsz(desc) / 8;
|
||||||
|
bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT, 1);
|
||||||
uint64_t *d = vd, *n = vn, *m = vm;
|
uint64_t *d = vd, *n = vn, *m = vm;
|
||||||
for (i = 0; i < opr_sz; i += 1) {
|
for (i = 0; i < opr_sz; i += 1) {
|
||||||
uint64_t nn = n[i];
|
uint64_t nn = n[i];
|
||||||
@ -2574,7 +2599,10 @@ void HELPER(sve_ftssel_d)(void *vd, void *vn, void *vm, uint32_t desc)
|
|||||||
if (mm & 1) {
|
if (mm & 1) {
|
||||||
nn = float64_one;
|
nn = float64_one;
|
||||||
}
|
}
|
||||||
d[i] = nn ^ (mm & 2) << 62;
|
if (mm & 2) {
|
||||||
|
nn = float64_maybe_ah_chs(nn, fpcr_ah);
|
||||||
|
}
|
||||||
|
d[i] = nn;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -4190,7 +4218,7 @@ static TYPE NAME##_reduce(TYPE *data, float_status *status, uintptr_t n) \
|
|||||||
uintptr_t half = n / 2; \
|
uintptr_t half = n / 2; \
|
||||||
TYPE lo = NAME##_reduce(data, status, half); \
|
TYPE lo = NAME##_reduce(data, status, half); \
|
||||||
TYPE hi = NAME##_reduce(data + half, status, half); \
|
TYPE hi = NAME##_reduce(data + half, status, half); \
|
||||||
return TYPE##_##FUNC(lo, hi, status); \
|
return FUNC(lo, hi, status); \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
uint64_t HELPER(NAME)(void *vn, void *vg, float_status *s, uint32_t desc) \
|
uint64_t HELPER(NAME)(void *vn, void *vg, float_status *s, uint32_t desc) \
|
||||||
@ -4211,26 +4239,37 @@ uint64_t HELPER(NAME)(void *vn, void *vg, float_status *s, uint32_t desc) \
|
|||||||
return NAME##_reduce(data, s, maxsz / sizeof(TYPE)); \
|
return NAME##_reduce(data, s, maxsz / sizeof(TYPE)); \
|
||||||
}
|
}
|
||||||
|
|
||||||
DO_REDUCE(sve_faddv_h, float16, H1_2, add, float16_zero)
|
DO_REDUCE(sve_faddv_h, float16, H1_2, float16_add, float16_zero)
|
||||||
DO_REDUCE(sve_faddv_s, float32, H1_4, add, float32_zero)
|
DO_REDUCE(sve_faddv_s, float32, H1_4, float32_add, float32_zero)
|
||||||
DO_REDUCE(sve_faddv_d, float64, H1_8, add, float64_zero)
|
DO_REDUCE(sve_faddv_d, float64, H1_8, float64_add, float64_zero)
|
||||||
|
|
||||||
/* Identity is floatN_default_nan, without the function call. */
|
/* Identity is floatN_default_nan, without the function call. */
|
||||||
DO_REDUCE(sve_fminnmv_h, float16, H1_2, minnum, 0x7E00)
|
DO_REDUCE(sve_fminnmv_h, float16, H1_2, float16_minnum, 0x7E00)
|
||||||
DO_REDUCE(sve_fminnmv_s, float32, H1_4, minnum, 0x7FC00000)
|
DO_REDUCE(sve_fminnmv_s, float32, H1_4, float32_minnum, 0x7FC00000)
|
||||||
DO_REDUCE(sve_fminnmv_d, float64, H1_8, minnum, 0x7FF8000000000000ULL)
|
DO_REDUCE(sve_fminnmv_d, float64, H1_8, float64_minnum, 0x7FF8000000000000ULL)
|
||||||
|
|
||||||
DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, maxnum, 0x7E00)
|
DO_REDUCE(sve_fmaxnmv_h, float16, H1_2, float16_maxnum, 0x7E00)
|
||||||
DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, maxnum, 0x7FC00000)
|
DO_REDUCE(sve_fmaxnmv_s, float32, H1_4, float32_maxnum, 0x7FC00000)
|
||||||
DO_REDUCE(sve_fmaxnmv_d, float64, H1_8, maxnum, 0x7FF8000000000000ULL)
|
DO_REDUCE(sve_fmaxnmv_d, float64, H1_8, float64_maxnum, 0x7FF8000000000000ULL)
|
||||||
|
|
||||||
DO_REDUCE(sve_fminv_h, float16, H1_2, min, float16_infinity)
|
DO_REDUCE(sve_fminv_h, float16, H1_2, float16_min, float16_infinity)
|
||||||
DO_REDUCE(sve_fminv_s, float32, H1_4, min, float32_infinity)
|
DO_REDUCE(sve_fminv_s, float32, H1_4, float32_min, float32_infinity)
|
||||||
DO_REDUCE(sve_fminv_d, float64, H1_8, min, float64_infinity)
|
DO_REDUCE(sve_fminv_d, float64, H1_8, float64_min, float64_infinity)
|
||||||
|
|
||||||
DO_REDUCE(sve_fmaxv_h, float16, H1_2, max, float16_chs(float16_infinity))
|
DO_REDUCE(sve_fmaxv_h, float16, H1_2, float16_max, float16_chs(float16_infinity))
|
||||||
DO_REDUCE(sve_fmaxv_s, float32, H1_4, max, float32_chs(float32_infinity))
|
DO_REDUCE(sve_fmaxv_s, float32, H1_4, float32_max, float32_chs(float32_infinity))
|
||||||
DO_REDUCE(sve_fmaxv_d, float64, H1_8, max, float64_chs(float64_infinity))
|
DO_REDUCE(sve_fmaxv_d, float64, H1_8, float64_max, float64_chs(float64_infinity))
|
||||||
|
|
||||||
|
DO_REDUCE(sve_ah_fminv_h, float16, H1_2, helper_vfp_ah_minh, float16_infinity)
|
||||||
|
DO_REDUCE(sve_ah_fminv_s, float32, H1_4, helper_vfp_ah_mins, float32_infinity)
|
||||||
|
DO_REDUCE(sve_ah_fminv_d, float64, H1_8, helper_vfp_ah_mind, float64_infinity)
|
||||||
|
|
||||||
|
DO_REDUCE(sve_ah_fmaxv_h, float16, H1_2, helper_vfp_ah_maxh,
|
||||||
|
float16_chs(float16_infinity))
|
||||||
|
DO_REDUCE(sve_ah_fmaxv_s, float32, H1_4, helper_vfp_ah_maxs,
|
||||||
|
float32_chs(float32_infinity))
|
||||||
|
DO_REDUCE(sve_ah_fmaxv_d, float64, H1_8, helper_vfp_ah_maxd,
|
||||||
|
float64_chs(float64_infinity))
|
||||||
|
|
||||||
#undef DO_REDUCE
|
#undef DO_REDUCE
|
||||||
|
|
||||||
@ -4336,6 +4375,14 @@ DO_ZPZZ_FP(sve_fmax_h, uint16_t, H1_2, float16_max)
|
|||||||
DO_ZPZZ_FP(sve_fmax_s, uint32_t, H1_4, float32_max)
|
DO_ZPZZ_FP(sve_fmax_s, uint32_t, H1_4, float32_max)
|
||||||
DO_ZPZZ_FP(sve_fmax_d, uint64_t, H1_8, float64_max)
|
DO_ZPZZ_FP(sve_fmax_d, uint64_t, H1_8, float64_max)
|
||||||
|
|
||||||
|
DO_ZPZZ_FP(sve_ah_fmin_h, uint16_t, H1_2, helper_vfp_ah_minh)
|
||||||
|
DO_ZPZZ_FP(sve_ah_fmin_s, uint32_t, H1_4, helper_vfp_ah_mins)
|
||||||
|
DO_ZPZZ_FP(sve_ah_fmin_d, uint64_t, H1_8, helper_vfp_ah_mind)
|
||||||
|
|
||||||
|
DO_ZPZZ_FP(sve_ah_fmax_h, uint16_t, H1_2, helper_vfp_ah_maxh)
|
||||||
|
DO_ZPZZ_FP(sve_ah_fmax_s, uint32_t, H1_4, helper_vfp_ah_maxs)
|
||||||
|
DO_ZPZZ_FP(sve_ah_fmax_d, uint64_t, H1_8, helper_vfp_ah_maxd)
|
||||||
|
|
||||||
DO_ZPZZ_FP(sve_fminnum_h, uint16_t, H1_2, float16_minnum)
|
DO_ZPZZ_FP(sve_fminnum_h, uint16_t, H1_2, float16_minnum)
|
||||||
DO_ZPZZ_FP(sve_fminnum_s, uint32_t, H1_4, float32_minnum)
|
DO_ZPZZ_FP(sve_fminnum_s, uint32_t, H1_4, float32_minnum)
|
||||||
DO_ZPZZ_FP(sve_fminnum_d, uint64_t, H1_8, float64_minnum)
|
DO_ZPZZ_FP(sve_fminnum_d, uint64_t, H1_8, float64_minnum)
|
||||||
@ -4359,9 +4406,31 @@ static inline float64 abd_d(float64 a, float64 b, float_status *s)
|
|||||||
return float64_abs(float64_sub(a, b, s));
|
return float64_abs(float64_sub(a, b, s));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ABD when FPCR.AH = 1: avoid flipping sign bit of a NaN result */
|
||||||
|
static float16 ah_abd_h(float16 op1, float16 op2, float_status *stat)
|
||||||
|
{
|
||||||
|
float16 r = float16_sub(op1, op2, stat);
|
||||||
|
return float16_is_any_nan(r) ? r : float16_abs(r);
|
||||||
|
}
|
||||||
|
|
||||||
|
static float32 ah_abd_s(float32 op1, float32 op2, float_status *stat)
|
||||||
|
{
|
||||||
|
float32 r = float32_sub(op1, op2, stat);
|
||||||
|
return float32_is_any_nan(r) ? r : float32_abs(r);
|
||||||
|
}
|
||||||
|
|
||||||
|
static float64 ah_abd_d(float64 op1, float64 op2, float_status *stat)
|
||||||
|
{
|
||||||
|
float64 r = float64_sub(op1, op2, stat);
|
||||||
|
return float64_is_any_nan(r) ? r : float64_abs(r);
|
||||||
|
}
|
||||||
|
|
||||||
DO_ZPZZ_FP(sve_fabd_h, uint16_t, H1_2, abd_h)
|
DO_ZPZZ_FP(sve_fabd_h, uint16_t, H1_2, abd_h)
|
||||||
DO_ZPZZ_FP(sve_fabd_s, uint32_t, H1_4, abd_s)
|
DO_ZPZZ_FP(sve_fabd_s, uint32_t, H1_4, abd_s)
|
||||||
DO_ZPZZ_FP(sve_fabd_d, uint64_t, H1_8, abd_d)
|
DO_ZPZZ_FP(sve_fabd_d, uint64_t, H1_8, abd_d)
|
||||||
|
DO_ZPZZ_FP(sve_ah_fabd_h, uint16_t, H1_2, ah_abd_h)
|
||||||
|
DO_ZPZZ_FP(sve_ah_fabd_s, uint32_t, H1_4, ah_abd_s)
|
||||||
|
DO_ZPZZ_FP(sve_ah_fabd_d, uint64_t, H1_8, ah_abd_d)
|
||||||
|
|
||||||
static inline float64 scalbn_d(float64 a, int64_t b, float_status *s)
|
static inline float64 scalbn_d(float64 a, int64_t b, float_status *s)
|
||||||
{
|
{
|
||||||
@ -4448,6 +4517,14 @@ DO_ZPZS_FP(sve_fmins_h, float16, H1_2, float16_min)
|
|||||||
DO_ZPZS_FP(sve_fmins_s, float32, H1_4, float32_min)
|
DO_ZPZS_FP(sve_fmins_s, float32, H1_4, float32_min)
|
||||||
DO_ZPZS_FP(sve_fmins_d, float64, H1_8, float64_min)
|
DO_ZPZS_FP(sve_fmins_d, float64, H1_8, float64_min)
|
||||||
|
|
||||||
|
DO_ZPZS_FP(sve_ah_fmaxs_h, float16, H1_2, helper_vfp_ah_maxh)
|
||||||
|
DO_ZPZS_FP(sve_ah_fmaxs_s, float32, H1_4, helper_vfp_ah_maxs)
|
||||||
|
DO_ZPZS_FP(sve_ah_fmaxs_d, float64, H1_8, helper_vfp_ah_maxd)
|
||||||
|
|
||||||
|
DO_ZPZS_FP(sve_ah_fmins_h, float16, H1_2, helper_vfp_ah_minh)
|
||||||
|
DO_ZPZS_FP(sve_ah_fmins_s, float32, H1_4, helper_vfp_ah_mins)
|
||||||
|
DO_ZPZS_FP(sve_ah_fmins_d, float64, H1_8, helper_vfp_ah_mind)
|
||||||
|
|
||||||
/* Fully general two-operand expander, controlled by a predicate,
|
/* Fully general two-operand expander, controlled by a predicate,
|
||||||
* With the extra float_status parameter.
|
* With the extra float_status parameter.
|
||||||
*/
|
*/
|
||||||
@ -4737,7 +4814,7 @@ DO_ZPZ_FP(flogb_d, float64, H1_8, do_float64_logb_as_int)
|
|||||||
|
|
||||||
static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg,
|
static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg,
|
||||||
float_status *status, uint32_t desc,
|
float_status *status, uint32_t desc,
|
||||||
uint16_t neg1, uint16_t neg3)
|
uint16_t neg1, uint16_t neg3, int flags)
|
||||||
{
|
{
|
||||||
intptr_t i = simd_oprsz(desc);
|
intptr_t i = simd_oprsz(desc);
|
||||||
uint64_t *g = vg;
|
uint64_t *g = vg;
|
||||||
@ -4752,7 +4829,7 @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg,
|
|||||||
e1 = *(uint16_t *)(vn + H1_2(i)) ^ neg1;
|
e1 = *(uint16_t *)(vn + H1_2(i)) ^ neg1;
|
||||||
e2 = *(uint16_t *)(vm + H1_2(i));
|
e2 = *(uint16_t *)(vm + H1_2(i));
|
||||||
e3 = *(uint16_t *)(va + H1_2(i)) ^ neg3;
|
e3 = *(uint16_t *)(va + H1_2(i)) ^ neg3;
|
||||||
r = float16_muladd(e1, e2, e3, 0, status);
|
r = float16_muladd(e1, e2, e3, flags, status);
|
||||||
*(uint16_t *)(vd + H1_2(i)) = r;
|
*(uint16_t *)(vd + H1_2(i)) = r;
|
||||||
}
|
}
|
||||||
} while (i & 63);
|
} while (i & 63);
|
||||||
@ -4762,30 +4839,51 @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg,
|
|||||||
void HELPER(sve_fmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
|
void HELPER(sve_fmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
|
||||||
void *vg, float_status *status, uint32_t desc)
|
void *vg, float_status *status, uint32_t desc)
|
||||||
{
|
{
|
||||||
do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0);
|
do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void HELPER(sve_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
|
void HELPER(sve_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
|
||||||
void *vg, float_status *status, uint32_t desc)
|
void *vg, float_status *status, uint32_t desc)
|
||||||
{
|
{
|
||||||
do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0);
|
do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void HELPER(sve_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
|
void HELPER(sve_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
|
||||||
void *vg, float_status *status, uint32_t desc)
|
void *vg, float_status *status, uint32_t desc)
|
||||||
{
|
{
|
||||||
do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000);
|
do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void HELPER(sve_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
|
void HELPER(sve_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
|
||||||
void *vg, float_status *status, uint32_t desc)
|
void *vg, float_status *status, uint32_t desc)
|
||||||
{
|
{
|
||||||
do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000);
|
do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void HELPER(sve_ah_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
|
||||||
|
void *vg, float_status *status, uint32_t desc)
|
||||||
|
{
|
||||||
|
do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0,
|
||||||
|
float_muladd_negate_product);
|
||||||
|
}
|
||||||
|
|
||||||
|
void HELPER(sve_ah_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
|
||||||
|
void *vg, float_status *status, uint32_t desc)
|
||||||
|
{
|
||||||
|
do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0,
|
||||||
|
float_muladd_negate_product | float_muladd_negate_c);
|
||||||
|
}
|
||||||
|
|
||||||
|
void HELPER(sve_ah_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
|
||||||
|
void *vg, float_status *status, uint32_t desc)
|
||||||
|
{
|
||||||
|
do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0,
|
||||||
|
float_muladd_negate_c);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg,
|
static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg,
|
||||||
float_status *status, uint32_t desc,
|
float_status *status, uint32_t desc,
|
||||||
uint32_t neg1, uint32_t neg3)
|
uint32_t neg1, uint32_t neg3, int flags)
|
||||||
{
|
{
|
||||||
intptr_t i = simd_oprsz(desc);
|
intptr_t i = simd_oprsz(desc);
|
||||||
uint64_t *g = vg;
|
uint64_t *g = vg;
|
||||||
@ -4800,7 +4898,7 @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg,
|
|||||||
e1 = *(uint32_t *)(vn + H1_4(i)) ^ neg1;
|
e1 = *(uint32_t *)(vn + H1_4(i)) ^ neg1;
|
||||||
e2 = *(uint32_t *)(vm + H1_4(i));
|
e2 = *(uint32_t *)(vm + H1_4(i));
|
||||||
e3 = *(uint32_t *)(va + H1_4(i)) ^ neg3;
|
e3 = *(uint32_t *)(va + H1_4(i)) ^ neg3;
|
||||||
r = float32_muladd(e1, e2, e3, 0, status);
|
r = float32_muladd(e1, e2, e3, flags, status);
|
||||||
*(uint32_t *)(vd + H1_4(i)) = r;
|
*(uint32_t *)(vd + H1_4(i)) = r;
|
||||||
}
|
}
|
||||||
} while (i & 63);
|
} while (i & 63);
|
||||||
@ -4810,30 +4908,51 @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg,
|
|||||||
void HELPER(sve_fmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
|
void HELPER(sve_fmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
|
||||||
void *vg, float_status *status, uint32_t desc)
|
void *vg, float_status *status, uint32_t desc)
|
||||||
{
|
{
|
||||||
do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0);
|
do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void HELPER(sve_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
|
void HELPER(sve_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
|
||||||
void *vg, float_status *status, uint32_t desc)
|
void *vg, float_status *status, uint32_t desc)
|
||||||
{
|
{
|
||||||
do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0);
|
do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void HELPER(sve_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
|
void HELPER(sve_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
|
||||||
void *vg, float_status *status, uint32_t desc)
|
void *vg, float_status *status, uint32_t desc)
|
||||||
{
|
{
|
||||||
do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000);
|
do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void HELPER(sve_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
|
void HELPER(sve_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
|
||||||
void *vg, float_status *status, uint32_t desc)
|
void *vg, float_status *status, uint32_t desc)
|
||||||
{
|
{
|
||||||
do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000);
|
do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void HELPER(sve_ah_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
|
||||||
|
void *vg, float_status *status, uint32_t desc)
|
||||||
|
{
|
||||||
|
do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0,
|
||||||
|
float_muladd_negate_product);
|
||||||
|
}
|
||||||
|
|
||||||
|
void HELPER(sve_ah_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
|
||||||
|
void *vg, float_status *status, uint32_t desc)
|
||||||
|
{
|
||||||
|
do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0,
|
||||||
|
float_muladd_negate_product | float_muladd_negate_c);
|
||||||
|
}
|
||||||
|
|
||||||
|
void HELPER(sve_ah_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
|
||||||
|
void *vg, float_status *status, uint32_t desc)
|
||||||
|
{
|
||||||
|
do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0,
|
||||||
|
float_muladd_negate_c);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg,
|
static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg,
|
||||||
float_status *status, uint32_t desc,
|
float_status *status, uint32_t desc,
|
||||||
uint64_t neg1, uint64_t neg3)
|
uint64_t neg1, uint64_t neg3, int flags)
|
||||||
{
|
{
|
||||||
intptr_t i = simd_oprsz(desc);
|
intptr_t i = simd_oprsz(desc);
|
||||||
uint64_t *g = vg;
|
uint64_t *g = vg;
|
||||||
@ -4848,7 +4967,7 @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg,
|
|||||||
e1 = *(uint64_t *)(vn + i) ^ neg1;
|
e1 = *(uint64_t *)(vn + i) ^ neg1;
|
||||||
e2 = *(uint64_t *)(vm + i);
|
e2 = *(uint64_t *)(vm + i);
|
||||||
e3 = *(uint64_t *)(va + i) ^ neg3;
|
e3 = *(uint64_t *)(va + i) ^ neg3;
|
||||||
r = float64_muladd(e1, e2, e3, 0, status);
|
r = float64_muladd(e1, e2, e3, flags, status);
|
||||||
*(uint64_t *)(vd + i) = r;
|
*(uint64_t *)(vd + i) = r;
|
||||||
}
|
}
|
||||||
} while (i & 63);
|
} while (i & 63);
|
||||||
@ -4858,25 +4977,46 @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg,
|
|||||||
void HELPER(sve_fmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
|
void HELPER(sve_fmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
|
||||||
void *vg, float_status *status, uint32_t desc)
|
void *vg, float_status *status, uint32_t desc)
|
||||||
{
|
{
|
||||||
do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0);
|
do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void HELPER(sve_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
|
void HELPER(sve_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
|
||||||
void *vg, float_status *status, uint32_t desc)
|
void *vg, float_status *status, uint32_t desc)
|
||||||
{
|
{
|
||||||
do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0);
|
do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void HELPER(sve_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
|
void HELPER(sve_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
|
||||||
void *vg, float_status *status, uint32_t desc)
|
void *vg, float_status *status, uint32_t desc)
|
||||||
{
|
{
|
||||||
do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN);
|
do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void HELPER(sve_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
|
void HELPER(sve_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
|
||||||
void *vg, float_status *status, uint32_t desc)
|
void *vg, float_status *status, uint32_t desc)
|
||||||
{
|
{
|
||||||
do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN);
|
do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
void HELPER(sve_ah_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
|
||||||
|
void *vg, float_status *status, uint32_t desc)
|
||||||
|
{
|
||||||
|
do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0,
|
||||||
|
float_muladd_negate_product);
|
||||||
|
}
|
||||||
|
|
||||||
|
void HELPER(sve_ah_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
|
||||||
|
void *vg, float_status *status, uint32_t desc)
|
||||||
|
{
|
||||||
|
do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0,
|
||||||
|
float_muladd_negate_product | float_muladd_negate_c);
|
||||||
|
}
|
||||||
|
|
||||||
|
void HELPER(sve_ah_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
|
||||||
|
void *vg, float_status *status, uint32_t desc)
|
||||||
|
{
|
||||||
|
do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0,
|
||||||
|
float_muladd_negate_c);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Two operand floating-point comparison controlled by a predicate.
|
/* Two operand floating-point comparison controlled by a predicate.
|
||||||
@ -4994,16 +5134,24 @@ void HELPER(sve_ftmad_h)(void *vd, void *vn, void *vm,
|
|||||||
0x3c00, 0xb800, 0x293a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
0x3c00, 0xb800, 0x293a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||||
};
|
};
|
||||||
intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float16);
|
intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float16);
|
||||||
intptr_t x = simd_data(desc);
|
intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3);
|
||||||
|
bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1);
|
||||||
float16 *d = vd, *n = vn, *m = vm;
|
float16 *d = vd, *n = vn, *m = vm;
|
||||||
|
|
||||||
for (i = 0; i < opr_sz; i++) {
|
for (i = 0; i < opr_sz; i++) {
|
||||||
float16 mm = m[i];
|
float16 mm = m[i];
|
||||||
intptr_t xx = x;
|
intptr_t xx = x;
|
||||||
|
int flags = 0;
|
||||||
|
|
||||||
if (float16_is_neg(mm)) {
|
if (float16_is_neg(mm)) {
|
||||||
mm = float16_abs(mm);
|
if (fpcr_ah) {
|
||||||
|
flags = float_muladd_negate_product;
|
||||||
|
} else {
|
||||||
|
mm = float16_abs(mm);
|
||||||
|
}
|
||||||
xx += 8;
|
xx += 8;
|
||||||
}
|
}
|
||||||
d[i] = float16_muladd(n[i], mm, coeff[xx], 0, s);
|
d[i] = float16_muladd(n[i], mm, coeff[xx], flags, s);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -5017,16 +5165,24 @@ void HELPER(sve_ftmad_s)(void *vd, void *vn, void *vm,
|
|||||||
0x37cd37cc, 0x00000000, 0x00000000, 0x00000000,
|
0x37cd37cc, 0x00000000, 0x00000000, 0x00000000,
|
||||||
};
|
};
|
||||||
intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float32);
|
intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float32);
|
||||||
intptr_t x = simd_data(desc);
|
intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3);
|
||||||
|
bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1);
|
||||||
float32 *d = vd, *n = vn, *m = vm;
|
float32 *d = vd, *n = vn, *m = vm;
|
||||||
|
|
||||||
for (i = 0; i < opr_sz; i++) {
|
for (i = 0; i < opr_sz; i++) {
|
||||||
float32 mm = m[i];
|
float32 mm = m[i];
|
||||||
intptr_t xx = x;
|
intptr_t xx = x;
|
||||||
|
int flags = 0;
|
||||||
|
|
||||||
if (float32_is_neg(mm)) {
|
if (float32_is_neg(mm)) {
|
||||||
mm = float32_abs(mm);
|
if (fpcr_ah) {
|
||||||
|
flags = float_muladd_negate_product;
|
||||||
|
} else {
|
||||||
|
mm = float32_abs(mm);
|
||||||
|
}
|
||||||
xx += 8;
|
xx += 8;
|
||||||
}
|
}
|
||||||
d[i] = float32_muladd(n[i], mm, coeff[xx], 0, s);
|
d[i] = float32_muladd(n[i], mm, coeff[xx], flags, s);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -5044,16 +5200,24 @@ void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm,
|
|||||||
0x3e21ee96d2641b13ull, 0xbda8f76380fbb401ull,
|
0x3e21ee96d2641b13ull, 0xbda8f76380fbb401ull,
|
||||||
};
|
};
|
||||||
intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float64);
|
intptr_t i, opr_sz = simd_oprsz(desc) / sizeof(float64);
|
||||||
intptr_t x = simd_data(desc);
|
intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3);
|
||||||
|
bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1);
|
||||||
float64 *d = vd, *n = vn, *m = vm;
|
float64 *d = vd, *n = vn, *m = vm;
|
||||||
|
|
||||||
for (i = 0; i < opr_sz; i++) {
|
for (i = 0; i < opr_sz; i++) {
|
||||||
float64 mm = m[i];
|
float64 mm = m[i];
|
||||||
intptr_t xx = x;
|
intptr_t xx = x;
|
||||||
|
int flags = 0;
|
||||||
|
|
||||||
if (float64_is_neg(mm)) {
|
if (float64_is_neg(mm)) {
|
||||||
mm = float64_abs(mm);
|
if (fpcr_ah) {
|
||||||
|
flags = float_muladd_negate_product;
|
||||||
|
} else {
|
||||||
|
mm = float64_abs(mm);
|
||||||
|
}
|
||||||
xx += 8;
|
xx += 8;
|
||||||
}
|
}
|
||||||
d[i] = float64_muladd(n[i], mm, coeff[xx], 0, s);
|
d[i] = float64_muladd(n[i], mm, coeff[xx], flags, s);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -5066,8 +5230,8 @@ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg,
|
|||||||
{
|
{
|
||||||
intptr_t j, i = simd_oprsz(desc);
|
intptr_t j, i = simd_oprsz(desc);
|
||||||
uint64_t *g = vg;
|
uint64_t *g = vg;
|
||||||
float16 neg_imag = float16_set_sign(0, simd_data(desc));
|
bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
|
||||||
float16 neg_real = float16_chs(neg_imag);
|
bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
uint64_t pg = g[(i - 1) >> 6];
|
uint64_t pg = g[(i - 1) >> 6];
|
||||||
@ -5079,9 +5243,15 @@ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg,
|
|||||||
i -= 2 * sizeof(float16);
|
i -= 2 * sizeof(float16);
|
||||||
|
|
||||||
e0 = *(float16 *)(vn + H1_2(i));
|
e0 = *(float16 *)(vn + H1_2(i));
|
||||||
e1 = *(float16 *)(vm + H1_2(j)) ^ neg_real;
|
e1 = *(float16 *)(vm + H1_2(j));
|
||||||
e2 = *(float16 *)(vn + H1_2(j));
|
e2 = *(float16 *)(vn + H1_2(j));
|
||||||
e3 = *(float16 *)(vm + H1_2(i)) ^ neg_imag;
|
e3 = *(float16 *)(vm + H1_2(i));
|
||||||
|
|
||||||
|
if (rot) {
|
||||||
|
e3 = float16_maybe_ah_chs(e3, fpcr_ah);
|
||||||
|
} else {
|
||||||
|
e1 = float16_maybe_ah_chs(e1, fpcr_ah);
|
||||||
|
}
|
||||||
|
|
||||||
if (likely((pg >> (i & 63)) & 1)) {
|
if (likely((pg >> (i & 63)) & 1)) {
|
||||||
*(float16 *)(vd + H1_2(i)) = float16_add(e0, e1, s);
|
*(float16 *)(vd + H1_2(i)) = float16_add(e0, e1, s);
|
||||||
@ -5098,8 +5268,8 @@ void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg,
|
|||||||
{
|
{
|
||||||
intptr_t j, i = simd_oprsz(desc);
|
intptr_t j, i = simd_oprsz(desc);
|
||||||
uint64_t *g = vg;
|
uint64_t *g = vg;
|
||||||
float32 neg_imag = float32_set_sign(0, simd_data(desc));
|
bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
|
||||||
float32 neg_real = float32_chs(neg_imag);
|
bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
uint64_t pg = g[(i - 1) >> 6];
|
uint64_t pg = g[(i - 1) >> 6];
|
||||||
@ -5111,9 +5281,15 @@ void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg,
|
|||||||
i -= 2 * sizeof(float32);
|
i -= 2 * sizeof(float32);
|
||||||
|
|
||||||
e0 = *(float32 *)(vn + H1_2(i));
|
e0 = *(float32 *)(vn + H1_2(i));
|
||||||
e1 = *(float32 *)(vm + H1_2(j)) ^ neg_real;
|
e1 = *(float32 *)(vm + H1_2(j));
|
||||||
e2 = *(float32 *)(vn + H1_2(j));
|
e2 = *(float32 *)(vn + H1_2(j));
|
||||||
e3 = *(float32 *)(vm + H1_2(i)) ^ neg_imag;
|
e3 = *(float32 *)(vm + H1_2(i));
|
||||||
|
|
||||||
|
if (rot) {
|
||||||
|
e3 = float32_maybe_ah_chs(e3, fpcr_ah);
|
||||||
|
} else {
|
||||||
|
e1 = float32_maybe_ah_chs(e1, fpcr_ah);
|
||||||
|
}
|
||||||
|
|
||||||
if (likely((pg >> (i & 63)) & 1)) {
|
if (likely((pg >> (i & 63)) & 1)) {
|
||||||
*(float32 *)(vd + H1_2(i)) = float32_add(e0, e1, s);
|
*(float32 *)(vd + H1_2(i)) = float32_add(e0, e1, s);
|
||||||
@ -5130,8 +5306,8 @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg,
|
|||||||
{
|
{
|
||||||
intptr_t j, i = simd_oprsz(desc);
|
intptr_t j, i = simd_oprsz(desc);
|
||||||
uint64_t *g = vg;
|
uint64_t *g = vg;
|
||||||
float64 neg_imag = float64_set_sign(0, simd_data(desc));
|
bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
|
||||||
float64 neg_real = float64_chs(neg_imag);
|
bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
uint64_t pg = g[(i - 1) >> 6];
|
uint64_t pg = g[(i - 1) >> 6];
|
||||||
@ -5143,9 +5319,15 @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg,
|
|||||||
i -= 2 * sizeof(float64);
|
i -= 2 * sizeof(float64);
|
||||||
|
|
||||||
e0 = *(float64 *)(vn + H1_2(i));
|
e0 = *(float64 *)(vn + H1_2(i));
|
||||||
e1 = *(float64 *)(vm + H1_2(j)) ^ neg_real;
|
e1 = *(float64 *)(vm + H1_2(j));
|
||||||
e2 = *(float64 *)(vn + H1_2(j));
|
e2 = *(float64 *)(vn + H1_2(j));
|
||||||
e3 = *(float64 *)(vm + H1_2(i)) ^ neg_imag;
|
e3 = *(float64 *)(vm + H1_2(i));
|
||||||
|
|
||||||
|
if (rot) {
|
||||||
|
e3 = float64_maybe_ah_chs(e3, fpcr_ah);
|
||||||
|
} else {
|
||||||
|
e1 = float64_maybe_ah_chs(e1, fpcr_ah);
|
||||||
|
}
|
||||||
|
|
||||||
if (likely((pg >> (i & 63)) & 1)) {
|
if (likely((pg >> (i & 63)) & 1)) {
|
||||||
*(float64 *)(vd + H1_2(i)) = float64_add(e0, e1, s);
|
*(float64 *)(vd + H1_2(i)) = float64_add(e0, e1, s);
|
||||||
@ -5165,13 +5347,18 @@ void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
|
|||||||
void *vg, float_status *status, uint32_t desc)
|
void *vg, float_status *status, uint32_t desc)
|
||||||
{
|
{
|
||||||
intptr_t j, i = simd_oprsz(desc);
|
intptr_t j, i = simd_oprsz(desc);
|
||||||
unsigned rot = simd_data(desc);
|
bool flip = extract32(desc, SIMD_DATA_SHIFT, 1);
|
||||||
bool flip = rot & 1;
|
uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
|
||||||
float16 neg_imag, neg_real;
|
uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
|
||||||
|
uint32_t negf_real = flip ^ negf_imag;
|
||||||
|
float16 negx_imag, negx_real;
|
||||||
uint64_t *g = vg;
|
uint64_t *g = vg;
|
||||||
|
|
||||||
neg_imag = float16_set_sign(0, (rot & 2) != 0);
|
/* With AH=0, use negx; with AH=1 use negf. */
|
||||||
neg_real = float16_set_sign(0, rot == 1 || rot == 2);
|
negx_real = (negf_real & ~fpcr_ah) << 15;
|
||||||
|
negx_imag = (negf_imag & ~fpcr_ah) << 15;
|
||||||
|
negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
|
||||||
|
negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
uint64_t pg = g[(i - 1) >> 6];
|
uint64_t pg = g[(i - 1) >> 6];
|
||||||
@ -5188,18 +5375,18 @@ void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
|
|||||||
mi = *(float16 *)(vm + H1_2(j));
|
mi = *(float16 *)(vm + H1_2(j));
|
||||||
|
|
||||||
e2 = (flip ? ni : nr);
|
e2 = (flip ? ni : nr);
|
||||||
e1 = (flip ? mi : mr) ^ neg_real;
|
e1 = (flip ? mi : mr) ^ negx_real;
|
||||||
e4 = e2;
|
e4 = e2;
|
||||||
e3 = (flip ? mr : mi) ^ neg_imag;
|
e3 = (flip ? mr : mi) ^ negx_imag;
|
||||||
|
|
||||||
if (likely((pg >> (i & 63)) & 1)) {
|
if (likely((pg >> (i & 63)) & 1)) {
|
||||||
d = *(float16 *)(va + H1_2(i));
|
d = *(float16 *)(va + H1_2(i));
|
||||||
d = float16_muladd(e2, e1, d, 0, status);
|
d = float16_muladd(e2, e1, d, negf_real, status);
|
||||||
*(float16 *)(vd + H1_2(i)) = d;
|
*(float16 *)(vd + H1_2(i)) = d;
|
||||||
}
|
}
|
||||||
if (likely((pg >> (j & 63)) & 1)) {
|
if (likely((pg >> (j & 63)) & 1)) {
|
||||||
d = *(float16 *)(va + H1_2(j));
|
d = *(float16 *)(va + H1_2(j));
|
||||||
d = float16_muladd(e4, e3, d, 0, status);
|
d = float16_muladd(e4, e3, d, negf_imag, status);
|
||||||
*(float16 *)(vd + H1_2(j)) = d;
|
*(float16 *)(vd + H1_2(j)) = d;
|
||||||
}
|
}
|
||||||
} while (i & 63);
|
} while (i & 63);
|
||||||
@ -5210,13 +5397,18 @@ void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
|
|||||||
void *vg, float_status *status, uint32_t desc)
|
void *vg, float_status *status, uint32_t desc)
|
||||||
{
|
{
|
||||||
intptr_t j, i = simd_oprsz(desc);
|
intptr_t j, i = simd_oprsz(desc);
|
||||||
unsigned rot = simd_data(desc);
|
bool flip = extract32(desc, SIMD_DATA_SHIFT, 1);
|
||||||
bool flip = rot & 1;
|
uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
|
||||||
float32 neg_imag, neg_real;
|
uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
|
||||||
|
uint32_t negf_real = flip ^ negf_imag;
|
||||||
|
float32 negx_imag, negx_real;
|
||||||
uint64_t *g = vg;
|
uint64_t *g = vg;
|
||||||
|
|
||||||
neg_imag = float32_set_sign(0, (rot & 2) != 0);
|
/* With AH=0, use negx; with AH=1 use negf. */
|
||||||
neg_real = float32_set_sign(0, rot == 1 || rot == 2);
|
negx_real = (negf_real & ~fpcr_ah) << 31;
|
||||||
|
negx_imag = (negf_imag & ~fpcr_ah) << 31;
|
||||||
|
negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
|
||||||
|
negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
uint64_t pg = g[(i - 1) >> 6];
|
uint64_t pg = g[(i - 1) >> 6];
|
||||||
@ -5233,18 +5425,18 @@ void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
|
|||||||
mi = *(float32 *)(vm + H1_2(j));
|
mi = *(float32 *)(vm + H1_2(j));
|
||||||
|
|
||||||
e2 = (flip ? ni : nr);
|
e2 = (flip ? ni : nr);
|
||||||
e1 = (flip ? mi : mr) ^ neg_real;
|
e1 = (flip ? mi : mr) ^ negx_real;
|
||||||
e4 = e2;
|
e4 = e2;
|
||||||
e3 = (flip ? mr : mi) ^ neg_imag;
|
e3 = (flip ? mr : mi) ^ negx_imag;
|
||||||
|
|
||||||
if (likely((pg >> (i & 63)) & 1)) {
|
if (likely((pg >> (i & 63)) & 1)) {
|
||||||
d = *(float32 *)(va + H1_2(i));
|
d = *(float32 *)(va + H1_2(i));
|
||||||
d = float32_muladd(e2, e1, d, 0, status);
|
d = float32_muladd(e2, e1, d, negf_real, status);
|
||||||
*(float32 *)(vd + H1_2(i)) = d;
|
*(float32 *)(vd + H1_2(i)) = d;
|
||||||
}
|
}
|
||||||
if (likely((pg >> (j & 63)) & 1)) {
|
if (likely((pg >> (j & 63)) & 1)) {
|
||||||
d = *(float32 *)(va + H1_2(j));
|
d = *(float32 *)(va + H1_2(j));
|
||||||
d = float32_muladd(e4, e3, d, 0, status);
|
d = float32_muladd(e4, e3, d, negf_imag, status);
|
||||||
*(float32 *)(vd + H1_2(j)) = d;
|
*(float32 *)(vd + H1_2(j)) = d;
|
||||||
}
|
}
|
||||||
} while (i & 63);
|
} while (i & 63);
|
||||||
@ -5255,13 +5447,18 @@ void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
|
|||||||
void *vg, float_status *status, uint32_t desc)
|
void *vg, float_status *status, uint32_t desc)
|
||||||
{
|
{
|
||||||
intptr_t j, i = simd_oprsz(desc);
|
intptr_t j, i = simd_oprsz(desc);
|
||||||
unsigned rot = simd_data(desc);
|
bool flip = extract32(desc, SIMD_DATA_SHIFT, 1);
|
||||||
bool flip = rot & 1;
|
uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
|
||||||
float64 neg_imag, neg_real;
|
uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
|
||||||
|
uint32_t negf_real = flip ^ negf_imag;
|
||||||
|
float64 negx_imag, negx_real;
|
||||||
uint64_t *g = vg;
|
uint64_t *g = vg;
|
||||||
|
|
||||||
neg_imag = float64_set_sign(0, (rot & 2) != 0);
|
/* With AH=0, use negx; with AH=1 use negf. */
|
||||||
neg_real = float64_set_sign(0, rot == 1 || rot == 2);
|
negx_real = (uint64_t)(negf_real & ~fpcr_ah) << 63;
|
||||||
|
negx_imag = (uint64_t)(negf_imag & ~fpcr_ah) << 63;
|
||||||
|
negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
|
||||||
|
negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
|
||||||
|
|
||||||
do {
|
do {
|
||||||
uint64_t pg = g[(i - 1) >> 6];
|
uint64_t pg = g[(i - 1) >> 6];
|
||||||
@ -5278,18 +5475,18 @@ void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
|
|||||||
mi = *(float64 *)(vm + H1_2(j));
|
mi = *(float64 *)(vm + H1_2(j));
|
||||||
|
|
||||||
e2 = (flip ? ni : nr);
|
e2 = (flip ? ni : nr);
|
||||||
e1 = (flip ? mi : mr) ^ neg_real;
|
e1 = (flip ? mi : mr) ^ negx_real;
|
||||||
e4 = e2;
|
e4 = e2;
|
||||||
e3 = (flip ? mr : mi) ^ neg_imag;
|
e3 = (flip ? mr : mi) ^ negx_imag;
|
||||||
|
|
||||||
if (likely((pg >> (i & 63)) & 1)) {
|
if (likely((pg >> (i & 63)) & 1)) {
|
||||||
d = *(float64 *)(va + H1_2(i));
|
d = *(float64 *)(va + H1_2(i));
|
||||||
d = float64_muladd(e2, e1, d, 0, status);
|
d = float64_muladd(e2, e1, d, negf_real, status);
|
||||||
*(float64 *)(vd + H1_2(i)) = d;
|
*(float64 *)(vd + H1_2(i)) = d;
|
||||||
}
|
}
|
||||||
if (likely((pg >> (j & 63)) & 1)) {
|
if (likely((pg >> (j & 63)) & 1)) {
|
||||||
d = *(float64 *)(va + H1_2(j));
|
d = *(float64 *)(va + H1_2(j));
|
||||||
d = float64_muladd(e4, e3, d, 0, status);
|
d = float64_muladd(e4, e3, d, negf_imag, status);
|
||||||
*(float64 *)(vd + H1_2(j)) = d;
|
*(float64 *)(vd + H1_2(j)) = d;
|
||||||
}
|
}
|
||||||
} while (i & 63);
|
} while (i & 63);
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -185,6 +185,19 @@ static inline TCGv_ptr pred_full_reg_ptr(DisasContext *s, int regno)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Return the ARMFPStatusFlavour to use based on element size and
|
||||||
|
* whether FPCR.AH is set.
|
||||||
|
*/
|
||||||
|
static inline ARMFPStatusFlavour select_ah_fpst(DisasContext *s, MemOp esz)
|
||||||
|
{
|
||||||
|
if (s->fpcr_ah) {
|
||||||
|
return esz == MO_16 ? FPST_AH_F16 : FPST_AH;
|
||||||
|
} else {
|
||||||
|
return esz == MO_16 ? FPST_A64_F16 : FPST_A64;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bool disas_sve(DisasContext *, uint32_t);
|
bool disas_sve(DisasContext *, uint32_t);
|
||||||
bool disas_sme(DisasContext *, uint32_t);
|
bool disas_sme(DisasContext *, uint32_t);
|
||||||
|
|
||||||
|
@ -137,11 +137,11 @@ static bool gen_gvec_fpst_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool gen_gvec_fpst_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
|
static bool gen_gvec_fpst_ah_arg_zz(DisasContext *s, gen_helper_gvec_2_ptr *fn,
|
||||||
arg_rr_esz *a, int data)
|
arg_rr_esz *a, int data)
|
||||||
{
|
{
|
||||||
return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data,
|
return gen_gvec_fpst_zz(s, fn, a->rd, a->rn, data,
|
||||||
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
|
select_ah_fpst(s, a->esz));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Invoke an out-of-line helper on 3 Zregs. */
|
/* Invoke an out-of-line helper on 3 Zregs. */
|
||||||
@ -194,6 +194,13 @@ static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
|
|||||||
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
|
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool gen_gvec_fpst_ah_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
|
||||||
|
arg_rrr_esz *a, int data)
|
||||||
|
{
|
||||||
|
return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data,
|
||||||
|
select_ah_fpst(s, a->esz));
|
||||||
|
}
|
||||||
|
|
||||||
/* Invoke an out-of-line helper on 4 Zregs. */
|
/* Invoke an out-of-line helper on 4 Zregs. */
|
||||||
static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
|
static bool gen_gvec_ool_zzzz(DisasContext *s, gen_helper_gvec_4 *fn,
|
||||||
int rd, int rn, int rm, int ra, int data)
|
int rd, int rn, int rm, int ra, int data)
|
||||||
@ -776,13 +783,23 @@ static gen_helper_gvec_3 * const fabs_fns[4] = {
|
|||||||
NULL, gen_helper_sve_fabs_h,
|
NULL, gen_helper_sve_fabs_h,
|
||||||
gen_helper_sve_fabs_s, gen_helper_sve_fabs_d,
|
gen_helper_sve_fabs_s, gen_helper_sve_fabs_d,
|
||||||
};
|
};
|
||||||
TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz, fabs_fns[a->esz], a, 0)
|
static gen_helper_gvec_3 * const fabs_ah_fns[4] = {
|
||||||
|
NULL, gen_helper_sve_ah_fabs_h,
|
||||||
|
gen_helper_sve_ah_fabs_s, gen_helper_sve_ah_fabs_d,
|
||||||
|
};
|
||||||
|
TRANS_FEAT(FABS, aa64_sve, gen_gvec_ool_arg_zpz,
|
||||||
|
s->fpcr_ah ? fabs_ah_fns[a->esz] : fabs_fns[a->esz], a, 0)
|
||||||
|
|
||||||
static gen_helper_gvec_3 * const fneg_fns[4] = {
|
static gen_helper_gvec_3 * const fneg_fns[4] = {
|
||||||
NULL, gen_helper_sve_fneg_h,
|
NULL, gen_helper_sve_fneg_h,
|
||||||
gen_helper_sve_fneg_s, gen_helper_sve_fneg_d,
|
gen_helper_sve_fneg_s, gen_helper_sve_fneg_d,
|
||||||
};
|
};
|
||||||
TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz, fneg_fns[a->esz], a, 0)
|
static gen_helper_gvec_3 * const fneg_ah_fns[4] = {
|
||||||
|
NULL, gen_helper_sve_ah_fneg_h,
|
||||||
|
gen_helper_sve_ah_fneg_s, gen_helper_sve_ah_fneg_d,
|
||||||
|
};
|
||||||
|
TRANS_FEAT(FNEG, aa64_sve, gen_gvec_ool_arg_zpz,
|
||||||
|
s->fpcr_ah ? fneg_ah_fns[a->esz] : fneg_fns[a->esz], a, 0)
|
||||||
|
|
||||||
static gen_helper_gvec_3 * const sxtb_fns[4] = {
|
static gen_helper_gvec_3 * const sxtb_fns[4] = {
|
||||||
NULL, gen_helper_sve_sxtb_h,
|
NULL, gen_helper_sve_sxtb_h,
|
||||||
@ -1221,14 +1238,14 @@ static gen_helper_gvec_2 * const fexpa_fns[4] = {
|
|||||||
gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
|
gen_helper_sve_fexpa_s, gen_helper_sve_fexpa_d,
|
||||||
};
|
};
|
||||||
TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz,
|
TRANS_FEAT_NONSTREAMING(FEXPA, aa64_sve, gen_gvec_ool_zz,
|
||||||
fexpa_fns[a->esz], a->rd, a->rn, 0)
|
fexpa_fns[a->esz], a->rd, a->rn, s->fpcr_ah)
|
||||||
|
|
||||||
static gen_helper_gvec_3 * const ftssel_fns[4] = {
|
static gen_helper_gvec_3 * const ftssel_fns[4] = {
|
||||||
NULL, gen_helper_sve_ftssel_h,
|
NULL, gen_helper_sve_ftssel_h,
|
||||||
gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
|
gen_helper_sve_ftssel_s, gen_helper_sve_ftssel_d,
|
||||||
};
|
};
|
||||||
TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz,
|
TRANS_FEAT_NONSTREAMING(FTSSEL, aa64_sve, gen_gvec_ool_arg_zzz,
|
||||||
ftssel_fns[a->esz], a, 0)
|
ftssel_fns[a->esz], a, s->fpcr_ah)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
*** SVE Predicate Logical Operations Group
|
*** SVE Predicate Logical Operations Group
|
||||||
@ -3507,21 +3524,24 @@ DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)
|
|||||||
*** SVE Floating Point Multiply-Add Indexed Group
|
*** SVE Floating Point Multiply-Add Indexed Group
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
|
static gen_helper_gvec_4_ptr * const fmla_idx_fns[4] = {
|
||||||
{
|
NULL, gen_helper_gvec_fmla_idx_h,
|
||||||
static gen_helper_gvec_4_ptr * const fns[4] = {
|
gen_helper_gvec_fmla_idx_s, gen_helper_gvec_fmla_idx_d
|
||||||
NULL,
|
};
|
||||||
gen_helper_gvec_fmla_idx_h,
|
TRANS_FEAT(FMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz,
|
||||||
gen_helper_gvec_fmla_idx_s,
|
fmla_idx_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->index,
|
||||||
gen_helper_gvec_fmla_idx_d,
|
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
|
||||||
};
|
|
||||||
return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra,
|
|
||||||
(a->index << 1) | sub,
|
|
||||||
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
|
|
||||||
}
|
|
||||||
|
|
||||||
TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false)
|
static gen_helper_gvec_4_ptr * const fmls_idx_fns[4][2] = {
|
||||||
TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true)
|
{ NULL, NULL },
|
||||||
|
{ gen_helper_gvec_fmls_idx_h, gen_helper_gvec_ah_fmls_idx_h },
|
||||||
|
{ gen_helper_gvec_fmls_idx_s, gen_helper_gvec_ah_fmls_idx_s },
|
||||||
|
{ gen_helper_gvec_fmls_idx_d, gen_helper_gvec_ah_fmls_idx_d },
|
||||||
|
};
|
||||||
|
TRANS_FEAT(FMLS_zzxz, aa64_sve, gen_gvec_fpst_zzzz,
|
||||||
|
fmls_idx_fns[a->esz][s->fpcr_ah],
|
||||||
|
a->rd, a->rn, a->rm, a->ra, a->index,
|
||||||
|
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
*** SVE Floating Point Multiply Indexed Group
|
*** SVE Floating Point Multiply Indexed Group
|
||||||
@ -3581,11 +3601,23 @@ static bool do_reduce(DisasContext *s, arg_rpr_esz *a,
|
|||||||
}; \
|
}; \
|
||||||
TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz])
|
TRANS_FEAT(NAME, aa64_sve, do_reduce, a, name##_fns[a->esz])
|
||||||
|
|
||||||
|
#define DO_VPZ_AH(NAME, name) \
|
||||||
|
static gen_helper_fp_reduce * const name##_fns[4] = { \
|
||||||
|
NULL, gen_helper_sve_##name##_h, \
|
||||||
|
gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
|
||||||
|
}; \
|
||||||
|
static gen_helper_fp_reduce * const name##_ah_fns[4] = { \
|
||||||
|
NULL, gen_helper_sve_ah_##name##_h, \
|
||||||
|
gen_helper_sve_ah_##name##_s, gen_helper_sve_ah_##name##_d, \
|
||||||
|
}; \
|
||||||
|
TRANS_FEAT(NAME, aa64_sve, do_reduce, a, \
|
||||||
|
s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz])
|
||||||
|
|
||||||
DO_VPZ(FADDV, faddv)
|
DO_VPZ(FADDV, faddv)
|
||||||
DO_VPZ(FMINNMV, fminnmv)
|
DO_VPZ(FMINNMV, fminnmv)
|
||||||
DO_VPZ(FMAXNMV, fmaxnmv)
|
DO_VPZ(FMAXNMV, fmaxnmv)
|
||||||
DO_VPZ(FMINV, fminv)
|
DO_VPZ_AH(FMINV, fminv)
|
||||||
DO_VPZ(FMAXV, fmaxv)
|
DO_VPZ_AH(FMAXV, fmaxv)
|
||||||
|
|
||||||
#undef DO_VPZ
|
#undef DO_VPZ
|
||||||
|
|
||||||
@ -3597,13 +3629,25 @@ static gen_helper_gvec_2_ptr * const frecpe_fns[] = {
|
|||||||
NULL, gen_helper_gvec_frecpe_h,
|
NULL, gen_helper_gvec_frecpe_h,
|
||||||
gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d,
|
gen_helper_gvec_frecpe_s, gen_helper_gvec_frecpe_d,
|
||||||
};
|
};
|
||||||
TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_arg_zz, frecpe_fns[a->esz], a, 0)
|
static gen_helper_gvec_2_ptr * const frecpe_rpres_fns[] = {
|
||||||
|
NULL, gen_helper_gvec_frecpe_h,
|
||||||
|
gen_helper_gvec_frecpe_rpres_s, gen_helper_gvec_frecpe_d,
|
||||||
|
};
|
||||||
|
TRANS_FEAT(FRECPE, aa64_sve, gen_gvec_fpst_ah_arg_zz,
|
||||||
|
s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ?
|
||||||
|
frecpe_rpres_fns[a->esz] : frecpe_fns[a->esz], a, 0)
|
||||||
|
|
||||||
static gen_helper_gvec_2_ptr * const frsqrte_fns[] = {
|
static gen_helper_gvec_2_ptr * const frsqrte_fns[] = {
|
||||||
NULL, gen_helper_gvec_frsqrte_h,
|
NULL, gen_helper_gvec_frsqrte_h,
|
||||||
gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d,
|
gen_helper_gvec_frsqrte_s, gen_helper_gvec_frsqrte_d,
|
||||||
};
|
};
|
||||||
TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_arg_zz, frsqrte_fns[a->esz], a, 0)
|
static gen_helper_gvec_2_ptr * const frsqrte_rpres_fns[] = {
|
||||||
|
NULL, gen_helper_gvec_frsqrte_h,
|
||||||
|
gen_helper_gvec_frsqrte_rpres_s, gen_helper_gvec_frsqrte_d,
|
||||||
|
};
|
||||||
|
TRANS_FEAT(FRSQRTE, aa64_sve, gen_gvec_fpst_ah_arg_zz,
|
||||||
|
s->fpcr_ah && dc_isar_feature(aa64_rpres, s) ?
|
||||||
|
frsqrte_rpres_fns[a->esz] : frsqrte_fns[a->esz], a, 0)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
*** SVE Floating Point Compare with Zero Group
|
*** SVE Floating Point Compare with Zero Group
|
||||||
@ -3653,7 +3697,8 @@ static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
|
|||||||
gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d,
|
gen_helper_sve_ftmad_s, gen_helper_sve_ftmad_d,
|
||||||
};
|
};
|
||||||
TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
|
TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
|
||||||
ftmad_fns[a->esz], a->rd, a->rn, a->rm, a->imm,
|
ftmad_fns[a->esz], a->rd, a->rn, a->rm,
|
||||||
|
a->imm | (s->fpcr_ah << 3),
|
||||||
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
|
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -3707,11 +3752,23 @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
|
|||||||
}; \
|
}; \
|
||||||
TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0)
|
TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_arg_zzz, name##_fns[a->esz], a, 0)
|
||||||
|
|
||||||
|
#define DO_FP3_AH(NAME, name) \
|
||||||
|
static gen_helper_gvec_3_ptr * const name##_fns[4] = { \
|
||||||
|
NULL, gen_helper_gvec_##name##_h, \
|
||||||
|
gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
|
||||||
|
}; \
|
||||||
|
static gen_helper_gvec_3_ptr * const name##_ah_fns[4] = { \
|
||||||
|
NULL, gen_helper_gvec_ah_##name##_h, \
|
||||||
|
gen_helper_gvec_ah_##name##_s, gen_helper_gvec_ah_##name##_d \
|
||||||
|
}; \
|
||||||
|
TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_ah_arg_zzz, \
|
||||||
|
s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], a, 0)
|
||||||
|
|
||||||
DO_FP3(FADD_zzz, fadd)
|
DO_FP3(FADD_zzz, fadd)
|
||||||
DO_FP3(FSUB_zzz, fsub)
|
DO_FP3(FSUB_zzz, fsub)
|
||||||
DO_FP3(FMUL_zzz, fmul)
|
DO_FP3(FMUL_zzz, fmul)
|
||||||
DO_FP3(FRECPS, recps)
|
DO_FP3_AH(FRECPS, recps)
|
||||||
DO_FP3(FRSQRTS, rsqrts)
|
DO_FP3_AH(FRSQRTS, rsqrts)
|
||||||
|
|
||||||
#undef DO_FP3
|
#undef DO_FP3
|
||||||
|
|
||||||
@ -3733,14 +3790,27 @@ TRANS_FEAT_NONSTREAMING(FTSMUL, aa64_sve, gen_gvec_fpst_arg_zzz,
|
|||||||
}; \
|
}; \
|
||||||
TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a)
|
TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, name##_zpzz_fns[a->esz], a)
|
||||||
|
|
||||||
|
#define DO_ZPZZ_AH_FP(NAME, FEAT, name, ah_name) \
|
||||||
|
static gen_helper_gvec_4_ptr * const name##_zpzz_fns[4] = { \
|
||||||
|
NULL, gen_helper_##name##_h, \
|
||||||
|
gen_helper_##name##_s, gen_helper_##name##_d \
|
||||||
|
}; \
|
||||||
|
static gen_helper_gvec_4_ptr * const name##_ah_zpzz_fns[4] = { \
|
||||||
|
NULL, gen_helper_##ah_name##_h, \
|
||||||
|
gen_helper_##ah_name##_s, gen_helper_##ah_name##_d \
|
||||||
|
}; \
|
||||||
|
TRANS_FEAT(NAME, FEAT, gen_gvec_fpst_arg_zpzz, \
|
||||||
|
s->fpcr_ah ? name##_ah_zpzz_fns[a->esz] : \
|
||||||
|
name##_zpzz_fns[a->esz], a)
|
||||||
|
|
||||||
DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd)
|
DO_ZPZZ_FP(FADD_zpzz, aa64_sve, sve_fadd)
|
||||||
DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub)
|
DO_ZPZZ_FP(FSUB_zpzz, aa64_sve, sve_fsub)
|
||||||
DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul)
|
DO_ZPZZ_FP(FMUL_zpzz, aa64_sve, sve_fmul)
|
||||||
DO_ZPZZ_FP(FMIN_zpzz, aa64_sve, sve_fmin)
|
DO_ZPZZ_AH_FP(FMIN_zpzz, aa64_sve, sve_fmin, sve_ah_fmin)
|
||||||
DO_ZPZZ_FP(FMAX_zpzz, aa64_sve, sve_fmax)
|
DO_ZPZZ_AH_FP(FMAX_zpzz, aa64_sve, sve_fmax, sve_ah_fmax)
|
||||||
DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum)
|
DO_ZPZZ_FP(FMINNM_zpzz, aa64_sve, sve_fminnum)
|
||||||
DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum)
|
DO_ZPZZ_FP(FMAXNM_zpzz, aa64_sve, sve_fmaxnum)
|
||||||
DO_ZPZZ_FP(FABD, aa64_sve, sve_fabd)
|
DO_ZPZZ_AH_FP(FABD, aa64_sve, sve_fabd, sve_ah_fabd)
|
||||||
DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn)
|
DO_ZPZZ_FP(FSCALE, aa64_sve, sve_fscalbn)
|
||||||
DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv)
|
DO_ZPZZ_FP(FDIV, aa64_sve, sve_fdiv)
|
||||||
DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx)
|
DO_ZPZZ_FP(FMULX, aa64_sve, sve_fmulx)
|
||||||
@ -3795,14 +3865,35 @@ static bool do_fp_imm(DisasContext *s, arg_rpri_esz *a, uint64_t imm,
|
|||||||
TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \
|
TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \
|
||||||
name##_const[a->esz][a->imm], name##_fns[a->esz])
|
name##_const[a->esz][a->imm], name##_fns[a->esz])
|
||||||
|
|
||||||
|
#define DO_FP_AH_IMM(NAME, name, const0, const1) \
|
||||||
|
static gen_helper_sve_fp2scalar * const name##_fns[4] = { \
|
||||||
|
NULL, gen_helper_sve_##name##_h, \
|
||||||
|
gen_helper_sve_##name##_s, \
|
||||||
|
gen_helper_sve_##name##_d \
|
||||||
|
}; \
|
||||||
|
static gen_helper_sve_fp2scalar * const name##_ah_fns[4] = { \
|
||||||
|
NULL, gen_helper_sve_ah_##name##_h, \
|
||||||
|
gen_helper_sve_ah_##name##_s, \
|
||||||
|
gen_helper_sve_ah_##name##_d \
|
||||||
|
}; \
|
||||||
|
static uint64_t const name##_const[4][2] = { \
|
||||||
|
{ -1, -1 }, \
|
||||||
|
{ float16_##const0, float16_##const1 }, \
|
||||||
|
{ float32_##const0, float32_##const1 }, \
|
||||||
|
{ float64_##const0, float64_##const1 }, \
|
||||||
|
}; \
|
||||||
|
TRANS_FEAT(NAME##_zpzi, aa64_sve, do_fp_imm, a, \
|
||||||
|
name##_const[a->esz][a->imm], \
|
||||||
|
s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz])
|
||||||
|
|
||||||
DO_FP_IMM(FADD, fadds, half, one)
|
DO_FP_IMM(FADD, fadds, half, one)
|
||||||
DO_FP_IMM(FSUB, fsubs, half, one)
|
DO_FP_IMM(FSUB, fsubs, half, one)
|
||||||
DO_FP_IMM(FMUL, fmuls, half, two)
|
DO_FP_IMM(FMUL, fmuls, half, two)
|
||||||
DO_FP_IMM(FSUBR, fsubrs, half, one)
|
DO_FP_IMM(FSUBR, fsubrs, half, one)
|
||||||
DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
|
DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
|
||||||
DO_FP_IMM(FMINNM, fminnms, zero, one)
|
DO_FP_IMM(FMINNM, fminnms, zero, one)
|
||||||
DO_FP_IMM(FMAX, fmaxs, zero, one)
|
DO_FP_AH_IMM(FMAX, fmaxs, zero, one)
|
||||||
DO_FP_IMM(FMIN, fmins, zero, one)
|
DO_FP_AH_IMM(FMIN, fmins, zero, one)
|
||||||
|
|
||||||
#undef DO_FP_IMM
|
#undef DO_FP_IMM
|
||||||
|
|
||||||
@ -3846,22 +3937,28 @@ static gen_helper_gvec_4_ptr * const fcadd_fns[] = {
|
|||||||
gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d,
|
gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d,
|
||||||
};
|
};
|
||||||
TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
|
TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
|
||||||
a->rd, a->rn, a->rm, a->pg, a->rot,
|
a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1),
|
||||||
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
|
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
|
||||||
|
|
||||||
#define DO_FMLA(NAME, name) \
|
#define DO_FMLA(NAME, name, ah_name) \
|
||||||
static gen_helper_gvec_5_ptr * const name##_fns[4] = { \
|
static gen_helper_gvec_5_ptr * const name##_fns[4] = { \
|
||||||
NULL, gen_helper_sve_##name##_h, \
|
NULL, gen_helper_sve_##name##_h, \
|
||||||
gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
|
gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
|
||||||
}; \
|
}; \
|
||||||
TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \
|
static gen_helper_gvec_5_ptr * const name##_ah_fns[4] = { \
|
||||||
|
NULL, gen_helper_sve_##ah_name##_h, \
|
||||||
|
gen_helper_sve_##ah_name##_s, gen_helper_sve_##ah_name##_d \
|
||||||
|
}; \
|
||||||
|
TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, \
|
||||||
|
s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], \
|
||||||
a->rd, a->rn, a->rm, a->ra, a->pg, 0, \
|
a->rd, a->rn, a->rm, a->ra, a->pg, 0, \
|
||||||
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
|
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
|
||||||
|
|
||||||
DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
|
/* We don't need an ah_fmla_zpzzz because fmla doesn't negate anything */
|
||||||
DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
|
DO_FMLA(FMLA_zpzzz, fmla_zpzzz, fmla_zpzzz)
|
||||||
DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
|
DO_FMLA(FMLS_zpzzz, fmls_zpzzz, ah_fmls_zpzzz)
|
||||||
DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
|
DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz, ah_fnmla_zpzzz)
|
||||||
|
DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz, ah_fnmls_zpzzz)
|
||||||
|
|
||||||
#undef DO_FMLA
|
#undef DO_FMLA
|
||||||
|
|
||||||
@ -3870,7 +3967,7 @@ static gen_helper_gvec_5_ptr * const fcmla_fns[4] = {
|
|||||||
gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d,
|
gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d,
|
||||||
};
|
};
|
||||||
TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz],
|
TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz],
|
||||||
a->rd, a->rn, a->rm, a->ra, a->pg, a->rot,
|
a->rd, a->rn, a->rm, a->ra, a->pg, a->rot | (s->fpcr_ah << 2),
|
||||||
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
|
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
|
||||||
|
|
||||||
static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = {
|
static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = {
|
||||||
@ -3890,7 +3987,8 @@ TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
|
|||||||
gen_helper_sve_fcvt_hs, a, 0, FPST_A64_F16)
|
gen_helper_sve_fcvt_hs, a, 0, FPST_A64_F16)
|
||||||
|
|
||||||
TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
|
TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
|
||||||
gen_helper_sve_bfcvt, a, 0, FPST_A64)
|
gen_helper_sve_bfcvt, a, 0,
|
||||||
|
s->fpcr_ah ? FPST_AH : FPST_A64)
|
||||||
|
|
||||||
TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
|
TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
|
||||||
gen_helper_sve_fcvt_dh, a, 0, FPST_A64)
|
gen_helper_sve_fcvt_dh, a, 0, FPST_A64)
|
||||||
@ -3993,7 +4091,7 @@ static gen_helper_gvec_3_ptr * const frecpx_fns[] = {
|
|||||||
gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d,
|
gen_helper_sve_frecpx_s, gen_helper_sve_frecpx_d,
|
||||||
};
|
};
|
||||||
TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz],
|
TRANS_FEAT(FRECPX, aa64_sve, gen_gvec_fpst_arg_zpz, frecpx_fns[a->esz],
|
||||||
a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
|
a, 0, select_ah_fpst(s, a->esz))
|
||||||
|
|
||||||
static gen_helper_gvec_3_ptr * const fsqrt_fns[] = {
|
static gen_helper_gvec_3_ptr * const fsqrt_fns[] = {
|
||||||
NULL, gen_helper_sve_fsqrt_h,
|
NULL, gen_helper_sve_fsqrt_h,
|
||||||
@ -7040,7 +7138,8 @@ TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz,
|
|||||||
gen_helper_sve2_fcvtnt_ds, a, 0, FPST_A64)
|
gen_helper_sve2_fcvtnt_ds, a, 0, FPST_A64)
|
||||||
|
|
||||||
TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
|
TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
|
||||||
gen_helper_sve_bfcvtnt, a, 0, FPST_A64)
|
gen_helper_sve_bfcvtnt, a, 0,
|
||||||
|
s->fpcr_ah ? FPST_AH : FPST_A64)
|
||||||
|
|
||||||
TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz,
|
TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz,
|
||||||
gen_helper_sve2_fcvtlt_hs, a, 0, FPST_A64)
|
gen_helper_sve2_fcvtlt_hs, a, 0, FPST_A64)
|
||||||
@ -7101,7 +7200,8 @@ TRANS_FEAT_NONSTREAMING(BFMMLA, aa64_sve_bf16, gen_gvec_env_arg_zzzz,
|
|||||||
static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
|
static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
|
||||||
{
|
{
|
||||||
return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal,
|
return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal,
|
||||||
a->rd, a->rn, a->rm, a->ra, sel, FPST_A64);
|
a->rd, a->rn, a->rm, a->ra, sel,
|
||||||
|
s->fpcr_ah ? FPST_AH : FPST_A64);
|
||||||
}
|
}
|
||||||
|
|
||||||
TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false)
|
TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false)
|
||||||
@ -7111,7 +7211,8 @@ static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
|
|||||||
{
|
{
|
||||||
return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx,
|
return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx,
|
||||||
a->rd, a->rn, a->rm, a->ra,
|
a->rd, a->rn, a->rm, a->ra,
|
||||||
(a->index << 1) | sel, FPST_A64);
|
(a->index << 1) | sel,
|
||||||
|
s->fpcr_ah ? FPST_AH : FPST_A64);
|
||||||
}
|
}
|
||||||
|
|
||||||
TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
|
TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
|
||||||
|
@ -155,6 +155,10 @@ typedef struct DisasContext {
|
|||||||
bool nv2_mem_e20;
|
bool nv2_mem_e20;
|
||||||
/* True if NV2 enabled and NV2 RAM accesses are big-endian */
|
/* True if NV2 enabled and NV2 RAM accesses are big-endian */
|
||||||
bool nv2_mem_be;
|
bool nv2_mem_be;
|
||||||
|
/* True if FPCR.AH is 1 (alternate floating point handling) */
|
||||||
|
bool fpcr_ah;
|
||||||
|
/* True if FPCR.NEP is 1 (FEAT_AFP scalar upper-element result handling) */
|
||||||
|
bool fpcr_nep;
|
||||||
/*
|
/*
|
||||||
* >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI.
|
* >= 0, a copy of PSTATE.BTYPE, which will be 0 without v8.5-BTI.
|
||||||
* < 0, set by the current instruction.
|
* < 0, set by the current instruction.
|
||||||
@ -666,66 +670,18 @@ static inline CPUARMTBFlags arm_tbflags_from_tb(const TranslationBlock *tb)
|
|||||||
return (CPUARMTBFlags){ tb->flags, tb->cs_base };
|
return (CPUARMTBFlags){ tb->flags, tb->cs_base };
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Enum for argument to fpstatus_ptr().
|
|
||||||
*/
|
|
||||||
typedef enum ARMFPStatusFlavour {
|
|
||||||
FPST_A32,
|
|
||||||
FPST_A64,
|
|
||||||
FPST_A32_F16,
|
|
||||||
FPST_A64_F16,
|
|
||||||
FPST_STD,
|
|
||||||
FPST_STD_F16,
|
|
||||||
} ARMFPStatusFlavour;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* fpstatus_ptr: return TCGv_ptr to the specified fp_status field
|
* fpstatus_ptr: return TCGv_ptr to the specified fp_status field
|
||||||
*
|
*
|
||||||
* We have multiple softfloat float_status fields in the Arm CPU state struct
|
* We have multiple softfloat float_status fields in the Arm CPU state struct
|
||||||
* (see the comment in cpu.h for details). Return a TCGv_ptr which has
|
* (see the comment in cpu.h for details). Return a TCGv_ptr which has
|
||||||
* been set up to point to the requested field in the CPU state struct.
|
* been set up to point to the requested field in the CPU state struct.
|
||||||
* The options are:
|
|
||||||
*
|
|
||||||
* FPST_A32
|
|
||||||
* for AArch32 non-FP16 operations controlled by the FPCR
|
|
||||||
* FPST_A64
|
|
||||||
* for AArch64 non-FP16 operations controlled by the FPCR
|
|
||||||
* FPST_A32_F16
|
|
||||||
* for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used
|
|
||||||
* FPST_A64_F16
|
|
||||||
* for AArch64 operations controlled by the FPCR where FPCR.FZ16 is to be used
|
|
||||||
* FPST_STD
|
|
||||||
* for A32/T32 Neon operations using the "standard FPSCR value"
|
|
||||||
* FPST_STD_F16
|
|
||||||
* as FPST_STD, but where FPCR.FZ16 is to be used
|
|
||||||
*/
|
*/
|
||||||
static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour)
|
static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour)
|
||||||
{
|
{
|
||||||
TCGv_ptr statusptr = tcg_temp_new_ptr();
|
TCGv_ptr statusptr = tcg_temp_new_ptr();
|
||||||
int offset;
|
int offset = offsetof(CPUARMState, vfp.fp_status[flavour]);
|
||||||
|
|
||||||
switch (flavour) {
|
|
||||||
case FPST_A32:
|
|
||||||
offset = offsetof(CPUARMState, vfp.fp_status_a32);
|
|
||||||
break;
|
|
||||||
case FPST_A64:
|
|
||||||
offset = offsetof(CPUARMState, vfp.fp_status_a64);
|
|
||||||
break;
|
|
||||||
case FPST_A32_F16:
|
|
||||||
offset = offsetof(CPUARMState, vfp.fp_status_f16_a32);
|
|
||||||
break;
|
|
||||||
case FPST_A64_F16:
|
|
||||||
offset = offsetof(CPUARMState, vfp.fp_status_f16_a64);
|
|
||||||
break;
|
|
||||||
case FPST_STD:
|
|
||||||
offset = offsetof(CPUARMState, vfp.standard_fp_status);
|
|
||||||
break;
|
|
||||||
case FPST_STD_F16:
|
|
||||||
offset = offsetof(CPUARMState, vfp.standard_fp_status_f16);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
g_assert_not_reached();
|
|
||||||
}
|
|
||||||
tcg_gen_addi_ptr(statusptr, tcg_env, offset);
|
tcg_gen_addi_ptr(statusptr, tcg_env, offset);
|
||||||
return statusptr;
|
return statusptr;
|
||||||
}
|
}
|
||||||
|
@ -879,19 +879,21 @@ void HELPER(gvec_fcaddh)(void *vd, void *vn, void *vm,
|
|||||||
float16 *d = vd;
|
float16 *d = vd;
|
||||||
float16 *n = vn;
|
float16 *n = vn;
|
||||||
float16 *m = vm;
|
float16 *m = vm;
|
||||||
uint32_t neg_real = extract32(desc, SIMD_DATA_SHIFT, 1);
|
bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
|
||||||
uint32_t neg_imag = neg_real ^ 1;
|
bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1);
|
||||||
uintptr_t i;
|
uintptr_t i;
|
||||||
|
|
||||||
/* Shift boolean to the sign bit so we can xor to negate. */
|
|
||||||
neg_real <<= 15;
|
|
||||||
neg_imag <<= 15;
|
|
||||||
|
|
||||||
for (i = 0; i < opr_sz / 2; i += 2) {
|
for (i = 0; i < opr_sz / 2; i += 2) {
|
||||||
float16 e0 = n[H2(i)];
|
float16 e0 = n[H2(i)];
|
||||||
float16 e1 = m[H2(i + 1)] ^ neg_imag;
|
float16 e1 = m[H2(i + 1)];
|
||||||
float16 e2 = n[H2(i + 1)];
|
float16 e2 = n[H2(i + 1)];
|
||||||
float16 e3 = m[H2(i)] ^ neg_real;
|
float16 e3 = m[H2(i)];
|
||||||
|
|
||||||
|
if (rot) {
|
||||||
|
e3 = float16_maybe_ah_chs(e3, fpcr_ah);
|
||||||
|
} else {
|
||||||
|
e1 = float16_maybe_ah_chs(e1, fpcr_ah);
|
||||||
|
}
|
||||||
|
|
||||||
d[H2(i)] = float16_add(e0, e1, fpst);
|
d[H2(i)] = float16_add(e0, e1, fpst);
|
||||||
d[H2(i + 1)] = float16_add(e2, e3, fpst);
|
d[H2(i + 1)] = float16_add(e2, e3, fpst);
|
||||||
@ -906,19 +908,21 @@ void HELPER(gvec_fcadds)(void *vd, void *vn, void *vm,
|
|||||||
float32 *d = vd;
|
float32 *d = vd;
|
||||||
float32 *n = vn;
|
float32 *n = vn;
|
||||||
float32 *m = vm;
|
float32 *m = vm;
|
||||||
uint32_t neg_real = extract32(desc, SIMD_DATA_SHIFT, 1);
|
bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
|
||||||
uint32_t neg_imag = neg_real ^ 1;
|
bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1);
|
||||||
uintptr_t i;
|
uintptr_t i;
|
||||||
|
|
||||||
/* Shift boolean to the sign bit so we can xor to negate. */
|
|
||||||
neg_real <<= 31;
|
|
||||||
neg_imag <<= 31;
|
|
||||||
|
|
||||||
for (i = 0; i < opr_sz / 4; i += 2) {
|
for (i = 0; i < opr_sz / 4; i += 2) {
|
||||||
float32 e0 = n[H4(i)];
|
float32 e0 = n[H4(i)];
|
||||||
float32 e1 = m[H4(i + 1)] ^ neg_imag;
|
float32 e1 = m[H4(i + 1)];
|
||||||
float32 e2 = n[H4(i + 1)];
|
float32 e2 = n[H4(i + 1)];
|
||||||
float32 e3 = m[H4(i)] ^ neg_real;
|
float32 e3 = m[H4(i)];
|
||||||
|
|
||||||
|
if (rot) {
|
||||||
|
e3 = float32_maybe_ah_chs(e3, fpcr_ah);
|
||||||
|
} else {
|
||||||
|
e1 = float32_maybe_ah_chs(e1, fpcr_ah);
|
||||||
|
}
|
||||||
|
|
||||||
d[H4(i)] = float32_add(e0, e1, fpst);
|
d[H4(i)] = float32_add(e0, e1, fpst);
|
||||||
d[H4(i + 1)] = float32_add(e2, e3, fpst);
|
d[H4(i + 1)] = float32_add(e2, e3, fpst);
|
||||||
@ -933,19 +937,21 @@ void HELPER(gvec_fcaddd)(void *vd, void *vn, void *vm,
|
|||||||
float64 *d = vd;
|
float64 *d = vd;
|
||||||
float64 *n = vn;
|
float64 *n = vn;
|
||||||
float64 *m = vm;
|
float64 *m = vm;
|
||||||
uint64_t neg_real = extract64(desc, SIMD_DATA_SHIFT, 1);
|
bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
|
||||||
uint64_t neg_imag = neg_real ^ 1;
|
bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1);
|
||||||
uintptr_t i;
|
uintptr_t i;
|
||||||
|
|
||||||
/* Shift boolean to the sign bit so we can xor to negate. */
|
|
||||||
neg_real <<= 63;
|
|
||||||
neg_imag <<= 63;
|
|
||||||
|
|
||||||
for (i = 0; i < opr_sz / 8; i += 2) {
|
for (i = 0; i < opr_sz / 8; i += 2) {
|
||||||
float64 e0 = n[i];
|
float64 e0 = n[i];
|
||||||
float64 e1 = m[i + 1] ^ neg_imag;
|
float64 e1 = m[i + 1];
|
||||||
float64 e2 = n[i + 1];
|
float64 e2 = n[i + 1];
|
||||||
float64 e3 = m[i] ^ neg_real;
|
float64 e3 = m[i];
|
||||||
|
|
||||||
|
if (rot) {
|
||||||
|
e3 = float64_maybe_ah_chs(e3, fpcr_ah);
|
||||||
|
} else {
|
||||||
|
e1 = float64_maybe_ah_chs(e1, fpcr_ah);
|
||||||
|
}
|
||||||
|
|
||||||
d[i] = float64_add(e0, e1, fpst);
|
d[i] = float64_add(e0, e1, fpst);
|
||||||
d[i + 1] = float64_add(e2, e3, fpst);
|
d[i + 1] = float64_add(e2, e3, fpst);
|
||||||
@ -959,22 +965,26 @@ void HELPER(gvec_fcmlah)(void *vd, void *vn, void *vm, void *va,
|
|||||||
uintptr_t opr_sz = simd_oprsz(desc);
|
uintptr_t opr_sz = simd_oprsz(desc);
|
||||||
float16 *d = vd, *n = vn, *m = vm, *a = va;
|
float16 *d = vd, *n = vn, *m = vm, *a = va;
|
||||||
intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
|
intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
|
||||||
uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
|
uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
|
||||||
uint32_t neg_real = flip ^ neg_imag;
|
uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
|
||||||
|
uint32_t negf_real = flip ^ negf_imag;
|
||||||
|
float16 negx_imag, negx_real;
|
||||||
uintptr_t i;
|
uintptr_t i;
|
||||||
|
|
||||||
/* Shift boolean to the sign bit so we can xor to negate. */
|
/* With AH=0, use negx; with AH=1 use negf. */
|
||||||
neg_real <<= 15;
|
negx_real = (negf_real & ~fpcr_ah) << 15;
|
||||||
neg_imag <<= 15;
|
negx_imag = (negf_imag & ~fpcr_ah) << 15;
|
||||||
|
negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
|
||||||
|
negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
|
||||||
|
|
||||||
for (i = 0; i < opr_sz / 2; i += 2) {
|
for (i = 0; i < opr_sz / 2; i += 2) {
|
||||||
float16 e2 = n[H2(i + flip)];
|
float16 e2 = n[H2(i + flip)];
|
||||||
float16 e1 = m[H2(i + flip)] ^ neg_real;
|
float16 e1 = m[H2(i + flip)] ^ negx_real;
|
||||||
float16 e4 = e2;
|
float16 e4 = e2;
|
||||||
float16 e3 = m[H2(i + 1 - flip)] ^ neg_imag;
|
float16 e3 = m[H2(i + 1 - flip)] ^ negx_imag;
|
||||||
|
|
||||||
d[H2(i)] = float16_muladd(e2, e1, a[H2(i)], 0, fpst);
|
d[H2(i)] = float16_muladd(e2, e1, a[H2(i)], negf_real, fpst);
|
||||||
d[H2(i + 1)] = float16_muladd(e4, e3, a[H2(i + 1)], 0, fpst);
|
d[H2(i + 1)] = float16_muladd(e4, e3, a[H2(i + 1)], negf_imag, fpst);
|
||||||
}
|
}
|
||||||
clear_tail(d, opr_sz, simd_maxsz(desc));
|
clear_tail(d, opr_sz, simd_maxsz(desc));
|
||||||
}
|
}
|
||||||
@ -985,29 +995,33 @@ void HELPER(gvec_fcmlah_idx)(void *vd, void *vn, void *vm, void *va,
|
|||||||
uintptr_t opr_sz = simd_oprsz(desc);
|
uintptr_t opr_sz = simd_oprsz(desc);
|
||||||
float16 *d = vd, *n = vn, *m = vm, *a = va;
|
float16 *d = vd, *n = vn, *m = vm, *a = va;
|
||||||
intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
|
intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
|
||||||
uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
|
uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
|
||||||
intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2);
|
intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2);
|
||||||
uint32_t neg_real = flip ^ neg_imag;
|
uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 4, 1);
|
||||||
|
uint32_t negf_real = flip ^ negf_imag;
|
||||||
intptr_t elements = opr_sz / sizeof(float16);
|
intptr_t elements = opr_sz / sizeof(float16);
|
||||||
intptr_t eltspersegment = MIN(16 / sizeof(float16), elements);
|
intptr_t eltspersegment = MIN(16 / sizeof(float16), elements);
|
||||||
|
float16 negx_imag, negx_real;
|
||||||
intptr_t i, j;
|
intptr_t i, j;
|
||||||
|
|
||||||
/* Shift boolean to the sign bit so we can xor to negate. */
|
/* With AH=0, use negx; with AH=1 use negf. */
|
||||||
neg_real <<= 15;
|
negx_real = (negf_real & ~fpcr_ah) << 15;
|
||||||
neg_imag <<= 15;
|
negx_imag = (negf_imag & ~fpcr_ah) << 15;
|
||||||
|
negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
|
||||||
|
negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
|
||||||
|
|
||||||
for (i = 0; i < elements; i += eltspersegment) {
|
for (i = 0; i < elements; i += eltspersegment) {
|
||||||
float16 mr = m[H2(i + 2 * index + 0)];
|
float16 mr = m[H2(i + 2 * index + 0)];
|
||||||
float16 mi = m[H2(i + 2 * index + 1)];
|
float16 mi = m[H2(i + 2 * index + 1)];
|
||||||
float16 e1 = neg_real ^ (flip ? mi : mr);
|
float16 e1 = negx_real ^ (flip ? mi : mr);
|
||||||
float16 e3 = neg_imag ^ (flip ? mr : mi);
|
float16 e3 = negx_imag ^ (flip ? mr : mi);
|
||||||
|
|
||||||
for (j = i; j < i + eltspersegment; j += 2) {
|
for (j = i; j < i + eltspersegment; j += 2) {
|
||||||
float16 e2 = n[H2(j + flip)];
|
float16 e2 = n[H2(j + flip)];
|
||||||
float16 e4 = e2;
|
float16 e4 = e2;
|
||||||
|
|
||||||
d[H2(j)] = float16_muladd(e2, e1, a[H2(j)], 0, fpst);
|
d[H2(j)] = float16_muladd(e2, e1, a[H2(j)], negf_real, fpst);
|
||||||
d[H2(j + 1)] = float16_muladd(e4, e3, a[H2(j + 1)], 0, fpst);
|
d[H2(j + 1)] = float16_muladd(e4, e3, a[H2(j + 1)], negf_imag, fpst);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
clear_tail(d, opr_sz, simd_maxsz(desc));
|
clear_tail(d, opr_sz, simd_maxsz(desc));
|
||||||
@ -1019,22 +1033,26 @@ void HELPER(gvec_fcmlas)(void *vd, void *vn, void *vm, void *va,
|
|||||||
uintptr_t opr_sz = simd_oprsz(desc);
|
uintptr_t opr_sz = simd_oprsz(desc);
|
||||||
float32 *d = vd, *n = vn, *m = vm, *a = va;
|
float32 *d = vd, *n = vn, *m = vm, *a = va;
|
||||||
intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
|
intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
|
||||||
uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
|
uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
|
||||||
uint32_t neg_real = flip ^ neg_imag;
|
uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
|
||||||
|
uint32_t negf_real = flip ^ negf_imag;
|
||||||
|
float32 negx_imag, negx_real;
|
||||||
uintptr_t i;
|
uintptr_t i;
|
||||||
|
|
||||||
/* Shift boolean to the sign bit so we can xor to negate. */
|
/* With AH=0, use negx; with AH=1 use negf. */
|
||||||
neg_real <<= 31;
|
negx_real = (negf_real & ~fpcr_ah) << 31;
|
||||||
neg_imag <<= 31;
|
negx_imag = (negf_imag & ~fpcr_ah) << 31;
|
||||||
|
negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
|
||||||
|
negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
|
||||||
|
|
||||||
for (i = 0; i < opr_sz / 4; i += 2) {
|
for (i = 0; i < opr_sz / 4; i += 2) {
|
||||||
float32 e2 = n[H4(i + flip)];
|
float32 e2 = n[H4(i + flip)];
|
||||||
float32 e1 = m[H4(i + flip)] ^ neg_real;
|
float32 e1 = m[H4(i + flip)] ^ negx_real;
|
||||||
float32 e4 = e2;
|
float32 e4 = e2;
|
||||||
float32 e3 = m[H4(i + 1 - flip)] ^ neg_imag;
|
float32 e3 = m[H4(i + 1 - flip)] ^ negx_imag;
|
||||||
|
|
||||||
d[H4(i)] = float32_muladd(e2, e1, a[H4(i)], 0, fpst);
|
d[H4(i)] = float32_muladd(e2, e1, a[H4(i)], negf_real, fpst);
|
||||||
d[H4(i + 1)] = float32_muladd(e4, e3, a[H4(i + 1)], 0, fpst);
|
d[H4(i + 1)] = float32_muladd(e4, e3, a[H4(i + 1)], negf_imag, fpst);
|
||||||
}
|
}
|
||||||
clear_tail(d, opr_sz, simd_maxsz(desc));
|
clear_tail(d, opr_sz, simd_maxsz(desc));
|
||||||
}
|
}
|
||||||
@ -1045,29 +1063,33 @@ void HELPER(gvec_fcmlas_idx)(void *vd, void *vn, void *vm, void *va,
|
|||||||
uintptr_t opr_sz = simd_oprsz(desc);
|
uintptr_t opr_sz = simd_oprsz(desc);
|
||||||
float32 *d = vd, *n = vn, *m = vm, *a = va;
|
float32 *d = vd, *n = vn, *m = vm, *a = va;
|
||||||
intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
|
intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
|
||||||
uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
|
uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
|
||||||
intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2);
|
intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2);
|
||||||
uint32_t neg_real = flip ^ neg_imag;
|
uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 4, 1);
|
||||||
|
uint32_t negf_real = flip ^ negf_imag;
|
||||||
intptr_t elements = opr_sz / sizeof(float32);
|
intptr_t elements = opr_sz / sizeof(float32);
|
||||||
intptr_t eltspersegment = MIN(16 / sizeof(float32), elements);
|
intptr_t eltspersegment = MIN(16 / sizeof(float32), elements);
|
||||||
|
float32 negx_imag, negx_real;
|
||||||
intptr_t i, j;
|
intptr_t i, j;
|
||||||
|
|
||||||
/* Shift boolean to the sign bit so we can xor to negate. */
|
/* With AH=0, use negx; with AH=1 use negf. */
|
||||||
neg_real <<= 31;
|
negx_real = (negf_real & ~fpcr_ah) << 31;
|
||||||
neg_imag <<= 31;
|
negx_imag = (negf_imag & ~fpcr_ah) << 31;
|
||||||
|
negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
|
||||||
|
negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
|
||||||
|
|
||||||
for (i = 0; i < elements; i += eltspersegment) {
|
for (i = 0; i < elements; i += eltspersegment) {
|
||||||
float32 mr = m[H4(i + 2 * index + 0)];
|
float32 mr = m[H4(i + 2 * index + 0)];
|
||||||
float32 mi = m[H4(i + 2 * index + 1)];
|
float32 mi = m[H4(i + 2 * index + 1)];
|
||||||
float32 e1 = neg_real ^ (flip ? mi : mr);
|
float32 e1 = negx_real ^ (flip ? mi : mr);
|
||||||
float32 e3 = neg_imag ^ (flip ? mr : mi);
|
float32 e3 = negx_imag ^ (flip ? mr : mi);
|
||||||
|
|
||||||
for (j = i; j < i + eltspersegment; j += 2) {
|
for (j = i; j < i + eltspersegment; j += 2) {
|
||||||
float32 e2 = n[H4(j + flip)];
|
float32 e2 = n[H4(j + flip)];
|
||||||
float32 e4 = e2;
|
float32 e4 = e2;
|
||||||
|
|
||||||
d[H4(j)] = float32_muladd(e2, e1, a[H4(j)], 0, fpst);
|
d[H4(j)] = float32_muladd(e2, e1, a[H4(j)], negf_real, fpst);
|
||||||
d[H4(j + 1)] = float32_muladd(e4, e3, a[H4(j + 1)], 0, fpst);
|
d[H4(j + 1)] = float32_muladd(e4, e3, a[H4(j + 1)], negf_imag, fpst);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
clear_tail(d, opr_sz, simd_maxsz(desc));
|
clear_tail(d, opr_sz, simd_maxsz(desc));
|
||||||
@ -1079,22 +1101,26 @@ void HELPER(gvec_fcmlad)(void *vd, void *vn, void *vm, void *va,
|
|||||||
uintptr_t opr_sz = simd_oprsz(desc);
|
uintptr_t opr_sz = simd_oprsz(desc);
|
||||||
float64 *d = vd, *n = vn, *m = vm, *a = va;
|
float64 *d = vd, *n = vn, *m = vm, *a = va;
|
||||||
intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
|
intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
|
||||||
uint64_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
|
uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
|
||||||
uint64_t neg_real = flip ^ neg_imag;
|
uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
|
||||||
|
uint32_t negf_real = flip ^ negf_imag;
|
||||||
|
float64 negx_real, negx_imag;
|
||||||
uintptr_t i;
|
uintptr_t i;
|
||||||
|
|
||||||
/* Shift boolean to the sign bit so we can xor to negate. */
|
/* With AH=0, use negx; with AH=1 use negf. */
|
||||||
neg_real <<= 63;
|
negx_real = (uint64_t)(negf_real & ~fpcr_ah) << 63;
|
||||||
neg_imag <<= 63;
|
negx_imag = (uint64_t)(negf_imag & ~fpcr_ah) << 63;
|
||||||
|
negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
|
||||||
|
negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
|
||||||
|
|
||||||
for (i = 0; i < opr_sz / 8; i += 2) {
|
for (i = 0; i < opr_sz / 8; i += 2) {
|
||||||
float64 e2 = n[i + flip];
|
float64 e2 = n[i + flip];
|
||||||
float64 e1 = m[i + flip] ^ neg_real;
|
float64 e1 = m[i + flip] ^ negx_real;
|
||||||
float64 e4 = e2;
|
float64 e4 = e2;
|
||||||
float64 e3 = m[i + 1 - flip] ^ neg_imag;
|
float64 e3 = m[i + 1 - flip] ^ negx_imag;
|
||||||
|
|
||||||
d[i] = float64_muladd(e2, e1, a[i], 0, fpst);
|
d[i] = float64_muladd(e2, e1, a[i], negf_real, fpst);
|
||||||
d[i + 1] = float64_muladd(e4, e3, a[i + 1], 0, fpst);
|
d[i + 1] = float64_muladd(e4, e3, a[i + 1], negf_imag, fpst);
|
||||||
}
|
}
|
||||||
clear_tail(d, opr_sz, simd_maxsz(desc));
|
clear_tail(d, opr_sz, simd_maxsz(desc));
|
||||||
}
|
}
|
||||||
@ -1210,10 +1236,12 @@ void HELPER(NAME)(void *vd, void *vn, float_status *stat, uint32_t desc) \
|
|||||||
|
|
||||||
DO_2OP(gvec_frecpe_h, helper_recpe_f16, float16)
|
DO_2OP(gvec_frecpe_h, helper_recpe_f16, float16)
|
||||||
DO_2OP(gvec_frecpe_s, helper_recpe_f32, float32)
|
DO_2OP(gvec_frecpe_s, helper_recpe_f32, float32)
|
||||||
|
DO_2OP(gvec_frecpe_rpres_s, helper_recpe_rpres_f32, float32)
|
||||||
DO_2OP(gvec_frecpe_d, helper_recpe_f64, float64)
|
DO_2OP(gvec_frecpe_d, helper_recpe_f64, float64)
|
||||||
|
|
||||||
DO_2OP(gvec_frsqrte_h, helper_rsqrte_f16, float16)
|
DO_2OP(gvec_frsqrte_h, helper_rsqrte_f16, float16)
|
||||||
DO_2OP(gvec_frsqrte_s, helper_rsqrte_f32, float32)
|
DO_2OP(gvec_frsqrte_s, helper_rsqrte_f32, float32)
|
||||||
|
DO_2OP(gvec_frsqrte_rpres_s, helper_rsqrte_rpres_f32, float32)
|
||||||
DO_2OP(gvec_frsqrte_d, helper_rsqrte_f64, float64)
|
DO_2OP(gvec_frsqrte_d, helper_rsqrte_f64, float64)
|
||||||
|
|
||||||
DO_2OP(gvec_vrintx_h, float16_round_to_int, float16)
|
DO_2OP(gvec_vrintx_h, float16_round_to_int, float16)
|
||||||
@ -1302,6 +1330,25 @@ static float64 float64_abd(float64 op1, float64 op2, float_status *stat)
|
|||||||
return float64_abs(float64_sub(op1, op2, stat));
|
return float64_abs(float64_sub(op1, op2, stat));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* ABD when FPCR.AH = 1: avoid flipping sign bit of a NaN result */
|
||||||
|
static float16 float16_ah_abd(float16 op1, float16 op2, float_status *stat)
|
||||||
|
{
|
||||||
|
float16 r = float16_sub(op1, op2, stat);
|
||||||
|
return float16_is_any_nan(r) ? r : float16_abs(r);
|
||||||
|
}
|
||||||
|
|
||||||
|
static float32 float32_ah_abd(float32 op1, float32 op2, float_status *stat)
|
||||||
|
{
|
||||||
|
float32 r = float32_sub(op1, op2, stat);
|
||||||
|
return float32_is_any_nan(r) ? r : float32_abs(r);
|
||||||
|
}
|
||||||
|
|
||||||
|
static float64 float64_ah_abd(float64 op1, float64 op2, float_status *stat)
|
||||||
|
{
|
||||||
|
float64 r = float64_sub(op1, op2, stat);
|
||||||
|
return float64_is_any_nan(r) ? r : float64_abs(r);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Reciprocal step. These are the AArch32 version which uses a
|
* Reciprocal step. These are the AArch32 version which uses a
|
||||||
* non-fused multiply-and-subtract.
|
* non-fused multiply-and-subtract.
|
||||||
@ -1389,6 +1436,10 @@ DO_3OP(gvec_fabd_h, float16_abd, float16)
|
|||||||
DO_3OP(gvec_fabd_s, float32_abd, float32)
|
DO_3OP(gvec_fabd_s, float32_abd, float32)
|
||||||
DO_3OP(gvec_fabd_d, float64_abd, float64)
|
DO_3OP(gvec_fabd_d, float64_abd, float64)
|
||||||
|
|
||||||
|
DO_3OP(gvec_ah_fabd_h, float16_ah_abd, float16)
|
||||||
|
DO_3OP(gvec_ah_fabd_s, float32_ah_abd, float32)
|
||||||
|
DO_3OP(gvec_ah_fabd_d, float64_ah_abd, float64)
|
||||||
|
|
||||||
DO_3OP(gvec_fceq_h, float16_ceq, float16)
|
DO_3OP(gvec_fceq_h, float16_ceq, float16)
|
||||||
DO_3OP(gvec_fceq_s, float32_ceq, float32)
|
DO_3OP(gvec_fceq_s, float32_ceq, float32)
|
||||||
DO_3OP(gvec_fceq_d, float64_ceq, float64)
|
DO_3OP(gvec_fceq_d, float64_ceq, float64)
|
||||||
@ -1448,6 +1499,22 @@ DO_3OP(gvec_rsqrts_h, helper_rsqrtsf_f16, float16)
|
|||||||
DO_3OP(gvec_rsqrts_s, helper_rsqrtsf_f32, float32)
|
DO_3OP(gvec_rsqrts_s, helper_rsqrtsf_f32, float32)
|
||||||
DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64)
|
DO_3OP(gvec_rsqrts_d, helper_rsqrtsf_f64, float64)
|
||||||
|
|
||||||
|
DO_3OP(gvec_ah_recps_h, helper_recpsf_ah_f16, float16)
|
||||||
|
DO_3OP(gvec_ah_recps_s, helper_recpsf_ah_f32, float32)
|
||||||
|
DO_3OP(gvec_ah_recps_d, helper_recpsf_ah_f64, float64)
|
||||||
|
|
||||||
|
DO_3OP(gvec_ah_rsqrts_h, helper_rsqrtsf_ah_f16, float16)
|
||||||
|
DO_3OP(gvec_ah_rsqrts_s, helper_rsqrtsf_ah_f32, float32)
|
||||||
|
DO_3OP(gvec_ah_rsqrts_d, helper_rsqrtsf_ah_f64, float64)
|
||||||
|
|
||||||
|
DO_3OP(gvec_ah_fmax_h, helper_vfp_ah_maxh, float16)
|
||||||
|
DO_3OP(gvec_ah_fmax_s, helper_vfp_ah_maxs, float32)
|
||||||
|
DO_3OP(gvec_ah_fmax_d, helper_vfp_ah_maxd, float64)
|
||||||
|
|
||||||
|
DO_3OP(gvec_ah_fmin_h, helper_vfp_ah_minh, float16)
|
||||||
|
DO_3OP(gvec_ah_fmin_s, helper_vfp_ah_mins, float32)
|
||||||
|
DO_3OP(gvec_ah_fmin_d, helper_vfp_ah_mind, float64)
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
#undef DO_3OP
|
#undef DO_3OP
|
||||||
|
|
||||||
@ -1513,6 +1580,24 @@ static float64 float64_mulsub_f(float64 dest, float64 op1, float64 op2,
|
|||||||
return float64_muladd(float64_chs(op1), op2, dest, 0, stat);
|
return float64_muladd(float64_chs(op1), op2, dest, 0, stat);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static float16 float16_ah_mulsub_f(float16 dest, float16 op1, float16 op2,
|
||||||
|
float_status *stat)
|
||||||
|
{
|
||||||
|
return float16_muladd(op1, op2, dest, float_muladd_negate_product, stat);
|
||||||
|
}
|
||||||
|
|
||||||
|
static float32 float32_ah_mulsub_f(float32 dest, float32 op1, float32 op2,
|
||||||
|
float_status *stat)
|
||||||
|
{
|
||||||
|
return float32_muladd(op1, op2, dest, float_muladd_negate_product, stat);
|
||||||
|
}
|
||||||
|
|
||||||
|
static float64 float64_ah_mulsub_f(float64 dest, float64 op1, float64 op2,
|
||||||
|
float_status *stat)
|
||||||
|
{
|
||||||
|
return float64_muladd(op1, op2, dest, float_muladd_negate_product, stat);
|
||||||
|
}
|
||||||
|
|
||||||
#define DO_MULADD(NAME, FUNC, TYPE) \
|
#define DO_MULADD(NAME, FUNC, TYPE) \
|
||||||
void HELPER(NAME)(void *vd, void *vn, void *vm, \
|
void HELPER(NAME)(void *vd, void *vn, void *vm, \
|
||||||
float_status *stat, uint32_t desc) \
|
float_status *stat, uint32_t desc) \
|
||||||
@ -1539,6 +1624,10 @@ DO_MULADD(gvec_vfms_h, float16_mulsub_f, float16)
|
|||||||
DO_MULADD(gvec_vfms_s, float32_mulsub_f, float32)
|
DO_MULADD(gvec_vfms_s, float32_mulsub_f, float32)
|
||||||
DO_MULADD(gvec_vfms_d, float64_mulsub_f, float64)
|
DO_MULADD(gvec_vfms_d, float64_mulsub_f, float64)
|
||||||
|
|
||||||
|
DO_MULADD(gvec_ah_vfms_h, float16_ah_mulsub_f, float16)
|
||||||
|
DO_MULADD(gvec_ah_vfms_s, float32_ah_mulsub_f, float32)
|
||||||
|
DO_MULADD(gvec_ah_vfms_d, float64_ah_mulsub_f, float64)
|
||||||
|
|
||||||
/* For the indexed ops, SVE applies the index per 128-bit vector segment.
|
/* For the indexed ops, SVE applies the index per 128-bit vector segment.
|
||||||
* For AdvSIMD, there is of course only one such vector segment.
|
* For AdvSIMD, there is of course only one such vector segment.
|
||||||
*/
|
*/
|
||||||
@ -1635,29 +1724,35 @@ DO_FMUL_IDX(gvec_fmls_nf_idx_s, float32_sub, float32_mul, float32, H4)
|
|||||||
|
|
||||||
#undef DO_FMUL_IDX
|
#undef DO_FMUL_IDX
|
||||||
|
|
||||||
#define DO_FMLA_IDX(NAME, TYPE, H) \
|
#define DO_FMLA_IDX(NAME, TYPE, H, NEGX, NEGF) \
|
||||||
void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \
|
void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \
|
||||||
float_status *stat, uint32_t desc) \
|
float_status *stat, uint32_t desc) \
|
||||||
{ \
|
{ \
|
||||||
intptr_t i, j, oprsz = simd_oprsz(desc); \
|
intptr_t i, j, oprsz = simd_oprsz(desc); \
|
||||||
intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \
|
intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \
|
||||||
TYPE op1_neg = extract32(desc, SIMD_DATA_SHIFT, 1); \
|
intptr_t idx = simd_data(desc); \
|
||||||
intptr_t idx = desc >> (SIMD_DATA_SHIFT + 1); \
|
|
||||||
TYPE *d = vd, *n = vn, *m = vm, *a = va; \
|
TYPE *d = vd, *n = vn, *m = vm, *a = va; \
|
||||||
op1_neg <<= (8 * sizeof(TYPE) - 1); \
|
|
||||||
for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
|
for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
|
||||||
TYPE mm = m[H(i + idx)]; \
|
TYPE mm = m[H(i + idx)]; \
|
||||||
for (j = 0; j < segment; j++) { \
|
for (j = 0; j < segment; j++) { \
|
||||||
d[i + j] = TYPE##_muladd(n[i + j] ^ op1_neg, \
|
d[i + j] = TYPE##_muladd(n[i + j] ^ NEGX, mm, \
|
||||||
mm, a[i + j], 0, stat); \
|
a[i + j], NEGF, stat); \
|
||||||
} \
|
} \
|
||||||
} \
|
} \
|
||||||
clear_tail(d, oprsz, simd_maxsz(desc)); \
|
clear_tail(d, oprsz, simd_maxsz(desc)); \
|
||||||
}
|
}
|
||||||
|
|
||||||
DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2)
|
DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2, 0, 0)
|
||||||
DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4)
|
DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4, 0, 0)
|
||||||
DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8)
|
DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8, 0, 0)
|
||||||
|
|
||||||
|
DO_FMLA_IDX(gvec_fmls_idx_h, float16, H2, INT16_MIN, 0)
|
||||||
|
DO_FMLA_IDX(gvec_fmls_idx_s, float32, H4, INT32_MIN, 0)
|
||||||
|
DO_FMLA_IDX(gvec_fmls_idx_d, float64, H8, INT64_MIN, 0)
|
||||||
|
|
||||||
|
DO_FMLA_IDX(gvec_ah_fmls_idx_h, float16, H2, 0, float_muladd_negate_product)
|
||||||
|
DO_FMLA_IDX(gvec_ah_fmls_idx_s, float32, H4, 0, float_muladd_negate_product)
|
||||||
|
DO_FMLA_IDX(gvec_ah_fmls_idx_d, float64, H8, 0, float_muladd_negate_product)
|
||||||
|
|
||||||
#undef DO_FMLA_IDX
|
#undef DO_FMLA_IDX
|
||||||
|
|
||||||
@ -2030,28 +2125,29 @@ static uint64_t load4_f16(uint64_t *ptr, int is_q, int is_2)
|
|||||||
* as there is not yet SVE versions that might use blocking.
|
* as there is not yet SVE versions that might use blocking.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst,
|
static void do_fmlal(float32 *d, void *vn, void *vm,
|
||||||
uint32_t desc, bool fz16)
|
CPUARMState *env, uint32_t desc,
|
||||||
|
ARMFPStatusFlavour fpst_idx,
|
||||||
|
uint64_t negx, int negf)
|
||||||
{
|
{
|
||||||
|
float_status *fpst = &env->vfp.fp_status[fpst_idx];
|
||||||
|
bool fz16 = env->vfp.fpcr & FPCR_FZ16;
|
||||||
intptr_t i, oprsz = simd_oprsz(desc);
|
intptr_t i, oprsz = simd_oprsz(desc);
|
||||||
int is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
|
|
||||||
int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
|
int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
|
||||||
int is_q = oprsz == 16;
|
int is_q = oprsz == 16;
|
||||||
uint64_t n_4, m_4;
|
uint64_t n_4, m_4;
|
||||||
|
|
||||||
/* Pre-load all of the f16 data, avoiding overlap issues. */
|
/*
|
||||||
n_4 = load4_f16(vn, is_q, is_2);
|
* Pre-load all of the f16 data, avoiding overlap issues.
|
||||||
|
* Negate all inputs for AH=0 FMLSL at once.
|
||||||
|
*/
|
||||||
|
n_4 = load4_f16(vn, is_q, is_2) ^ negx;
|
||||||
m_4 = load4_f16(vm, is_q, is_2);
|
m_4 = load4_f16(vm, is_q, is_2);
|
||||||
|
|
||||||
/* Negate all inputs for FMLSL at once. */
|
|
||||||
if (is_s) {
|
|
||||||
n_4 ^= 0x8000800080008000ull;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0; i < oprsz / 4; i++) {
|
for (i = 0; i < oprsz / 4; i++) {
|
||||||
float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16);
|
float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16);
|
||||||
float32 m_1 = float16_to_float32_by_bits(m_4 >> (i * 16), fz16);
|
float32 m_1 = float16_to_float32_by_bits(m_4 >> (i * 16), fz16);
|
||||||
d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], 0, fpst);
|
d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], negf, fpst);
|
||||||
}
|
}
|
||||||
clear_tail(d, oprsz, simd_maxsz(desc));
|
clear_tail(d, oprsz, simd_maxsz(desc));
|
||||||
}
|
}
|
||||||
@ -2059,61 +2155,82 @@ static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst,
|
|||||||
void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm,
|
void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm,
|
||||||
CPUARMState *env, uint32_t desc)
|
CPUARMState *env, uint32_t desc)
|
||||||
{
|
{
|
||||||
do_fmlal(vd, vn, vm, &env->vfp.standard_fp_status, desc,
|
bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
|
||||||
get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32));
|
uint64_t negx = is_s ? 0x8000800080008000ull : 0;
|
||||||
|
|
||||||
|
do_fmlal(vd, vn, vm, env, desc, FPST_STD, negx, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
|
void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
|
||||||
CPUARMState *env, uint32_t desc)
|
CPUARMState *env, uint32_t desc)
|
||||||
{
|
{
|
||||||
do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, desc,
|
bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
|
||||||
get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64));
|
uint64_t negx = 0;
|
||||||
|
int negf = 0;
|
||||||
|
|
||||||
|
if (is_s) {
|
||||||
|
if (env->vfp.fpcr & FPCR_AH) {
|
||||||
|
negf = float_muladd_negate_product;
|
||||||
|
} else {
|
||||||
|
negx = 0x8000800080008000ull;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
do_fmlal(vd, vn, vm, env, desc, FPST_A64, negx, negf);
|
||||||
}
|
}
|
||||||
|
|
||||||
void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
|
void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
|
||||||
CPUARMState *env, uint32_t desc)
|
CPUARMState *env, uint32_t desc)
|
||||||
{
|
{
|
||||||
intptr_t i, oprsz = simd_oprsz(desc);
|
intptr_t i, oprsz = simd_oprsz(desc);
|
||||||
uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15;
|
bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
|
||||||
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
|
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
|
||||||
float_status *status = &env->vfp.fp_status_a64;
|
float_status *status = &env->vfp.fp_status[FPST_A64];
|
||||||
bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64);
|
bool fz16 = env->vfp.fpcr & FPCR_FZ16;
|
||||||
|
int negx = 0, negf = 0;
|
||||||
|
|
||||||
|
if (is_s) {
|
||||||
|
if (env->vfp.fpcr & FPCR_AH) {
|
||||||
|
negf = float_muladd_negate_product;
|
||||||
|
} else {
|
||||||
|
negx = 0x8000;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for (i = 0; i < oprsz; i += sizeof(float32)) {
|
for (i = 0; i < oprsz; i += sizeof(float32)) {
|
||||||
float16 nn_16 = *(float16 *)(vn + H1_2(i + sel)) ^ negn;
|
float16 nn_16 = *(float16 *)(vn + H1_2(i + sel)) ^ negx;
|
||||||
float16 mm_16 = *(float16 *)(vm + H1_2(i + sel));
|
float16 mm_16 = *(float16 *)(vm + H1_2(i + sel));
|
||||||
float32 nn = float16_to_float32_by_bits(nn_16, fz16);
|
float32 nn = float16_to_float32_by_bits(nn_16, fz16);
|
||||||
float32 mm = float16_to_float32_by_bits(mm_16, fz16);
|
float32 mm = float16_to_float32_by_bits(mm_16, fz16);
|
||||||
float32 aa = *(float32 *)(va + H1_4(i));
|
float32 aa = *(float32 *)(va + H1_4(i));
|
||||||
|
|
||||||
*(float32 *)(vd + H1_4(i)) = float32_muladd(nn, mm, aa, 0, status);
|
*(float32 *)(vd + H1_4(i)) = float32_muladd(nn, mm, aa, negf, status);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst,
|
static void do_fmlal_idx(float32 *d, void *vn, void *vm,
|
||||||
uint32_t desc, bool fz16)
|
CPUARMState *env, uint32_t desc,
|
||||||
|
ARMFPStatusFlavour fpst_idx,
|
||||||
|
uint64_t negx, int negf)
|
||||||
{
|
{
|
||||||
|
float_status *fpst = &env->vfp.fp_status[fpst_idx];
|
||||||
|
bool fz16 = env->vfp.fpcr & FPCR_FZ16;
|
||||||
intptr_t i, oprsz = simd_oprsz(desc);
|
intptr_t i, oprsz = simd_oprsz(desc);
|
||||||
int is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
|
|
||||||
int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
|
int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
|
||||||
int index = extract32(desc, SIMD_DATA_SHIFT + 2, 3);
|
int index = extract32(desc, SIMD_DATA_SHIFT + 2, 3);
|
||||||
int is_q = oprsz == 16;
|
int is_q = oprsz == 16;
|
||||||
uint64_t n_4;
|
uint64_t n_4;
|
||||||
float32 m_1;
|
float32 m_1;
|
||||||
|
|
||||||
/* Pre-load all of the f16 data, avoiding overlap issues. */
|
/*
|
||||||
n_4 = load4_f16(vn, is_q, is_2);
|
* Pre-load all of the f16 data, avoiding overlap issues.
|
||||||
|
* Negate all inputs for AH=0 FMLSL at once.
|
||||||
/* Negate all inputs for FMLSL at once. */
|
*/
|
||||||
if (is_s) {
|
n_4 = load4_f16(vn, is_q, is_2) ^ negx;
|
||||||
n_4 ^= 0x8000800080008000ull;
|
|
||||||
}
|
|
||||||
|
|
||||||
m_1 = float16_to_float32_by_bits(((float16 *)vm)[H2(index)], fz16);
|
m_1 = float16_to_float32_by_bits(((float16 *)vm)[H2(index)], fz16);
|
||||||
|
|
||||||
for (i = 0; i < oprsz / 4; i++) {
|
for (i = 0; i < oprsz / 4; i++) {
|
||||||
float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16);
|
float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16);
|
||||||
d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], 0, fpst);
|
d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], negf, fpst);
|
||||||
}
|
}
|
||||||
clear_tail(d, oprsz, simd_maxsz(desc));
|
clear_tail(d, oprsz, simd_maxsz(desc));
|
||||||
}
|
}
|
||||||
@ -2121,38 +2238,58 @@ static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst,
|
|||||||
void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm,
|
void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm,
|
||||||
CPUARMState *env, uint32_t desc)
|
CPUARMState *env, uint32_t desc)
|
||||||
{
|
{
|
||||||
do_fmlal_idx(vd, vn, vm, &env->vfp.standard_fp_status, desc,
|
bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
|
||||||
get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32));
|
uint64_t negx = is_s ? 0x8000800080008000ull : 0;
|
||||||
|
|
||||||
|
do_fmlal_idx(vd, vn, vm, env, desc, FPST_STD, negx, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
|
void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
|
||||||
CPUARMState *env, uint32_t desc)
|
CPUARMState *env, uint32_t desc)
|
||||||
{
|
{
|
||||||
do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, desc,
|
bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
|
||||||
get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64));
|
uint64_t negx = 0;
|
||||||
|
int negf = 0;
|
||||||
|
|
||||||
|
if (is_s) {
|
||||||
|
if (env->vfp.fpcr & FPCR_AH) {
|
||||||
|
negf = float_muladd_negate_product;
|
||||||
|
} else {
|
||||||
|
negx = 0x8000800080008000ull;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
do_fmlal_idx(vd, vn, vm, env, desc, FPST_A64, negx, negf);
|
||||||
}
|
}
|
||||||
|
|
||||||
void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va,
|
void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va,
|
||||||
CPUARMState *env, uint32_t desc)
|
CPUARMState *env, uint32_t desc)
|
||||||
{
|
{
|
||||||
intptr_t i, j, oprsz = simd_oprsz(desc);
|
intptr_t i, j, oprsz = simd_oprsz(desc);
|
||||||
uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15;
|
bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
|
||||||
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
|
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
|
||||||
intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16);
|
intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16);
|
||||||
float_status *status = &env->vfp.fp_status_a64;
|
float_status *status = &env->vfp.fp_status[FPST_A64];
|
||||||
bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64);
|
bool fz16 = env->vfp.fpcr & FPCR_FZ16;
|
||||||
|
int negx = 0, negf = 0;
|
||||||
|
|
||||||
|
if (is_s) {
|
||||||
|
if (env->vfp.fpcr & FPCR_AH) {
|
||||||
|
negf = float_muladd_negate_product;
|
||||||
|
} else {
|
||||||
|
negx = 0x8000;
|
||||||
|
}
|
||||||
|
}
|
||||||
for (i = 0; i < oprsz; i += 16) {
|
for (i = 0; i < oprsz; i += 16) {
|
||||||
float16 mm_16 = *(float16 *)(vm + i + idx);
|
float16 mm_16 = *(float16 *)(vm + i + idx);
|
||||||
float32 mm = float16_to_float32_by_bits(mm_16, fz16);
|
float32 mm = float16_to_float32_by_bits(mm_16, fz16);
|
||||||
|
|
||||||
for (j = 0; j < 16; j += sizeof(float32)) {
|
for (j = 0; j < 16; j += sizeof(float32)) {
|
||||||
float16 nn_16 = *(float16 *)(vn + H1_2(i + j + sel)) ^ negn;
|
float16 nn_16 = *(float16 *)(vn + H1_2(i + j + sel)) ^ negx;
|
||||||
float32 nn = float16_to_float32_by_bits(nn_16, fz16);
|
float32 nn = float16_to_float32_by_bits(nn_16, fz16);
|
||||||
float32 aa = *(float32 *)(va + H1_4(i + j));
|
float32 aa = *(float32 *)(va + H1_4(i + j));
|
||||||
|
|
||||||
*(float32 *)(vd + H1_4(i + j)) =
|
*(float32 *)(vd + H1_4(i + j)) =
|
||||||
float32_muladd(nn, mm, aa, 0, status);
|
float32_muladd(nn, mm, aa, negf, status);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2436,6 +2573,16 @@ DO_3OP_PAIR(gvec_fminnump_h, float16_minnum, float16, H2)
|
|||||||
DO_3OP_PAIR(gvec_fminnump_s, float32_minnum, float32, H4)
|
DO_3OP_PAIR(gvec_fminnump_s, float32_minnum, float32, H4)
|
||||||
DO_3OP_PAIR(gvec_fminnump_d, float64_minnum, float64, )
|
DO_3OP_PAIR(gvec_fminnump_d, float64_minnum, float64, )
|
||||||
|
|
||||||
|
#ifdef TARGET_AARCH64
|
||||||
|
DO_3OP_PAIR(gvec_ah_fmaxp_h, helper_vfp_ah_maxh, float16, H2)
|
||||||
|
DO_3OP_PAIR(gvec_ah_fmaxp_s, helper_vfp_ah_maxs, float32, H4)
|
||||||
|
DO_3OP_PAIR(gvec_ah_fmaxp_d, helper_vfp_ah_maxd, float64, )
|
||||||
|
|
||||||
|
DO_3OP_PAIR(gvec_ah_fminp_h, helper_vfp_ah_minh, float16, H2)
|
||||||
|
DO_3OP_PAIR(gvec_ah_fminp_s, helper_vfp_ah_mins, float32, H4)
|
||||||
|
DO_3OP_PAIR(gvec_ah_fminp_d, helper_vfp_ah_mind, float64, )
|
||||||
|
#endif
|
||||||
|
|
||||||
#undef DO_3OP_PAIR
|
#undef DO_3OP_PAIR
|
||||||
|
|
||||||
#define DO_3OP_PAIR(NAME, FUNC, TYPE, H) \
|
#define DO_3OP_PAIR(NAME, FUNC, TYPE, H) \
|
||||||
@ -2808,7 +2955,7 @@ bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp)
|
|||||||
*/
|
*/
|
||||||
bool ebf = is_a64(env) && env->vfp.fpcr & FPCR_EBF;
|
bool ebf = is_a64(env) && env->vfp.fpcr & FPCR_EBF;
|
||||||
|
|
||||||
*statusp = is_a64(env) ? env->vfp.fp_status_a64 : env->vfp.fp_status_a32;
|
*statusp = env->vfp.fp_status[is_a64(env) ? FPST_A64 : FPST_A32];
|
||||||
set_default_nan_mode(true, statusp);
|
set_default_nan_mode(true, statusp);
|
||||||
|
|
||||||
if (ebf) {
|
if (ebf) {
|
||||||
|
@ -20,6 +20,8 @@
|
|||||||
#ifndef TARGET_ARM_VEC_INTERNAL_H
|
#ifndef TARGET_ARM_VEC_INTERNAL_H
|
||||||
#define TARGET_ARM_VEC_INTERNAL_H
|
#define TARGET_ARM_VEC_INTERNAL_H
|
||||||
|
|
||||||
|
#include "fpu/softfloat.h"
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Note that vector data is stored in host-endian 64-bit chunks,
|
* Note that vector data is stored in host-endian 64-bit chunks,
|
||||||
* so addressing units smaller than that needs a host-endian fixup.
|
* so addressing units smaller than that needs a host-endian fixup.
|
||||||
@ -265,4 +267,37 @@ float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2,
|
|||||||
*/
|
*/
|
||||||
bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp);
|
bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Negate as for FPCR.AH=1 -- do not negate NaNs.
|
||||||
|
*/
|
||||||
|
static inline float16 float16_ah_chs(float16 a)
|
||||||
|
{
|
||||||
|
return float16_is_any_nan(a) ? a : float16_chs(a);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline float32 float32_ah_chs(float32 a)
|
||||||
|
{
|
||||||
|
return float32_is_any_nan(a) ? a : float32_chs(a);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline float64 float64_ah_chs(float64 a)
|
||||||
|
{
|
||||||
|
return float64_is_any_nan(a) ? a : float64_chs(a);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline float16 float16_maybe_ah_chs(float16 a, bool fpcr_ah)
|
||||||
|
{
|
||||||
|
return fpcr_ah && float16_is_any_nan(a) ? a : float16_chs(a);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline float32 float32_maybe_ah_chs(float32 a, bool fpcr_ah)
|
||||||
|
{
|
||||||
|
return fpcr_ah && float32_is_any_nan(a) ? a : float32_chs(a);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline float64 float64_maybe_ah_chs(float64 a, bool fpcr_ah)
|
||||||
|
{
|
||||||
|
return fpcr_ah && float64_is_any_nan(a) ? a : float64_chs(a);
|
||||||
|
}
|
||||||
|
|
||||||
#endif /* TARGET_ARM_VEC_INTERNAL_H */
|
#endif /* TARGET_ARM_VEC_INTERNAL_H */
|
||||||
|
@ -22,19 +22,63 @@
|
|||||||
#include "exec/helper-proto.h"
|
#include "exec/helper-proto.h"
|
||||||
#include "internals.h"
|
#include "internals.h"
|
||||||
#include "cpu-features.h"
|
#include "cpu-features.h"
|
||||||
|
#include "fpu/softfloat.h"
|
||||||
#ifdef CONFIG_TCG
|
#ifdef CONFIG_TCG
|
||||||
#include "qemu/log.h"
|
#include "qemu/log.h"
|
||||||
#include "fpu/softfloat.h"
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* VFP support. We follow the convention used for VFP instructions:
|
/* VFP support. We follow the convention used for VFP instructions:
|
||||||
Single precision routines have a "s" suffix, double precision a
|
Single precision routines have a "s" suffix, double precision a
|
||||||
"d" suffix. */
|
"d" suffix. */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Set the float_status behaviour to match the Arm defaults:
|
||||||
|
* * tininess-before-rounding
|
||||||
|
* * 2-input NaN propagation prefers SNaN over QNaN, and then
|
||||||
|
* operand A over operand B (see FPProcessNaNs() pseudocode)
|
||||||
|
* * 3-input NaN propagation prefers SNaN over QNaN, and then
|
||||||
|
* operand C over A over B (see FPProcessNaNs3() pseudocode,
|
||||||
|
* but note that for QEMU muladd is a * b + c, whereas for
|
||||||
|
* the pseudocode function the arguments are in the order c, a, b.
|
||||||
|
* * 0 * Inf + NaN returns the default NaN if the input NaN is quiet,
|
||||||
|
* and the input NaN if it is signalling
|
||||||
|
* * Default NaN has sign bit clear, msb frac bit set
|
||||||
|
*/
|
||||||
|
void arm_set_default_fp_behaviours(float_status *s)
|
||||||
|
{
|
||||||
|
set_float_detect_tininess(float_tininess_before_rounding, s);
|
||||||
|
set_float_ftz_detection(float_ftz_before_rounding, s);
|
||||||
|
set_float_2nan_prop_rule(float_2nan_prop_s_ab, s);
|
||||||
|
set_float_3nan_prop_rule(float_3nan_prop_s_cab, s);
|
||||||
|
set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, s);
|
||||||
|
set_float_default_nan_pattern(0b01000000, s);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Set the float_status behaviour to match the FEAT_AFP
|
||||||
|
* FPCR.AH=1 requirements:
|
||||||
|
* * tininess-after-rounding
|
||||||
|
* * 2-input NaN propagation prefers the first NaN
|
||||||
|
* * 3-input NaN propagation prefers a over b over c
|
||||||
|
* * 0 * Inf + NaN always returns the input NaN and doesn't
|
||||||
|
* set Invalid for a QNaN
|
||||||
|
* * default NaN has sign bit set, msb frac bit set
|
||||||
|
*/
|
||||||
|
void arm_set_ah_fp_behaviours(float_status *s)
|
||||||
|
{
|
||||||
|
set_float_detect_tininess(float_tininess_after_rounding, s);
|
||||||
|
set_float_ftz_detection(float_ftz_after_rounding, s);
|
||||||
|
set_float_2nan_prop_rule(float_2nan_prop_ab, s);
|
||||||
|
set_float_3nan_prop_rule(float_3nan_prop_abc, s);
|
||||||
|
set_float_infzeronan_rule(float_infzeronan_dnan_never |
|
||||||
|
float_infzeronan_suppress_invalid, s);
|
||||||
|
set_float_default_nan_pattern(0b11000000, s);
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_TCG
|
#ifdef CONFIG_TCG
|
||||||
|
|
||||||
/* Convert host exception flags to vfp form. */
|
/* Convert host exception flags to vfp form. */
|
||||||
static inline uint32_t vfp_exceptbits_from_host(int host_bits)
|
static inline uint32_t vfp_exceptbits_from_host(int host_bits, bool ah)
|
||||||
{
|
{
|
||||||
uint32_t target_bits = 0;
|
uint32_t target_bits = 0;
|
||||||
|
|
||||||
@ -56,24 +100,52 @@ static inline uint32_t vfp_exceptbits_from_host(int host_bits)
|
|||||||
if (host_bits & float_flag_input_denormal_flushed) {
|
if (host_bits & float_flag_input_denormal_flushed) {
|
||||||
target_bits |= FPSR_IDC;
|
target_bits |= FPSR_IDC;
|
||||||
}
|
}
|
||||||
|
/*
|
||||||
|
* With FPCR.AH, IDC is set when an input denormal is used,
|
||||||
|
* and flushing an output denormal to zero sets both IXC and UFC.
|
||||||
|
*/
|
||||||
|
if (ah && (host_bits & float_flag_input_denormal_used)) {
|
||||||
|
target_bits |= FPSR_IDC;
|
||||||
|
}
|
||||||
|
if (ah && (host_bits & float_flag_output_denormal_flushed)) {
|
||||||
|
target_bits |= FPSR_IXC;
|
||||||
|
}
|
||||||
return target_bits;
|
return target_bits;
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
|
static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
|
||||||
{
|
{
|
||||||
uint32_t i = 0;
|
uint32_t a32_flags = 0, a64_flags = 0;
|
||||||
|
|
||||||
i |= get_float_exception_flags(&env->vfp.fp_status_a32);
|
a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_A32]);
|
||||||
i |= get_float_exception_flags(&env->vfp.fp_status_a64);
|
a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]);
|
||||||
i |= get_float_exception_flags(&env->vfp.standard_fp_status);
|
|
||||||
/* FZ16 does not generate an input denormal exception. */
|
/* FZ16 does not generate an input denormal exception. */
|
||||||
i |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32)
|
a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A32_F16])
|
||||||
& ~float_flag_input_denormal_flushed);
|
& ~float_flag_input_denormal_flushed);
|
||||||
i |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64)
|
a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16])
|
||||||
& ~float_flag_input_denormal_flushed);
|
& ~float_flag_input_denormal_flushed);
|
||||||
i |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16)
|
|
||||||
& ~float_flag_input_denormal_flushed);
|
a64_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_A64]);
|
||||||
return vfp_exceptbits_from_host(i);
|
a64_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A64_F16])
|
||||||
|
& ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used));
|
||||||
|
/*
|
||||||
|
* We do not merge in flags from FPST_AH or FPST_AH_F16, because
|
||||||
|
* they are used for insns that must not set the cumulative exception bits.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Flushing an input denormal *only* because FPCR.FIZ == 1 does
|
||||||
|
* not set FPSR.IDC; if FPCR.FZ is also set then this takes
|
||||||
|
* precedence and IDC is set (see the FPUnpackBase pseudocode).
|
||||||
|
* So squash it unless (FPCR.AH == 0 && FPCR.FZ == 1).
|
||||||
|
* We only do this for the a64 flags because FIZ has no effect
|
||||||
|
* on AArch32 even if it is set.
|
||||||
|
*/
|
||||||
|
if ((env->vfp.fpcr & (FPCR_FZ | FPCR_AH)) != FPCR_FZ) {
|
||||||
|
a64_flags &= ~float_flag_input_denormal_flushed;
|
||||||
|
}
|
||||||
|
return vfp_exceptbits_from_host(a64_flags, env->vfp.fpcr & FPCR_AH) |
|
||||||
|
vfp_exceptbits_from_host(a32_flags, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void vfp_clear_float_status_exc_flags(CPUARMState *env)
|
static void vfp_clear_float_status_exc_flags(CPUARMState *env)
|
||||||
@ -83,12 +155,25 @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
|
|||||||
* values. The caller should have arranged for env->vfp.fpsr to
|
* values. The caller should have arranged for env->vfp.fpsr to
|
||||||
* be the architecturally up-to-date exception flag information first.
|
* be the architecturally up-to-date exception flag information first.
|
||||||
*/
|
*/
|
||||||
set_float_exception_flags(0, &env->vfp.fp_status_a32);
|
set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32]);
|
||||||
set_float_exception_flags(0, &env->vfp.fp_status_a64);
|
set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64]);
|
||||||
set_float_exception_flags(0, &env->vfp.fp_status_f16_a32);
|
set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]);
|
||||||
set_float_exception_flags(0, &env->vfp.fp_status_f16_a64);
|
set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]);
|
||||||
set_float_exception_flags(0, &env->vfp.standard_fp_status);
|
set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]);
|
||||||
set_float_exception_flags(0, &env->vfp.standard_fp_status_f16);
|
set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]);
|
||||||
|
set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH]);
|
||||||
|
set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH_F16]);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Synchronize any pending exception-flag information in the
|
||||||
|
* float_status values into env->vfp.fpsr, and then clear out
|
||||||
|
* the float_status data.
|
||||||
|
*/
|
||||||
|
env->vfp.fpsr |= vfp_get_fpsr_from_host(env);
|
||||||
|
vfp_clear_float_status_exc_flags(env);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
|
static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
|
||||||
@ -113,33 +198,66 @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
|
|||||||
i = float_round_to_zero;
|
i = float_round_to_zero;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
set_float_rounding_mode(i, &env->vfp.fp_status_a32);
|
set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32]);
|
||||||
set_float_rounding_mode(i, &env->vfp.fp_status_a64);
|
set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64]);
|
||||||
set_float_rounding_mode(i, &env->vfp.fp_status_f16_a32);
|
set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]);
|
||||||
set_float_rounding_mode(i, &env->vfp.fp_status_f16_a64);
|
set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]);
|
||||||
}
|
}
|
||||||
if (changed & FPCR_FZ16) {
|
if (changed & FPCR_FZ16) {
|
||||||
bool ftz_enabled = val & FPCR_FZ16;
|
bool ftz_enabled = val & FPCR_FZ16;
|
||||||
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
|
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]);
|
||||||
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64);
|
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]);
|
||||||
set_flush_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16);
|
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
|
||||||
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
|
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]);
|
||||||
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64);
|
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]);
|
||||||
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16);
|
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]);
|
||||||
|
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
|
||||||
|
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]);
|
||||||
}
|
}
|
||||||
if (changed & FPCR_FZ) {
|
if (changed & FPCR_FZ) {
|
||||||
bool ftz_enabled = val & FPCR_FZ;
|
bool ftz_enabled = val & FPCR_FZ;
|
||||||
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a32);
|
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]);
|
||||||
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32);
|
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64]);
|
||||||
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a64);
|
/* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */
|
||||||
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a64);
|
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]);
|
||||||
|
}
|
||||||
|
if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) {
|
||||||
|
/*
|
||||||
|
* A64: Flush denormalized inputs to zero if FPCR.FIZ = 1, or
|
||||||
|
* both FPCR.AH = 0 and FPCR.FZ = 1.
|
||||||
|
*/
|
||||||
|
bool fitz_enabled = (val & FPCR_FIZ) ||
|
||||||
|
(val & (FPCR_FZ | FPCR_AH)) == FPCR_FZ;
|
||||||
|
set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status[FPST_A64]);
|
||||||
}
|
}
|
||||||
if (changed & FPCR_DN) {
|
if (changed & FPCR_DN) {
|
||||||
bool dnan_enabled = val & FPCR_DN;
|
bool dnan_enabled = val & FPCR_DN;
|
||||||
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32);
|
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32]);
|
||||||
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64);
|
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64]);
|
||||||
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32);
|
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]);
|
||||||
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64);
|
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]);
|
||||||
|
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]);
|
||||||
|
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]);
|
||||||
|
}
|
||||||
|
if (changed & FPCR_AH) {
|
||||||
|
bool ah_enabled = val & FPCR_AH;
|
||||||
|
|
||||||
|
if (ah_enabled) {
|
||||||
|
/* Change behaviours for A64 FP operations */
|
||||||
|
arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64]);
|
||||||
|
arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
|
||||||
|
} else {
|
||||||
|
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]);
|
||||||
|
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* If any bits changed that we look at in vfp_get_fpsr_from_host(),
|
||||||
|
* we must sync the float_status flags into vfp.fpsr now (under the
|
||||||
|
* old regime) before we update vfp.fpcr.
|
||||||
|
*/
|
||||||
|
if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) {
|
||||||
|
vfp_sync_and_clear_float_status_exc_flags(env);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -242,6 +360,9 @@ static void vfp_set_fpcr_masked(CPUARMState *env, uint32_t val, uint32_t mask)
|
|||||||
if (!cpu_isar_feature(any_fp16, cpu)) {
|
if (!cpu_isar_feature(any_fp16, cpu)) {
|
||||||
val &= ~FPCR_FZ16;
|
val &= ~FPCR_FZ16;
|
||||||
}
|
}
|
||||||
|
if (!cpu_isar_feature(aa64_afp, cpu)) {
|
||||||
|
val &= ~(FPCR_FIZ | FPCR_AH | FPCR_NEP);
|
||||||
|
}
|
||||||
|
|
||||||
if (!cpu_isar_feature(aa64_ebf16, cpu)) {
|
if (!cpu_isar_feature(aa64_ebf16, cpu)) {
|
||||||
val &= ~FPCR_EBF;
|
val &= ~FPCR_EBF;
|
||||||
@ -271,12 +392,14 @@ static void vfp_set_fpcr_masked(CPUARMState *env, uint32_t val, uint32_t mask)
|
|||||||
* We don't implement trapped exception handling, so the
|
* We don't implement trapped exception handling, so the
|
||||||
* trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!)
|
* trap enable bits, IDE|IXE|UFE|OFE|DZE|IOE are all RAZ/WI (not RES0!)
|
||||||
*
|
*
|
||||||
* The FPCR bits we keep in vfp.fpcr are AHP, DN, FZ, RMode, EBF
|
* The FPCR bits we keep in vfp.fpcr are AHP, DN, FZ, RMode, EBF, FZ16,
|
||||||
* and FZ16. Len, Stride and LTPSIZE we just handled. Store those bits
|
* FIZ, AH, and NEP.
|
||||||
|
* Len, Stride and LTPSIZE we just handled. Store those bits
|
||||||
* there, and zero any of the other FPCR bits and the RES0 and RAZ/WI
|
* there, and zero any of the other FPCR bits and the RES0 and RAZ/WI
|
||||||
* bits.
|
* bits.
|
||||||
*/
|
*/
|
||||||
val &= FPCR_AHP | FPCR_DN | FPCR_FZ | FPCR_RMODE_MASK | FPCR_FZ16 | FPCR_EBF;
|
val &= FPCR_AHP | FPCR_DN | FPCR_FZ | FPCR_RMODE_MASK | FPCR_FZ16 |
|
||||||
|
FPCR_EBF | FPCR_FIZ | FPCR_AH | FPCR_NEP;
|
||||||
env->vfp.fpcr &= ~mask;
|
env->vfp.fpcr &= ~mask;
|
||||||
env->vfp.fpcr |= val;
|
env->vfp.fpcr |= val;
|
||||||
}
|
}
|
||||||
@ -366,16 +489,16 @@ static void softfloat_to_vfp_compare(CPUARMState *env, FloatRelation cmp)
|
|||||||
void VFP_HELPER(cmp, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \
|
void VFP_HELPER(cmp, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \
|
||||||
{ \
|
{ \
|
||||||
softfloat_to_vfp_compare(env, \
|
softfloat_to_vfp_compare(env, \
|
||||||
FLOATTYPE ## _compare_quiet(a, b, &env->vfp.FPST)); \
|
FLOATTYPE ## _compare_quiet(a, b, &env->vfp.fp_status[FPST])); \
|
||||||
} \
|
} \
|
||||||
void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \
|
void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \
|
||||||
{ \
|
{ \
|
||||||
softfloat_to_vfp_compare(env, \
|
softfloat_to_vfp_compare(env, \
|
||||||
FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \
|
FLOATTYPE ## _compare(a, b, &env->vfp.fp_status[FPST])); \
|
||||||
}
|
}
|
||||||
DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status_f16_a32)
|
DO_VFP_cmp(h, float16, dh_ctype_f16, FPST_A32_F16)
|
||||||
DO_VFP_cmp(s, float32, float32, fp_status_a32)
|
DO_VFP_cmp(s, float32, float32, FPST_A32)
|
||||||
DO_VFP_cmp(d, float64, float64, fp_status_a32)
|
DO_VFP_cmp(d, float64, float64, FPST_A32)
|
||||||
#undef DO_VFP_cmp
|
#undef DO_VFP_cmp
|
||||||
|
|
||||||
/* Integer to float and float to integer conversions */
|
/* Integer to float and float to integer conversions */
|
||||||
@ -610,6 +733,33 @@ static int recip_estimate(int input)
|
|||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Increased precision version:
|
||||||
|
* input is a 13 bit fixed point number
|
||||||
|
* input range 2048 .. 4095 for a number from 0.5 <= x < 1.0.
|
||||||
|
* result range 4096 .. 8191 for a number from 1.0 to 2.0
|
||||||
|
*/
|
||||||
|
static int recip_estimate_incprec(int input)
|
||||||
|
{
|
||||||
|
int a, b, r;
|
||||||
|
assert(2048 <= input && input < 4096);
|
||||||
|
a = (input * 2) + 1;
|
||||||
|
/*
|
||||||
|
* The pseudocode expresses this as an operation on infinite
|
||||||
|
* precision reals where it calculates 2^25 / a and then looks
|
||||||
|
* at the error between that and the rounded-down-to-integer
|
||||||
|
* value to see if it should instead round up. We instead
|
||||||
|
* follow the same approach as the pseudocode for the 8-bit
|
||||||
|
* precision version, and calculate (2 * (2^25 / a)) as an
|
||||||
|
* integer so we can do the "add one and halve" to round it.
|
||||||
|
* So the 1 << 26 here is correct.
|
||||||
|
*/
|
||||||
|
b = (1 << 26) / a;
|
||||||
|
r = (b + 1) >> 1;
|
||||||
|
assert(4096 <= r && r < 8192);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Common wrapper to call recip_estimate
|
* Common wrapper to call recip_estimate
|
||||||
*
|
*
|
||||||
@ -619,7 +769,8 @@ static int recip_estimate(int input)
|
|||||||
* callee.
|
* callee.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac)
|
static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac,
|
||||||
|
bool increasedprecision)
|
||||||
{
|
{
|
||||||
uint32_t scaled, estimate;
|
uint32_t scaled, estimate;
|
||||||
uint64_t result_frac;
|
uint64_t result_frac;
|
||||||
@ -635,12 +786,22 @@ static uint64_t call_recip_estimate(int *exp, int exp_off, uint64_t frac)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* scaled = UInt('1':fraction<51:44>) */
|
if (increasedprecision) {
|
||||||
scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
|
/* scaled = UInt('1':fraction<51:41>) */
|
||||||
estimate = recip_estimate(scaled);
|
scaled = deposit32(1 << 11, 0, 11, extract64(frac, 41, 11));
|
||||||
|
estimate = recip_estimate_incprec(scaled);
|
||||||
|
} else {
|
||||||
|
/* scaled = UInt('1':fraction<51:44>) */
|
||||||
|
scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
|
||||||
|
estimate = recip_estimate(scaled);
|
||||||
|
}
|
||||||
|
|
||||||
result_exp = exp_off - *exp;
|
result_exp = exp_off - *exp;
|
||||||
result_frac = deposit64(0, 44, 8, estimate);
|
if (increasedprecision) {
|
||||||
|
result_frac = deposit64(0, 40, 12, estimate);
|
||||||
|
} else {
|
||||||
|
result_frac = deposit64(0, 44, 8, estimate);
|
||||||
|
}
|
||||||
if (result_exp == 0) {
|
if (result_exp == 0) {
|
||||||
result_frac = deposit64(result_frac >> 1, 51, 1, 1);
|
result_frac = deposit64(result_frac >> 1, 51, 1, 1);
|
||||||
} else if (result_exp == -1) {
|
} else if (result_exp == -1) {
|
||||||
@ -709,7 +870,7 @@ uint32_t HELPER(recpe_f16)(uint32_t input, float_status *fpst)
|
|||||||
}
|
}
|
||||||
|
|
||||||
f64_frac = call_recip_estimate(&f16_exp, 29,
|
f64_frac = call_recip_estimate(&f16_exp, 29,
|
||||||
((uint64_t) f16_frac) << (52 - 10));
|
((uint64_t) f16_frac) << (52 - 10), false);
|
||||||
|
|
||||||
/* result = sign : result_exp<4:0> : fraction<51:42> */
|
/* result = sign : result_exp<4:0> : fraction<51:42> */
|
||||||
f16_val = deposit32(0, 15, 1, f16_sign);
|
f16_val = deposit32(0, 15, 1, f16_sign);
|
||||||
@ -718,7 +879,11 @@ uint32_t HELPER(recpe_f16)(uint32_t input, float_status *fpst)
|
|||||||
return make_float16(f16_val);
|
return make_float16(f16_val);
|
||||||
}
|
}
|
||||||
|
|
||||||
float32 HELPER(recpe_f32)(float32 input, float_status *fpst)
|
/*
|
||||||
|
* FEAT_RPRES means the f32 FRECPE has an "increased precision" variant
|
||||||
|
* which is used when FPCR.AH == 1.
|
||||||
|
*/
|
||||||
|
static float32 do_recpe_f32(float32 input, float_status *fpst, bool rpres)
|
||||||
{
|
{
|
||||||
float32 f32 = float32_squash_input_denormal(input, fpst);
|
float32 f32 = float32_squash_input_denormal(input, fpst);
|
||||||
uint32_t f32_val = float32_val(f32);
|
uint32_t f32_val = float32_val(f32);
|
||||||
@ -758,7 +923,7 @@ float32 HELPER(recpe_f32)(float32 input, float_status *fpst)
|
|||||||
}
|
}
|
||||||
|
|
||||||
f64_frac = call_recip_estimate(&f32_exp, 253,
|
f64_frac = call_recip_estimate(&f32_exp, 253,
|
||||||
((uint64_t) f32_frac) << (52 - 23));
|
((uint64_t) f32_frac) << (52 - 23), rpres);
|
||||||
|
|
||||||
/* result = sign : result_exp<7:0> : fraction<51:29> */
|
/* result = sign : result_exp<7:0> : fraction<51:29> */
|
||||||
f32_val = deposit32(0, 31, 1, f32_sign);
|
f32_val = deposit32(0, 31, 1, f32_sign);
|
||||||
@ -767,6 +932,16 @@ float32 HELPER(recpe_f32)(float32 input, float_status *fpst)
|
|||||||
return make_float32(f32_val);
|
return make_float32(f32_val);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
float32 HELPER(recpe_f32)(float32 input, float_status *fpst)
|
||||||
|
{
|
||||||
|
return do_recpe_f32(input, fpst, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
float32 HELPER(recpe_rpres_f32)(float32 input, float_status *fpst)
|
||||||
|
{
|
||||||
|
return do_recpe_f32(input, fpst, true);
|
||||||
|
}
|
||||||
|
|
||||||
float64 HELPER(recpe_f64)(float64 input, float_status *fpst)
|
float64 HELPER(recpe_f64)(float64 input, float_status *fpst)
|
||||||
{
|
{
|
||||||
float64 f64 = float64_squash_input_denormal(input, fpst);
|
float64 f64 = float64_squash_input_denormal(input, fpst);
|
||||||
@ -806,7 +981,7 @@ float64 HELPER(recpe_f64)(float64 input, float_status *fpst)
|
|||||||
return float64_set_sign(float64_zero, float64_is_neg(f64));
|
return float64_set_sign(float64_zero, float64_is_neg(f64));
|
||||||
}
|
}
|
||||||
|
|
||||||
f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac);
|
f64_frac = call_recip_estimate(&f64_exp, 2045, f64_frac, false);
|
||||||
|
|
||||||
/* result = sign : result_exp<10:0> : fraction<51:0>; */
|
/* result = sign : result_exp<10:0> : fraction<51:0>; */
|
||||||
f64_val = deposit64(0, 63, 1, f64_sign);
|
f64_val = deposit64(0, 63, 1, f64_sign);
|
||||||
@ -840,8 +1015,36 @@ static int do_recip_sqrt_estimate(int a)
|
|||||||
return estimate;
|
return estimate;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int do_recip_sqrt_estimate_incprec(int a)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* The Arm ARM describes the 12-bit precision version of RecipSqrtEstimate
|
||||||
|
* in terms of an infinite-precision floating point calculation of a
|
||||||
|
* square root. We implement this using the same kind of pure integer
|
||||||
|
* algorithm as the 8-bit mantissa, to get the same bit-for-bit result.
|
||||||
|
*/
|
||||||
|
int64_t b, estimate;
|
||||||
|
|
||||||
static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac)
|
assert(1024 <= a && a < 4096);
|
||||||
|
if (a < 2048) {
|
||||||
|
a = a * 2 + 1;
|
||||||
|
} else {
|
||||||
|
a = (a >> 1) << 1;
|
||||||
|
a = (a + 1) * 2;
|
||||||
|
}
|
||||||
|
b = 8192;
|
||||||
|
while (a * (b + 1) * (b + 1) < (1ULL << 39)) {
|
||||||
|
b += 1;
|
||||||
|
}
|
||||||
|
estimate = (b + 1) / 2;
|
||||||
|
|
||||||
|
assert(4096 <= estimate && estimate < 8192);
|
||||||
|
|
||||||
|
return estimate;
|
||||||
|
}
|
||||||
|
|
||||||
|
static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac,
|
||||||
|
bool increasedprecision)
|
||||||
{
|
{
|
||||||
int estimate;
|
int estimate;
|
||||||
uint32_t scaled;
|
uint32_t scaled;
|
||||||
@ -854,17 +1057,32 @@ static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac)
|
|||||||
frac = extract64(frac, 0, 51) << 1;
|
frac = extract64(frac, 0, 51) << 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (*exp & 1) {
|
if (increasedprecision) {
|
||||||
/* scaled = UInt('01':fraction<51:45>) */
|
if (*exp & 1) {
|
||||||
scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7));
|
/* scaled = UInt('01':fraction<51:42>) */
|
||||||
|
scaled = deposit32(1 << 10, 0, 10, extract64(frac, 42, 10));
|
||||||
|
} else {
|
||||||
|
/* scaled = UInt('1':fraction<51:41>) */
|
||||||
|
scaled = deposit32(1 << 11, 0, 11, extract64(frac, 41, 11));
|
||||||
|
}
|
||||||
|
estimate = do_recip_sqrt_estimate_incprec(scaled);
|
||||||
} else {
|
} else {
|
||||||
/* scaled = UInt('1':fraction<51:44>) */
|
if (*exp & 1) {
|
||||||
scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
|
/* scaled = UInt('01':fraction<51:45>) */
|
||||||
|
scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7));
|
||||||
|
} else {
|
||||||
|
/* scaled = UInt('1':fraction<51:44>) */
|
||||||
|
scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
|
||||||
|
}
|
||||||
|
estimate = do_recip_sqrt_estimate(scaled);
|
||||||
}
|
}
|
||||||
estimate = do_recip_sqrt_estimate(scaled);
|
|
||||||
|
|
||||||
*exp = (exp_off - *exp) / 2;
|
*exp = (exp_off - *exp) / 2;
|
||||||
return extract64(estimate, 0, 8) << 44;
|
if (increasedprecision) {
|
||||||
|
return extract64(estimate, 0, 12) << 40;
|
||||||
|
} else {
|
||||||
|
return extract64(estimate, 0, 8) << 44;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s)
|
uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s)
|
||||||
@ -903,7 +1121,7 @@ uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s)
|
|||||||
|
|
||||||
f64_frac = ((uint64_t) f16_frac) << (52 - 10);
|
f64_frac = ((uint64_t) f16_frac) << (52 - 10);
|
||||||
|
|
||||||
f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac);
|
f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac, false);
|
||||||
|
|
||||||
/* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(2) */
|
/* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(2) */
|
||||||
val = deposit32(0, 15, 1, f16_sign);
|
val = deposit32(0, 15, 1, f16_sign);
|
||||||
@ -912,7 +1130,11 @@ uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s)
|
|||||||
return make_float16(val);
|
return make_float16(val);
|
||||||
}
|
}
|
||||||
|
|
||||||
float32 HELPER(rsqrte_f32)(float32 input, float_status *s)
|
/*
|
||||||
|
* FEAT_RPRES means the f32 FRSQRTE has an "increased precision" variant
|
||||||
|
* which is used when FPCR.AH == 1.
|
||||||
|
*/
|
||||||
|
static float32 do_rsqrte_f32(float32 input, float_status *s, bool rpres)
|
||||||
{
|
{
|
||||||
float32 f32 = float32_squash_input_denormal(input, s);
|
float32 f32 = float32_squash_input_denormal(input, s);
|
||||||
uint32_t val = float32_val(f32);
|
uint32_t val = float32_val(f32);
|
||||||
@ -948,15 +1170,33 @@ float32 HELPER(rsqrte_f32)(float32 input, float_status *s)
|
|||||||
|
|
||||||
f64_frac = ((uint64_t) f32_frac) << 29;
|
f64_frac = ((uint64_t) f32_frac) << 29;
|
||||||
|
|
||||||
f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac);
|
f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac, rpres);
|
||||||
|
|
||||||
/* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(15) */
|
/*
|
||||||
|
* result = sign : result_exp<7:0> : estimate<7:0> : Zeros(15)
|
||||||
|
* or for increased precision
|
||||||
|
* result = sign : result_exp<7:0> : estimate<11:0> : Zeros(11)
|
||||||
|
*/
|
||||||
val = deposit32(0, 31, 1, f32_sign);
|
val = deposit32(0, 31, 1, f32_sign);
|
||||||
val = deposit32(val, 23, 8, f32_exp);
|
val = deposit32(val, 23, 8, f32_exp);
|
||||||
val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8));
|
if (rpres) {
|
||||||
|
val = deposit32(val, 11, 12, extract64(f64_frac, 52 - 12, 12));
|
||||||
|
} else {
|
||||||
|
val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8));
|
||||||
|
}
|
||||||
return make_float32(val);
|
return make_float32(val);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
float32 HELPER(rsqrte_f32)(float32 input, float_status *s)
|
||||||
|
{
|
||||||
|
return do_rsqrte_f32(input, s, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
float32 HELPER(rsqrte_rpres_f32)(float32 input, float_status *s)
|
||||||
|
{
|
||||||
|
return do_rsqrte_f32(input, s, true);
|
||||||
|
}
|
||||||
|
|
||||||
float64 HELPER(rsqrte_f64)(float64 input, float_status *s)
|
float64 HELPER(rsqrte_f64)(float64 input, float_status *s)
|
||||||
{
|
{
|
||||||
float64 f64 = float64_squash_input_denormal(input, s);
|
float64 f64 = float64_squash_input_denormal(input, s);
|
||||||
@ -987,7 +1227,7 @@ float64 HELPER(rsqrte_f64)(float64 input, float_status *s)
|
|||||||
return float64_zero;
|
return float64_zero;
|
||||||
}
|
}
|
||||||
|
|
||||||
f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac);
|
f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac, false);
|
||||||
|
|
||||||
/* result = sign : result_exp<10:0> : estimate<7:0> : Zeros(44) */
|
/* result = sign : result_exp<10:0> : estimate<7:0> : Zeros(44) */
|
||||||
val = deposit64(0, 61, 1, f64_sign);
|
val = deposit64(0, 61, 1, f64_sign);
|
||||||
@ -1145,7 +1385,7 @@ uint64_t HELPER(fjcvtzs)(float64 value, float_status *status)
|
|||||||
|
|
||||||
uint32_t HELPER(vjcvt)(float64 value, CPUARMState *env)
|
uint32_t HELPER(vjcvt)(float64 value, CPUARMState *env)
|
||||||
{
|
{
|
||||||
uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status_a32);
|
uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status[FPST_A32]);
|
||||||
uint32_t result = pair;
|
uint32_t result = pair;
|
||||||
uint32_t z = (pair >> 32) == 0;
|
uint32_t z = (pair >> 32) == 0;
|
||||||
|
|
||||||
|
@ -67,6 +67,17 @@ void HELPER(loaded_fr0)(CPUHPPAState *env)
|
|||||||
set_float_infzeronan_rule(float_infzeronan_dnan_never, &env->fp_status);
|
set_float_infzeronan_rule(float_infzeronan_dnan_never, &env->fp_status);
|
||||||
/* Default NaN: sign bit clear, msb-1 frac bit set */
|
/* Default NaN: sign bit clear, msb-1 frac bit set */
|
||||||
set_float_default_nan_pattern(0b00100000, &env->fp_status);
|
set_float_default_nan_pattern(0b00100000, &env->fp_status);
|
||||||
|
/*
|
||||||
|
* "PA-RISC 2.0 Architecture" says it is IMPDEF whether the flushing
|
||||||
|
* enabled by FPSR.D happens before or after rounding. We pick "before"
|
||||||
|
* for consistency with tininess detection.
|
||||||
|
*/
|
||||||
|
set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status);
|
||||||
|
/*
|
||||||
|
* TODO: "PA-RISC 2.0 Architecture" chapter 10 says that we should
|
||||||
|
* detect tininess before rounding, but we don't set that here so we
|
||||||
|
* get the default tininess after rounding.
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
void cpu_hppa_loaded_fr0(CPUHPPAState *env)
|
void cpu_hppa_loaded_fr0(CPUHPPAState *env)
|
||||||
|
@ -188,6 +188,14 @@ void cpu_init_fp_statuses(CPUX86State *env)
|
|||||||
set_float_default_nan_pattern(0b11000000, &env->fp_status);
|
set_float_default_nan_pattern(0b11000000, &env->fp_status);
|
||||||
set_float_default_nan_pattern(0b11000000, &env->mmx_status);
|
set_float_default_nan_pattern(0b11000000, &env->mmx_status);
|
||||||
set_float_default_nan_pattern(0b11000000, &env->sse_status);
|
set_float_default_nan_pattern(0b11000000, &env->sse_status);
|
||||||
|
/*
|
||||||
|
* TODO: x86 does flush-to-zero detection after rounding (the SDM
|
||||||
|
* section 10.2.3.3 on the FTZ bit of MXCSR says that we flush
|
||||||
|
* when we detect underflow, which x86 does after rounding).
|
||||||
|
*/
|
||||||
|
set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status);
|
||||||
|
set_float_ftz_detection(float_ftz_before_rounding, &env->mmx_status);
|
||||||
|
set_float_ftz_detection(float_ftz_before_rounding, &env->sse_status);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline uint8_t save_exception_flags(CPUX86State *env)
|
static inline uint8_t save_exception_flags(CPUX86State *env)
|
||||||
|
@ -84,6 +84,12 @@ static inline void fp_reset(CPUMIPSState *env)
|
|||||||
*/
|
*/
|
||||||
set_float_2nan_prop_rule(float_2nan_prop_s_ab,
|
set_float_2nan_prop_rule(float_2nan_prop_s_ab,
|
||||||
&env->active_fpu.fp_status);
|
&env->active_fpu.fp_status);
|
||||||
|
/*
|
||||||
|
* TODO: the spec doesn't say clearly whether FTZ happens before
|
||||||
|
* or after rounding for normal FPU operations.
|
||||||
|
*/
|
||||||
|
set_float_ftz_detection(float_ftz_before_rounding,
|
||||||
|
&env->active_fpu.fp_status);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* MSA */
|
/* MSA */
|
||||||
|
@ -48,6 +48,15 @@ void msa_reset(CPUMIPSState *env)
|
|||||||
/* tininess detected after rounding.*/
|
/* tininess detected after rounding.*/
|
||||||
set_float_detect_tininess(float_tininess_after_rounding,
|
set_float_detect_tininess(float_tininess_after_rounding,
|
||||||
&env->active_tc.msa_fp_status);
|
&env->active_tc.msa_fp_status);
|
||||||
|
/*
|
||||||
|
* MSACSR.FS detects tiny results to flush to zero before rounding
|
||||||
|
* (per "MIPS Architecture for Programmers Volume IV-j: The MIPS64 SIMD
|
||||||
|
* Architecture Module, Revision 1.1" section 3.5.4), even though it
|
||||||
|
* detects tininess after rounding for underflow purposes (section 3.4.2
|
||||||
|
* table 3.3).
|
||||||
|
*/
|
||||||
|
set_float_ftz_detection(float_ftz_before_rounding,
|
||||||
|
&env->active_tc.msa_fp_status);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* According to MIPS specifications, if one of the two operands is
|
* According to MIPS specifications, if one of the two operands is
|
||||||
|
@ -7262,6 +7262,9 @@ static void ppc_cpu_reset_hold(Object *obj, ResetType type)
|
|||||||
/* tininess for underflow is detected before rounding */
|
/* tininess for underflow is detected before rounding */
|
||||||
set_float_detect_tininess(float_tininess_before_rounding,
|
set_float_detect_tininess(float_tininess_before_rounding,
|
||||||
&env->fp_status);
|
&env->fp_status);
|
||||||
|
/* Similarly for flush-to-zero */
|
||||||
|
set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* PowerPC propagation rules:
|
* PowerPC propagation rules:
|
||||||
* 1. A if it sNaN or qNaN
|
* 1. A if it sNaN or qNaN
|
||||||
|
@ -103,6 +103,14 @@ static void rx_cpu_reset_hold(Object *obj, ResetType type)
|
|||||||
set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status);
|
set_float_2nan_prop_rule(float_2nan_prop_x87, &env->fp_status);
|
||||||
/* Default NaN value: sign bit clear, set frac msb */
|
/* Default NaN value: sign bit clear, set frac msb */
|
||||||
set_float_default_nan_pattern(0b01000000, &env->fp_status);
|
set_float_default_nan_pattern(0b01000000, &env->fp_status);
|
||||||
|
/*
|
||||||
|
* TODO: "RX Family RXv1 Instruction Set Architecture" is not 100% clear
|
||||||
|
* on whether flush-to-zero should happen before or after rounding, but
|
||||||
|
* section 1.3.2 says that it happens when underflow is detected, and
|
||||||
|
* implies that underflow is detected after rounding. So this may not
|
||||||
|
* be the correct setting.
|
||||||
|
*/
|
||||||
|
set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status);
|
||||||
}
|
}
|
||||||
|
|
||||||
static ObjectClass *rx_cpu_class_by_name(const char *cpu_model)
|
static ObjectClass *rx_cpu_class_by_name(const char *cpu_model)
|
||||||
|
@ -130,6 +130,14 @@ static void superh_cpu_reset_hold(Object *obj, ResetType type)
|
|||||||
set_default_nan_mode(1, &env->fp_status);
|
set_default_nan_mode(1, &env->fp_status);
|
||||||
/* sign bit clear, set all frac bits other than msb */
|
/* sign bit clear, set all frac bits other than msb */
|
||||||
set_float_default_nan_pattern(0b00111111, &env->fp_status);
|
set_float_default_nan_pattern(0b00111111, &env->fp_status);
|
||||||
|
/*
|
||||||
|
* TODO: "SH-4 CPU Core Architecture ADCS 7182230F" doesn't say whether
|
||||||
|
* it detects tininess before or after rounding. Section 6.4 is clear
|
||||||
|
* that flush-to-zero happens when the result underflows, though, so
|
||||||
|
* either this should be "detect ftz after rounding" or else we should
|
||||||
|
* be setting "detect tininess before rounding".
|
||||||
|
*/
|
||||||
|
set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void superh_cpu_disas_set_info(CPUState *cpu, disassemble_info *info)
|
static void superh_cpu_disas_set_info(CPUState *cpu, disassemble_info *info)
|
||||||
|
@ -116,6 +116,7 @@ void fpu_set_state(CPUTriCoreState *env)
|
|||||||
set_flush_inputs_to_zero(1, &env->fp_status);
|
set_flush_inputs_to_zero(1, &env->fp_status);
|
||||||
set_flush_to_zero(1, &env->fp_status);
|
set_flush_to_zero(1, &env->fp_status);
|
||||||
set_float_detect_tininess(float_tininess_before_rounding, &env->fp_status);
|
set_float_detect_tininess(float_tininess_before_rounding, &env->fp_status);
|
||||||
|
set_float_ftz_detection(float_ftz_before_rounding, &env->fp_status);
|
||||||
set_default_nan_mode(1, &env->fp_status);
|
set_default_nan_mode(1, &env->fp_status);
|
||||||
/* Default NaN pattern: sign bit clear, frac msb set */
|
/* Default NaN pattern: sign bit clear, frac msb set */
|
||||||
set_float_default_nan_pattern(0b01000000, &env->fp_status);
|
set_float_default_nan_pattern(0b01000000, &env->fp_status);
|
||||||
|
@ -496,6 +496,7 @@ static void run_bench(void)
|
|||||||
set_float_3nan_prop_rule(float_3nan_prop_s_cab, &soft_status);
|
set_float_3nan_prop_rule(float_3nan_prop_s_cab, &soft_status);
|
||||||
set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, &soft_status);
|
set_float_infzeronan_rule(float_infzeronan_dnan_if_qnan, &soft_status);
|
||||||
set_float_default_nan_pattern(0b01000000, &soft_status);
|
set_float_default_nan_pattern(0b01000000, &soft_status);
|
||||||
|
set_float_ftz_detection(float_ftz_before_rounding, &soft_status);
|
||||||
|
|
||||||
f = bench_funcs[operation][precision];
|
f = bench_funcs[operation][precision];
|
||||||
g_assert(f);
|
g_assert(f);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user