target/arm: Implement increased precision FRSQRTE

Implement the increased precision variation of FRSQRTE.  In the
pseudocode this corresponds to the handling of the
"increasedprecision" boolean in the FPRSqrtEstimate() and
RecipSqrtEstimate() functions.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Peter Maydell 2025-02-01 16:39:58 +00:00
parent c1567205e0
commit 22330d2b0f

View File

@ -1015,8 +1015,36 @@ static int do_recip_sqrt_estimate(int a)
return estimate;
}
/*
 * Increased-precision (12-bit) reciprocal square root estimate.
 *
 * @a: scaled input; must satisfy 1024 <= a < 4096 (asserted).
 *
 * Returns an integer estimate in the range [4096, 8192) (asserted).
 */
static int do_recip_sqrt_estimate_incprec(int a)
{
    /*
     * The Arm ARM describes the 12-bit precision version of RecipSqrtEstimate
     * in terms of an infinite-precision floating point calculation of a
     * square root. We implement this using the same kind of pure integer
     * algorithm as the 8-bit mantissa, to get the same bit-for-bit result.
     */
    int64_t b, estimate;

    assert(1024 <= a && a < 4096);

    if (a < 2048) {
        /* Lower half of the input range: double and set the low bit. */
        a = a * 2 + 1;
    } else {
        /* Upper half: discard the low bit, then double (a + 1). */
        a = (a >> 1) << 1;
        a = (a + 1) * 2;
    }

    /*
     * Linear search for the smallest b >= 8192 such that
     * a * (b + 1)^2 >= 2^39. With a < 2^14 and b + 1 < 2^15 the product
     * stays far below the int64_t limit, so the arithmetic cannot overflow.
     */
    b = 8192;
    while (a * (b + 1) * (b + 1) < (1ULL << 39)) {
        b += 1;
    }

    /* Halve, rounding to nearest, to obtain the final estimate. */
    estimate = (b + 1) / 2;

    assert(4096 <= estimate && estimate < 8192);

    return estimate;
}
static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac,
bool increasedprecision)
{
int estimate;
uint32_t scaled;
@ -1029,17 +1057,32 @@ static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac)
frac = extract64(frac, 0, 51) << 1;
}
if (*exp & 1) {
/* scaled = UInt('01':fraction<51:45>) */
scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7));
if (increasedprecision) {
if (*exp & 1) {
/* scaled = UInt('01':fraction<51:42>) */
scaled = deposit32(1 << 10, 0, 10, extract64(frac, 42, 10));
} else {
/* scaled = UInt('1':fraction<51:41>) */
scaled = deposit32(1 << 11, 0, 11, extract64(frac, 41, 11));
}
estimate = do_recip_sqrt_estimate_incprec(scaled);
} else {
/* scaled = UInt('1':fraction<51:44>) */
scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
if (*exp & 1) {
/* scaled = UInt('01':fraction<51:45>) */
scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7));
} else {
/* scaled = UInt('1':fraction<51:44>) */
scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
}
estimate = do_recip_sqrt_estimate(scaled);
}
estimate = do_recip_sqrt_estimate(scaled);
*exp = (exp_off - *exp) / 2;
return extract64(estimate, 0, 8) << 44;
if (increasedprecision) {
return extract64(estimate, 0, 12) << 40;
} else {
return extract64(estimate, 0, 8) << 44;
}
}
uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s)
@ -1078,7 +1121,7 @@ uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s)
f64_frac = ((uint64_t) f16_frac) << (52 - 10);
f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac);
f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac, false);
/* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(2) */
val = deposit32(0, 15, 1, f16_sign);
@ -1127,12 +1170,20 @@ static float32 do_rsqrte_f32(float32 input, float_status *s, bool rpres)
f64_frac = ((uint64_t) f32_frac) << 29;
f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac);
f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac, rpres);
/* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(15) */
/*
* result = sign : result_exp<7:0> : estimate<7:0> : Zeros(15)
* or for increased precision
* result = sign : result_exp<7:0> : estimate<11:0> : Zeros(11)
*/
val = deposit32(0, 31, 1, f32_sign);
val = deposit32(val, 23, 8, f32_exp);
val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8));
if (rpres) {
val = deposit32(val, 11, 12, extract64(f64_frac, 52 - 12, 12));
} else {
val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8));
}
return make_float32(val);
}
@ -1176,7 +1227,7 @@ float64 HELPER(rsqrte_f64)(float64 input, float_status *s)
return float64_zero;
}
f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac);
f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac, false);
/* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(44) */
val = deposit64(0, 61, 1, f64_sign);