target/arm: Implement increased precision FRSQRTE
Implement the increased precision variation of FRSQRTE. In the pseudocode this corresponds to the handling of the "increasedprecision" boolean in the FPRSqrtEstimate() and RecipSqrtEstimate() functions.

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
parent
c1567205e0
commit
22330d2b0f
@ -1015,8 +1015,36 @@ static int do_recip_sqrt_estimate(int a)
|
||||
return estimate;
|
||||
}
|
||||
|
||||
/*
 * Increased-precision (12-bit) reciprocal square root estimate.
 *
 * @a: scaled input, which must satisfy 1024 <= a < 4096 (asserted).
 *
 * Returns the estimate, which always lies in [4096, 8192) (asserted),
 * i.e. a 13-bit value whose top bit is set.
 */
static int do_recip_sqrt_estimate_incprec(int a)
{
    /*
     * The Arm ARM describes the 12-bit precision version of RecipSqrtEstimate
     * in terms of an infinite-precision floating point calculation of a
     * square root. We implement this using the same kind of pure integer
     * algorithm as the 8-bit mantissa, to get the same bit-for-bit result.
     */
    int64_t b, estimate;

    assert(1024 <= a && a < 4096);

    /*
     * Rescale the input so that both halves of the input range use the
     * same fixed-point search below:
     *  - inputs below 2048 are doubled with the low bit set;
     *  - inputs of 2048 and above have their low bit cleared first,
     *    then are incremented and doubled.
     */
    if (a < 2048) {
        a = a * 2 + 1;
    } else {
        a = (a >> 1) << 1;
        a = (a + 1) * 2;
    }

    /*
     * Linear search for the smallest b >= 8192 such that
     * a * (b + 1)^2 >= 2^39.  The product is evaluated in 64 bits
     * (b is int64_t); with a <= 8190 and b + 1 < 16384 it stays below
     * 2^41, so neither the multiplication nor the signed-to-unsigned
     * conversion for the comparison can misbehave.
     */
    b = 8192;
    while (a * (b + 1) * (b + 1) < (1ULL << 39)) {
        b += 1;
    }

    /* Halve (b + 1), truncating, to obtain the final estimate. */
    estimate = (b + 1) / 2;

    assert(4096 <= estimate && estimate < 8192);

    return estimate;
}
|
||||
|
||||
static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac,
|
||||
bool increasedprecision)
|
||||
{
|
||||
int estimate;
|
||||
uint32_t scaled;
|
||||
@ -1029,17 +1057,32 @@ static uint64_t recip_sqrt_estimate(int *exp , int exp_off, uint64_t frac)
|
||||
frac = extract64(frac, 0, 51) << 1;
|
||||
}
|
||||
|
||||
if (*exp & 1) {
|
||||
/* scaled = UInt('01':fraction<51:45>) */
|
||||
scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7));
|
||||
if (increasedprecision) {
|
||||
if (*exp & 1) {
|
||||
/* scaled = UInt('01':fraction<51:42>) */
|
||||
scaled = deposit32(1 << 10, 0, 10, extract64(frac, 42, 10));
|
||||
} else {
|
||||
/* scaled = UInt('1':fraction<51:41>) */
|
||||
scaled = deposit32(1 << 11, 0, 11, extract64(frac, 41, 11));
|
||||
}
|
||||
estimate = do_recip_sqrt_estimate_incprec(scaled);
|
||||
} else {
|
||||
/* scaled = UInt('1':fraction<51:44>) */
|
||||
scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
|
||||
if (*exp & 1) {
|
||||
/* scaled = UInt('01':fraction<51:45>) */
|
||||
scaled = deposit32(1 << 7, 0, 7, extract64(frac, 45, 7));
|
||||
} else {
|
||||
/* scaled = UInt('1':fraction<51:44>) */
|
||||
scaled = deposit32(1 << 8, 0, 8, extract64(frac, 44, 8));
|
||||
}
|
||||
estimate = do_recip_sqrt_estimate(scaled);
|
||||
}
|
||||
estimate = do_recip_sqrt_estimate(scaled);
|
||||
|
||||
*exp = (exp_off - *exp) / 2;
|
||||
return extract64(estimate, 0, 8) << 44;
|
||||
if (increasedprecision) {
|
||||
return extract64(estimate, 0, 12) << 40;
|
||||
} else {
|
||||
return extract64(estimate, 0, 8) << 44;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s)
|
||||
@ -1078,7 +1121,7 @@ uint32_t HELPER(rsqrte_f16)(uint32_t input, float_status *s)
|
||||
|
||||
f64_frac = ((uint64_t) f16_frac) << (52 - 10);
|
||||
|
||||
f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac);
|
||||
f64_frac = recip_sqrt_estimate(&f16_exp, 44, f64_frac, false);
|
||||
|
||||
/* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(2) */
|
||||
val = deposit32(0, 15, 1, f16_sign);
|
||||
@ -1127,12 +1170,20 @@ static float32 do_rsqrte_f32(float32 input, float_status *s, bool rpres)
|
||||
|
||||
f64_frac = ((uint64_t) f32_frac) << 29;
|
||||
|
||||
f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac);
|
||||
f64_frac = recip_sqrt_estimate(&f32_exp, 380, f64_frac, rpres);
|
||||
|
||||
/* result = sign : result_exp<4:0> : estimate<7:0> : Zeros(15) */
|
||||
/*
|
||||
* result = sign : result_exp<7:0> : estimate<7:0> : Zeros(15)
|
||||
* or for increased precision
|
||||
* result = sign : result_exp<7:0> : estimate<11:0> : Zeros(11)
|
||||
*/
|
||||
val = deposit32(0, 31, 1, f32_sign);
|
||||
val = deposit32(val, 23, 8, f32_exp);
|
||||
val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8));
|
||||
if (rpres) {
|
||||
val = deposit32(val, 11, 12, extract64(f64_frac, 52 - 12, 12));
|
||||
} else {
|
||||
val = deposit32(val, 15, 8, extract64(f64_frac, 52 - 8, 8));
|
||||
}
|
||||
return make_float32(val);
|
||||
}
|
||||
|
||||
@ -1176,7 +1227,7 @@ float64 HELPER(rsqrte_f64)(float64 input, float_status *s)
|
||||
return float64_zero;
|
||||
}
|
||||
|
||||
f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac);
|
||||
f64_frac = recip_sqrt_estimate(&f64_exp, 3068, f64_frac, false);
|
||||
|
||||
/* result = sign : result_exp<10:0> : estimate<7:0> : Zeros(44) */
|
||||
val = deposit64(0, 61, 1, f64_sign);
|
||||
|
Loading…
x
Reference in New Issue
Block a user