target/arm: Move helper_neon_addlp_{s8, s16} to neon_helper.c
Move from helper-a64.c to neon_helper.c so that these functions
are available for arm32 code as well.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Message-id: 20241211163036.2297116-45-richard.henderson@linaro.org
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
parent
7c6bdcdaed
commit
e90cf92209
@ -399,6 +399,8 @@ DEF_HELPER_2(neon_addl_u16, i64, i64, i64)
|
|||||||
DEF_HELPER_2(neon_addl_u32, i64, i64, i64)
|
DEF_HELPER_2(neon_addl_u32, i64, i64, i64)
|
||||||
DEF_HELPER_2(neon_paddl_u16, i64, i64, i64)
|
DEF_HELPER_2(neon_paddl_u16, i64, i64, i64)
|
||||||
DEF_HELPER_2(neon_paddl_u32, i64, i64, i64)
|
DEF_HELPER_2(neon_paddl_u32, i64, i64, i64)
|
||||||
|
DEF_HELPER_FLAGS_1(neon_addlp_s8, TCG_CALL_NO_RWG_SE, i64, i64)
|
||||||
|
DEF_HELPER_FLAGS_1(neon_addlp_s16, TCG_CALL_NO_RWG_SE, i64, i64)
|
||||||
DEF_HELPER_2(neon_subl_u16, i64, i64, i64)
|
DEF_HELPER_2(neon_subl_u16, i64, i64, i64)
|
||||||
DEF_HELPER_2(neon_subl_u32, i64, i64, i64)
|
DEF_HELPER_2(neon_subl_u32, i64, i64, i64)
|
||||||
DEF_HELPER_3(neon_addl_saturate_s32, i64, env, i64, i64)
|
DEF_HELPER_3(neon_addl_saturate_s32, i64, env, i64, i64)
|
||||||
|
@ -306,39 +306,6 @@ float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp)
|
|||||||
return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
|
return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Pairwise long add: add pairs of adjacent elements into
|
|
||||||
* double-width elements in the result (eg _s8 is an 8x8->16 op)
|
|
||||||
*/
|
|
||||||
uint64_t HELPER(neon_addlp_s8)(uint64_t a)
|
|
||||||
{
|
|
||||||
uint64_t nsignmask = 0x0080008000800080ULL;
|
|
||||||
uint64_t wsignmask = 0x8000800080008000ULL;
|
|
||||||
uint64_t elementmask = 0x00ff00ff00ff00ffULL;
|
|
||||||
uint64_t tmp1, tmp2;
|
|
||||||
uint64_t res, signres;
|
|
||||||
|
|
||||||
/* Extract odd elements, sign extend each to a 16 bit field */
|
|
||||||
tmp1 = a & elementmask;
|
|
||||||
tmp1 ^= nsignmask;
|
|
||||||
tmp1 |= wsignmask;
|
|
||||||
tmp1 = (tmp1 - nsignmask) ^ wsignmask;
|
|
||||||
/* Ditto for the even elements */
|
|
||||||
tmp2 = (a >> 8) & elementmask;
|
|
||||||
tmp2 ^= nsignmask;
|
|
||||||
tmp2 |= wsignmask;
|
|
||||||
tmp2 = (tmp2 - nsignmask) ^ wsignmask;
|
|
||||||
|
|
||||||
/* calculate the result by summing bits 0..14, 16..22, etc,
|
|
||||||
* and then adjusting the sign bits 15, 23, etc manually.
|
|
||||||
* This ensures the addition can't overflow the 16 bit field.
|
|
||||||
*/
|
|
||||||
signres = (tmp1 ^ tmp2) & wsignmask;
|
|
||||||
res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask);
|
|
||||||
res ^= signres;
|
|
||||||
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint64_t HELPER(neon_addlp_u8)(uint64_t a)
|
uint64_t HELPER(neon_addlp_u8)(uint64_t a)
|
||||||
{
|
{
|
||||||
uint64_t tmp;
|
uint64_t tmp;
|
||||||
@ -348,16 +315,6 @@ uint64_t HELPER(neon_addlp_u8)(uint64_t a)
|
|||||||
return tmp;
|
return tmp;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t HELPER(neon_addlp_s16)(uint64_t a)
|
|
||||||
{
|
|
||||||
int32_t reslo, reshi;
|
|
||||||
|
|
||||||
reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16);
|
|
||||||
reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48);
|
|
||||||
|
|
||||||
return (uint32_t)reslo | (((uint64_t)reshi) << 32);
|
|
||||||
}
|
|
||||||
|
|
||||||
uint64_t HELPER(neon_addlp_u16)(uint64_t a)
|
uint64_t HELPER(neon_addlp_u16)(uint64_t a)
|
||||||
{
|
{
|
||||||
uint64_t tmp;
|
uint64_t tmp;
|
||||||
|
@ -41,9 +41,7 @@ DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
|
|||||||
DEF_HELPER_FLAGS_3(rsqrtsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
|
DEF_HELPER_FLAGS_3(rsqrtsf_f16, TCG_CALL_NO_RWG, f16, f16, f16, ptr)
|
||||||
DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
|
DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
|
||||||
DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
|
DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
|
||||||
DEF_HELPER_FLAGS_1(neon_addlp_s8, TCG_CALL_NO_RWG_SE, i64, i64)
|
|
||||||
DEF_HELPER_FLAGS_1(neon_addlp_u8, TCG_CALL_NO_RWG_SE, i64, i64)
|
DEF_HELPER_FLAGS_1(neon_addlp_u8, TCG_CALL_NO_RWG_SE, i64, i64)
|
||||||
DEF_HELPER_FLAGS_1(neon_addlp_s16, TCG_CALL_NO_RWG_SE, i64, i64)
|
|
||||||
DEF_HELPER_FLAGS_1(neon_addlp_u16, TCG_CALL_NO_RWG_SE, i64, i64)
|
DEF_HELPER_FLAGS_1(neon_addlp_u16, TCG_CALL_NO_RWG_SE, i64, i64)
|
||||||
DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
|
DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
|
||||||
DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
|
DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
|
||||||
|
@ -866,6 +866,49 @@ uint64_t HELPER(neon_paddl_u32)(uint64_t a, uint64_t b)
|
|||||||
return low + ((uint64_t)high << 32);
|
return low + ((uint64_t)high << 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Pairwise long add: add pairs of adjacent elements into
|
||||||
|
* double-width elements in the result (eg _s8 is an 8x8->16 op)
|
||||||
|
*/
|
||||||
|
uint64_t HELPER(neon_addlp_s8)(uint64_t a)
|
||||||
|
{
|
||||||
|
uint64_t nsignmask = 0x0080008000800080ULL;
|
||||||
|
uint64_t wsignmask = 0x8000800080008000ULL;
|
||||||
|
uint64_t elementmask = 0x00ff00ff00ff00ffULL;
|
||||||
|
uint64_t tmp1, tmp2;
|
||||||
|
uint64_t res, signres;
|
||||||
|
|
||||||
|
/* Extract odd elements, sign extend each to a 16 bit field */
|
||||||
|
tmp1 = a & elementmask;
|
||||||
|
tmp1 ^= nsignmask;
|
||||||
|
tmp1 |= wsignmask;
|
||||||
|
tmp1 = (tmp1 - nsignmask) ^ wsignmask;
|
||||||
|
/* Ditto for the even elements */
|
||||||
|
tmp2 = (a >> 8) & elementmask;
|
||||||
|
tmp2 ^= nsignmask;
|
||||||
|
tmp2 |= wsignmask;
|
||||||
|
tmp2 = (tmp2 - nsignmask) ^ wsignmask;
|
||||||
|
|
||||||
|
/* calculate the result by summing bits 0..14, 16..22, etc,
|
||||||
|
* and then adjusting the sign bits 15, 23, etc manually.
|
||||||
|
* This ensures the addition can't overflow the 16 bit field.
|
||||||
|
*/
|
||||||
|
signres = (tmp1 ^ tmp2) & wsignmask;
|
||||||
|
res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask);
|
||||||
|
res ^= signres;
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint64_t HELPER(neon_addlp_s16)(uint64_t a)
|
||||||
|
{
|
||||||
|
int32_t reslo, reshi;
|
||||||
|
|
||||||
|
reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16);
|
||||||
|
reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48);
|
||||||
|
|
||||||
|
return (uint32_t)reslo | (((uint64_t)reshi) << 32);
|
||||||
|
}
|
||||||
|
|
||||||
uint64_t HELPER(neon_subl_u16)(uint64_t a, uint64_t b)
|
uint64_t HELPER(neon_subl_u16)(uint64_t a, uint64_t b)
|
||||||
{
|
{
|
||||||
uint64_t mask;
|
uint64_t mask;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user