tcg/optimize: Compute sign mask in fold_deposit

The input which overlaps the sign bit of the output can
have its s_mask propagated to the output s_mask.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2024-12-19 17:56:05 -08:00
parent c7739ab83e
commit edb832cb51

View File

@@ -1629,8 +1629,9 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
TempOptInfo *t2 = arg_info(op->args[2]);
int ofs = op->args[3];
int len = op->args[4];
int width;
TCGOpcode and_opc;
uint64_t z_mask;
uint64_t z_mask, s_mask;
if (ti_is_const(t1) && ti_is_const(t2)) {
return tcg_opt_gen_movi(ctx, op, op->args[0],
@@ -1641,9 +1642,11 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
switch (ctx->type) {
case TCG_TYPE_I32:
and_opc = INDEX_op_and_i32;
width = 32;
break;
case TCG_TYPE_I64:
and_opc = INDEX_op_and_i64;
width = 64;
break;
default:
g_assert_not_reached();
@@ -1668,8 +1671,15 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
return fold_and(ctx, op);
}
/* The s_mask from the top portion of the deposit is still valid. */
if (ofs + len == width) {
s_mask = t2->s_mask << ofs;
} else {
s_mask = t1->s_mask & ~MAKE_64BIT_MASK(0, ofs + len);
}
z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask);
return fold_masks_z(ctx, op, z_mask);
return fold_masks_zs(ctx, op, z_mask, s_mask);
}
static bool fold_divide(OptContext *ctx, TCGOp *op)