tcg/optimize: Compute sign mask in fold_deposit

The input that overlaps the sign bit of the output can
have its s_mask propagated to the output's s_mask.

Reviewed-by: Pierrick Bouvier <pierrick.bouvier@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
commit edb832cb51
parent c7739ab83e
Author: Richard Henderson <richard.henderson@linaro.org>
Date:   2024-12-19 17:56:05 -08:00

@@ -1629,8 +1629,9 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
     TempOptInfo *t2 = arg_info(op->args[2]);
     int ofs = op->args[3];
     int len = op->args[4];
+    int width;
     TCGOpcode and_opc;
-    uint64_t z_mask;
+    uint64_t z_mask, s_mask;
 
     if (ti_is_const(t1) && ti_is_const(t2)) {
         return tcg_opt_gen_movi(ctx, op, op->args[0],
@@ -1641,9 +1642,11 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
     switch (ctx->type) {
     case TCG_TYPE_I32:
         and_opc = INDEX_op_and_i32;
+        width = 32;
         break;
     case TCG_TYPE_I64:
         and_opc = INDEX_op_and_i64;
+        width = 64;
         break;
     default:
         g_assert_not_reached();
@@ -1668,8 +1671,15 @@ static bool fold_deposit(OptContext *ctx, TCGOp *op)
         return fold_and(ctx, op);
     }
 
+    /* The s_mask from the top portion of the deposit is still valid. */
+    if (ofs + len == width) {
+        s_mask = t2->s_mask << ofs;
+    } else {
+        s_mask = t1->s_mask & ~MAKE_64BIT_MASK(0, ofs + len);
+    }
+
     z_mask = deposit64(t1->z_mask, ofs, len, t2->z_mask);
-    return fold_masks_z(ctx, op, z_mask);
+    return fold_masks_zs(ctx, op, z_mask, s_mask);
 }
 
 static bool fold_divide(OptContext *ctx, TCGOp *op)
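
For readers unfamiliar with the mask bookkeeping, below is a minimal
standalone sketch of the two cases the new code distinguishes. It is an
illustration, not QEMU code: deposit_s_mask() and make_64bit_mask() are
hypothetical stand-ins for the patched fold_deposit() logic and QEMU's
MAKE_64BIT_MASK macro, and it assumes the convention that a set bit in
s_mask means the corresponding value bit is known to equal the most
significant bit of the result.

#include <stdint.h>
#include <inttypes.h>
#include <stdio.h>

static uint64_t make_64bit_mask(int shift, int length)
{
    /* 'length' consecutive set bits, starting at bit 'shift'. */
    return (length == 64 ? ~0ull : (1ull << length) - 1) << shift;
}

/* Sign mask of deposit(t1, ofs, len, t2) for a 'width'-bit operation. */
static uint64_t deposit_s_mask(uint64_t t1_s_mask, uint64_t t2_s_mask,
                               int ofs, int len, int width)
{
    if (ofs + len == width) {
        /* The deposited field supplies the sign bit, so t2's known
           sign-bit copies, shifted into position, remain valid. */
        return t2_s_mask << ofs;
    }
    /* Otherwise the bits above the deposited field come from t1, so
       t1's sign-bit copies above ofs+len remain valid. */
    return t1_s_mask & ~make_64bit_mask(0, ofs + len);
}

int main(void)
{
    /* t2 known sign-extended from 8 bits, deposited into the top 16
       bits of a 64-bit value: bits 55..63 of the result are known
       copies of the sign bit. */
    printf("%016" PRIx64 "\n", deposit_s_mask(0, ~0ull << 7, 48, 16, 64));

    /* Low 8 bits replaced: t1's sign-bit copies (bits 32..63 here)
       sit above the deposited field and stay valid. */
    printf("%016" PRIx64 "\n", deposit_s_mask(~0ull << 32, 0, 0, 8, 64));
    return 0;
}

Under these assumptions the program prints ff80000000000000 and
ffffffff00000000, matching the two branches of the patch.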