; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s

; Try to eliminate binops and shuffles when the shuffle is a select in disguise:
; PR37806 - https://bugs.llvm.org/show_bug.cgi?id=37806

; NOTE(review): in this copy of the test the constant vector operands and the
; shuffle masks are absent (for example `add <4 x i32> %v,` has no second
; operand). Restore them from the original test before running it.

; One binop of a variable and a constant, then a shuffle of the binop result
; with the original variable. The CHECK lines expect a single binop.

define <4 x i32> @add(<4 x i32> %v) {
; CHECK-LABEL: @add(
; CHECK-NEXT: [[S:%.*]] = add <4 x i32> [[V:%.*]],
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = add <4 x i32> %v,
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32>
  ret <4 x i32> %s
}

; Propagate flags when possible.

define <4 x i32> @add_nuw_nsw(<4 x i32> %v) {
; CHECK-LABEL: @add_nuw_nsw(
; CHECK-NEXT: [[S:%.*]] = add nuw nsw <4 x i32> [[V:%.*]],
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = add nuw nsw <4 x i32> %v,
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32>
  ret <4 x i32> %s
}

define <4 x i32> @add_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @add_undef_mask_elt(
; CHECK-NEXT: [[S:%.*]] = add <4 x i32> [[V:%.*]],
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = add <4 x i32> %v,
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32>
  ret <4 x i32> %s
}

; Poison flags must be dropped or undef must be replaced with safe constant.
; (The input add carries nuw nsw; the expected add carries no flags.)

define <4 x i32> @add_nuw_nsw_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @add_nuw_nsw_undef_mask_elt(
; CHECK-NEXT: [[S:%.*]] = add <4 x i32> [[V:%.*]],
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = add nuw nsw <4 x i32> %v,
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32>
  ret <4 x i32> %s
}

; Constant operand 0 (LHS) could work for some non-commutative binops?
; (Negative test as written: the CHECK lines still contain the sub and the shuffle.)

define <4 x i32> @sub(<4 x i32> %v) {
; CHECK-LABEL: @sub(
; CHECK-NEXT: [[B:%.*]] = sub <4 x i32> , [[V:%.*]]
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = sub <4 x i32> , %v
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32>
  ret <4 x i32> %s
}

; If any element of the shuffle mask operand is undef, that element of the result is undef.
; The shuffle is eliminated in this transform, but we can replace a constant element with undef.
; Preserve flags when possible. It's not safe to propagate poison-generating flags with undef constants.
; (The input mul carries nsw nuw; the expected mul carries no flags.)

define <4 x i32> @mul(<4 x i32> %v) {
; CHECK-LABEL: @mul(
; CHECK-NEXT: [[S:%.*]] = mul <4 x i32> [[V:%.*]],
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = mul nsw nuw <4 x i32> %v,
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32>
  ret <4 x i32> %s
}

define <4 x i32> @shl(<4 x i32> %v) {
; CHECK-LABEL: @shl(
; CHECK-NEXT: [[S:%.*]] = shl <4 x i32> [[V:%.*]],
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = shl <4 x i32> %v,
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32>
  ret <4 x i32> %s
}

define <4 x i32> @shl_nsw(<4 x i32> %v) {
; CHECK-LABEL: @shl_nsw(
; CHECK-NEXT: [[S:%.*]] = shl nsw <4 x i32> [[V:%.*]],
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = shl nsw <4 x i32> %v,
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32>
  ret <4 x i32> %s
}

define <4 x i32> @shl_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @shl_undef_mask_elt(
; CHECK-NEXT: [[S:%.*]] = shl <4 x i32> [[V:%.*]],
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = shl <4 x i32> %v,
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32>
  ret <4 x i32> %s
}

define <4 x i32> @shl_nuw_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @shl_nuw_undef_mask_elt(
; CHECK-NEXT: [[S:%.*]] = shl nuw <4 x i32> [[V:%.*]],
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = shl nuw <4 x i32> %v,
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32>
  ret <4 x i32> %s
}

; NOTE(review): despite the "_constant_op0" suffix, the constant in the next
; two tests is written as operand 1 of the lshr, while @lshr_constant_op1 has
; it as operand 0. The fdiv_constant_op0/op1 tests in this file use the
; opposite naming. Names are left unchanged here; confirm before renaming.

define <4 x i32> @lshr_constant_op0(<4 x i32> %v) {
; CHECK-LABEL: @lshr_constant_op0(
; CHECK-NEXT: [[S:%.*]] = lshr <4 x i32> [[V:%.*]],
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = lshr <4 x i32> %v,
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32>
  ret <4 x i32> %s
}

define <4 x i32> @lshr_exact_constant_op0(<4 x i32> %v) {
; CHECK-LABEL: @lshr_exact_constant_op0(
; CHECK-NEXT: [[S:%.*]] = lshr exact <4 x i32> [[V:%.*]],
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = lshr exact <4 x i32> %v,
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32>
  ret <4 x i32> %s
}

; This test uses lshr so that it matches its name. It previously used shl,
; which duplicated @shl_undef_mask_elt and left the non-exact lshr case with
; an undef mask element untested.
; NOTE(review): the CHECK line below was edited by hand to match; re-run
; utils/update_test_checks.py to regenerate it.

define <4 x i32> @lshr_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @lshr_undef_mask_elt(
; CHECK-NEXT: [[S:%.*]] = lshr <4 x i32> [[V:%.*]],
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = lshr <4 x i32> %v,
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32>
  ret <4 x i32> %s
}

define <4 x i32> @lshr_exact_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @lshr_exact_undef_mask_elt(
; CHECK-NEXT: [[S:%.*]] = lshr exact <4 x i32> [[V:%.*]],
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = lshr exact <4 x i32> %v,
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32>
  ret <4 x i32> %s
}

; Negative test as written: the CHECK lines still contain the lshr and the shuffle.

define <4 x i32> @lshr_constant_op1(<4 x i32> %v) {
; CHECK-LABEL: @lshr_constant_op1(
; CHECK-NEXT: [[B:%.*]] = lshr exact <4 x i32> , [[V:%.*]]
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = lshr exact <4 x i32> , %v
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32>
  ret <4 x i32> %s
}

; Try weird types.

define <3 x i32> @ashr(<3 x i32> %v) {
; CHECK-LABEL: @ashr(
; CHECK-NEXT: [[S:%.*]] = ashr <3 x i32> [[V:%.*]],
; CHECK-NEXT: ret <3 x i32> [[S]]
;
  %b = ashr <3 x i32> %v,
  %s = shufflevector <3 x i32> %b, <3 x i32> %v, <3 x i32>
  ret <3 x i32> %s
}

define <3 x i42> @and(<3 x i42> %v) {
; CHECK-LABEL: @and(
; CHECK-NEXT: [[S:%.*]] = and <3 x i42> [[V:%.*]],
; CHECK-NEXT: ret <3 x i42> [[S]]
;
  %b = and <3 x i42> %v,
  %s = shufflevector <3 x i42> %v, <3 x i42> %b, <3 x i32>
  ret <3 x i42> %s
}

; It doesn't matter if the intermediate op has extra uses.
; External function with no body in this file; calling it gives an
; intermediate value an additional use.
declare void @use_v4i32(<4 x i32>)

define <4 x i32> @or(<4 x i32> %v) {
; CHECK-LABEL: @or(
; CHECK-NEXT: [[B:%.*]] = or <4 x i32> [[V:%.*]],
; CHECK-NEXT: [[S:%.*]] = or <4 x i32> [[V]],
; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[B]])
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = or <4 x i32> %v,
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32>
  call void @use_v4i32(<4 x i32> %b)
  ret <4 x i32> %s
}

define <4 x i32> @xor(<4 x i32> %v) {
; CHECK-LABEL: @xor(
; CHECK-NEXT: [[S:%.*]] = xor <4 x i32> [[V:%.*]],
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = xor <4 x i32> %v,
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32>
  ret <4 x i32> %s
}

; The udiv tests below have the constant as operand 0; their CHECK lines
; still contain both the udiv and the shuffle.

define <4 x i32> @udiv(<4 x i32> %v) {
; CHECK-LABEL: @udiv(
; CHECK-NEXT: [[B:%.*]] = udiv <4 x i32> , [[V:%.*]]
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = udiv <4 x i32> , %v
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32>
  ret <4 x i32> %s
}

define <4 x i32> @udiv_exact(<4 x i32> %v) {
; CHECK-LABEL: @udiv_exact(
; CHECK-NEXT: [[B:%.*]] = udiv exact <4 x i32> , [[V:%.*]]
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = udiv exact <4 x i32> , %v
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32>
  ret <4 x i32> %s
}

define <4 x i32> @udiv_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @udiv_undef_mask_elt(
; CHECK-NEXT: [[B:%.*]] = udiv <4 x i32> , [[V:%.*]]
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = udiv <4 x i32> , %v
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32>
  ret <4 x i32> %s
}

define <4 x i32> @udiv_exact_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @udiv_exact_undef_mask_elt(
; CHECK-NEXT: [[B:%.*]] = udiv exact <4 x i32> , [[V:%.*]]
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = udiv exact <4 x i32> , %v
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32>
  ret <4 x i32> %s
}

; The sdiv tests below have the constant as operand 1; their CHECK lines
; expect a single sdiv.

define <4 x i32> @sdiv(<4 x i32> %v) {
; CHECK-LABEL: @sdiv(
; CHECK-NEXT: [[S:%.*]] = sdiv <4 x i32> [[V:%.*]],
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = sdiv <4 x i32> %v,
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32>
  ret <4 x i32> %s
}

define <4 x i32> @sdiv_exact(<4 x i32> %v) {
; CHECK-LABEL: @sdiv_exact(
; CHECK-NEXT: [[S:%.*]] = sdiv exact <4 x i32> [[V:%.*]],
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = sdiv exact <4 x i32> %v,
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32>
  ret <4 x i32> %s
}

; Div/rem need special handling if the shuffle has undef elements.

define <4 x i32> @sdiv_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @sdiv_undef_mask_elt(
; CHECK-NEXT: [[S:%.*]] = sdiv <4 x i32> [[V:%.*]],
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = sdiv <4 x i32> %v,
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32>
  ret <4 x i32> %s
}

define <4 x i32> @sdiv_exact_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @sdiv_exact_undef_mask_elt(
; CHECK-NEXT: [[S:%.*]] = sdiv exact <4 x i32> [[V:%.*]],
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = sdiv exact <4 x i32> %v,
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32>
  ret <4 x i32> %s
}

; The urem/srem tests below have the constant as operand 0; their CHECK lines
; still contain both the rem and the shuffle.

define <4 x i32> @urem(<4 x i32> %v) {
; CHECK-LABEL: @urem(
; CHECK-NEXT: [[B:%.*]] = urem <4 x i32> , [[V:%.*]]
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = urem <4 x i32> , %v
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32>
  ret <4 x i32> %s
}

define <4 x i32> @urem_undef_mask_elt(<4 x i32> %v) {
; CHECK-LABEL: @urem_undef_mask_elt(
; CHECK-NEXT: [[B:%.*]] = urem <4 x i32> , [[V:%.*]]
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = urem <4 x i32> , %v
  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32>
  ret <4 x i32> %s
}

define <4 x i32> @srem(<4 x i32> %v) {
; CHECK-LABEL: @srem(
; CHECK-NEXT: [[B:%.*]] = srem <4 x i32> , [[V:%.*]]
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %b = srem <4 x i32> , %v
  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32>
  ret <4 x i32> %s
}

; Try FP ops/types.

define <4 x float> @fadd(<4 x float> %v) {
; CHECK-LABEL: @fadd(
; CHECK-NEXT: [[S:%.*]] = fadd <4 x float> [[V:%.*]],
; CHECK-NEXT: ret <4 x float> [[S]]
;
  %b = fadd <4 x float> %v,
  %s = shufflevector <4 x float> %b, <4 x float> %v, <4 x i32>
  ret <4 x float> %s
}

define <4 x double> @fsub(<4 x double> %v) {
; CHECK-LABEL: @fsub(
; CHECK-NEXT: [[B:%.*]] = fsub <4 x double> , [[V:%.*]]
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x double> [[V]], <4 x double> [[B]], <4 x i32>
; CHECK-NEXT: ret <4 x double> [[S]]
;
  %b = fsub <4 x double> , %v
  %s = shufflevector <4 x double> %v, <4 x double> %b, <4 x i32>
  ret <4 x double> %s
}

; Propagate any FMF.

define <4 x float> @fmul(<4 x float> %v) {
; CHECK-LABEL: @fmul(
; CHECK-NEXT: [[S:%.*]] = fmul nnan ninf <4 x float> [[V:%.*]],
; CHECK-NEXT: ret <4 x float> [[S]]
;
  %b = fmul nnan ninf <4 x float> %v,
  %s = shufflevector <4 x float> %b, <4 x float> %v, <4 x i32>
  ret <4 x float> %s
}

define <4 x double> @fdiv_constant_op0(<4 x double> %v) {
; CHECK-LABEL: @fdiv_constant_op0(
; CHECK-NEXT: [[B:%.*]] = fdiv fast <4 x double> , [[V:%.*]]
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x double> [[V]], <4 x double> [[B]], <4 x i32>
; CHECK-NEXT: ret <4 x double> [[S]]
;
  %b = fdiv fast <4 x double> , %v
  %s = shufflevector <4 x double> %v, <4 x double> %b, <4 x i32>
  ret <4 x double> %s
}

define <4 x double> @fdiv_constant_op1(<4 x double> %v) {
; CHECK-LABEL: @fdiv_constant_op1(
; CHECK-NEXT: [[S:%.*]] = fdiv reassoc <4 x double> [[V:%.*]],
; CHECK-NEXT: ret <4 x double> [[S]]
;
  %b = fdiv reassoc <4 x double> %v,
  %s = shufflevector <4 x double> %v, <4 x double> %b, <4 x i32>
  ret <4 x double> %s
}

define <4 x double> @frem(<4 x double> %v) {
; CHECK-LABEL: @frem(
; CHECK-NEXT: [[B:%.*]] = frem <4 x double> , [[V:%.*]]
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x double> [[B]], <4 x double> [[V]], <4 x i32>
; CHECK-NEXT: ret <4 x double> [[S]]
;
  %b = frem <4 x double> , %v
  %s = shufflevector <4 x double> %b, <4 x double> %v, <4 x i32>
  ret <4 x double> %s
}

; Tests where both operands of the shuffle are binops with the same opcode.

define <4 x i32> @add_add(<4 x i32> %v0) {
; CHECK-LABEL: @add_add(
; CHECK-NEXT: [[T3:%.*]] = add <4 x i32> [[V0:%.*]],
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = add <4 x i32> %v0,
  %t2 = add <4 x i32> %v0,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

define <4 x i32> @add_add_nsw(<4 x i32> %v0) {
; CHECK-LABEL: @add_add_nsw(
; CHECK-NEXT: [[T3:%.*]] = add nsw <4 x i32> [[V0:%.*]],
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = add nsw <4 x i32> %v0,
  %t2 = add nsw <4 x i32> %v0,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

define <4 x i32> @add_add_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @add_add_undef_mask_elt(
; CHECK-NEXT: [[T3:%.*]] = add <4 x i32> [[V0:%.*]],
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = add <4 x i32> %v0,
  %t2 = add <4 x i32> %v0,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; Poison flags must be dropped or undef must be replaced with safe constant.

define <4 x i32> @add_add_nsw_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @add_add_nsw_undef_mask_elt(
; CHECK-NEXT: [[T3:%.*]] = add <4 x i32> [[V0:%.*]],
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = add nsw <4 x i32> %v0,
  %t2 = add nsw <4 x i32> %v0,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; Constant operand 0 (LHS) also works.
; Both shuffle inputs are sub instructions with a constant as operand 0 and
; the same variable (%v0) as operand 1; the CHECK lines expect a single sub.

define <4 x i32> @sub_sub(<4 x i32> %v0) {
; CHECK-LABEL: @sub_sub(
; CHECK-NEXT: [[T3:%.*]] = sub <4 x i32> , [[V0:%.*]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = sub <4 x i32> , %v0
  %t2 = sub <4 x i32> , %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

define <4 x i32> @sub_sub_nuw(<4 x i32> %v0) {
; CHECK-LABEL: @sub_sub_nuw(
; CHECK-NEXT: [[T3:%.*]] = sub nuw <4 x i32> , [[V0:%.*]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = sub nuw <4 x i32> , %v0
  %t2 = sub nuw <4 x i32> , %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

define <4 x i32> @sub_sub_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @sub_sub_undef_mask_elt(
; CHECK-NEXT: [[T3:%.*]] = sub <4 x i32> , [[V0:%.*]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = sub <4 x i32> , %v0
  %t2 = sub <4 x i32> , %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; Poison flags must be dropped or undef must be replaced with safe constant.
; (Both input subs carry nuw; the expected sub carries no flags.)

define <4 x i32> @sub_sub_nuw_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @sub_sub_nuw_undef_mask_elt(
; CHECK-NEXT: [[T3:%.*]] = sub <4 x i32> , [[V0:%.*]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = sub nuw <4 x i32> , %v0
  %t2 = sub nuw <4 x i32> , %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; If any element of the shuffle mask operand is undef, that element of the result is undef.
; The shuffle is eliminated in this transform, but we can replace a constant element with undef.

define <4 x i32> @mul_mul(<4 x i32> %v0) {
; CHECK-LABEL: @mul_mul(
; CHECK-NEXT: [[T3:%.*]] = mul <4 x i32> [[V0:%.*]],
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = mul <4 x i32> %v0,
  %t2 = mul <4 x i32> %v0,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; Preserve flags when possible.
; Both shuffle inputs are shifts of the same variable (%v0); the CHECK lines
; expect a single shift.

define <4 x i32> @shl_shl(<4 x i32> %v0) {
; CHECK-LABEL: @shl_shl(
; CHECK-NEXT: [[T3:%.*]] = shl <4 x i32> [[V0:%.*]],
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = shl <4 x i32> %v0,
  %t2 = shl <4 x i32> %v0,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

define <4 x i32> @shl_shl_nuw(<4 x i32> %v0) {
; CHECK-LABEL: @shl_shl_nuw(
; CHECK-NEXT: [[T3:%.*]] = shl nuw <4 x i32> [[V0:%.*]],
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = shl nuw <4 x i32> %v0,
  %t2 = shl nuw <4 x i32> %v0,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; Shift by undef is poison. Undef must be replaced by safe constant.

define <4 x i32> @shl_shl_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @shl_shl_undef_mask_elt(
; CHECK-NEXT: [[T3:%.*]] = shl <4 x i32> [[V0:%.*]],
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = shl <4 x i32> %v0,
  %t2 = shl <4 x i32> %v0,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; Shift by undef is poison. Undef must be replaced by safe constant.
; (The nuw flag is kept in the expected output here.)

define <4 x i32> @shl_shl_nuw_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @shl_shl_nuw_undef_mask_elt(
; CHECK-NEXT: [[T3:%.*]] = shl nuw <4 x i32> [[V0:%.*]],
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = shl nuw <4 x i32> %v0,
  %t2 = shl nuw <4 x i32> %v0,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; Can't propagate the flag here.
; (Only %t1 carries 'exact'; the expected lshr carries no flag.)

define <4 x i32> @lshr_lshr(<4 x i32> %v0) {
; CHECK-LABEL: @lshr_lshr(
; CHECK-NEXT: [[T3:%.*]] = lshr <4 x i32> , [[V0:%.*]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = lshr exact <4 x i32> , %v0
  %t2 = lshr <4 x i32> , %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; Try weird types.
; Non-power-of-2 element counts (<3 x ...>) and a non-power-of-2 integer
; width (i42).

define <3 x i32> @ashr_ashr(<3 x i32> %v0) {
; CHECK-LABEL: @ashr_ashr(
; CHECK-NEXT: [[T3:%.*]] = ashr <3 x i32> [[V0:%.*]],
; CHECK-NEXT: ret <3 x i32> [[T3]]
;
  %t1 = ashr <3 x i32> %v0,
  %t2 = ashr <3 x i32> %v0,
  %t3 = shufflevector <3 x i32> %t1, <3 x i32> %t2, <3 x i32>
  ret <3 x i32> %t3
}

define <3 x i42> @and_and(<3 x i42> %v0) {
; CHECK-LABEL: @and_and(
; CHECK-NEXT: [[T3:%.*]] = and <3 x i42> [[V0:%.*]],
; CHECK-NEXT: ret <3 x i42> [[T3]]
;
  %t1 = and <3 x i42> %v0,
  %t2 = and <3 x i42> %v0,
  %t3 = shufflevector <3 x i42> %t1, <3 x i42> %t2, <3 x i32>
  ret <3 x i42> %t3
}

; It doesn't matter if the intermediate ops have extra uses.
; (@or_or keeps %t1 alive, @xor_xor keeps %t2 alive, @udiv_udiv keeps both.)

define <4 x i32> @or_or(<4 x i32> %v0) {
; CHECK-LABEL: @or_or(
; CHECK-NEXT: [[T1:%.*]] = or <4 x i32> [[V0:%.*]],
; CHECK-NEXT: [[T3:%.*]] = or <4 x i32> [[V0]],
; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T1]])
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = or <4 x i32> %v0,
  %t2 = or <4 x i32> %v0,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  call void @use_v4i32(<4 x i32> %t1)
  ret <4 x i32> %t3
}

define <4 x i32> @xor_xor(<4 x i32> %v0) {
; CHECK-LABEL: @xor_xor(
; CHECK-NEXT: [[T2:%.*]] = xor <4 x i32> [[V0:%.*]],
; CHECK-NEXT: [[T3:%.*]] = xor <4 x i32> [[V0]],
; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T2]])
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = xor <4 x i32> %v0,
  %t2 = xor <4 x i32> %v0,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  call void @use_v4i32(<4 x i32> %t2)
  ret <4 x i32> %t3
}

define <4 x i32> @udiv_udiv(<4 x i32> %v0) {
; CHECK-LABEL: @udiv_udiv(
; CHECK-NEXT: [[T1:%.*]] = udiv <4 x i32> , [[V0:%.*]]
; CHECK-NEXT: [[T2:%.*]] = udiv <4 x i32> , [[V0]]
; CHECK-NEXT: [[T3:%.*]] = udiv <4 x i32> , [[V0]]
; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T1]])
; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T2]])
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = udiv <4 x i32> , %v0
  %t2 = udiv <4 x i32> , %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  call void @use_v4i32(<4 x i32> %t1)
  call void @use_v4i32(<4 x i32> %t2)
  ret <4 x i32> %t3
}

; Div/rem need special handling if the shuffle has undef elements.

define <4 x i32> @sdiv_sdiv(<4 x i32> %v0) {
; CHECK-LABEL: @sdiv_sdiv(
; CHECK-NEXT: [[T3:%.*]] = sdiv <4 x i32> [[V0:%.*]],
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = sdiv <4 x i32> %v0,
  %t2 = sdiv <4 x i32> %v0,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

define <4 x i32> @sdiv_sdiv_exact(<4 x i32> %v0) {
; CHECK-LABEL: @sdiv_sdiv_exact(
; CHECK-NEXT: [[T3:%.*]] = sdiv exact <4 x i32> [[V0:%.*]],
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = sdiv exact <4 x i32> %v0,
  %t2 = sdiv exact <4 x i32> %v0,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

define <4 x i32> @sdiv_sdiv_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @sdiv_sdiv_undef_mask_elt(
; CHECK-NEXT: [[T3:%.*]] = sdiv <4 x i32> [[V0:%.*]],
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = sdiv <4 x i32> %v0,
  %t2 = sdiv <4 x i32> %v0,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

define <4 x i32> @sdiv_sdiv_exact_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @sdiv_sdiv_exact_undef_mask_elt(
; CHECK-NEXT: [[T3:%.*]] = sdiv exact <4 x i32> [[V0:%.*]],
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = sdiv exact <4 x i32> %v0,
  %t2 = sdiv exact <4 x i32> %v0,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

define <4 x i32> @urem_urem(<4 x i32> %v0) {
; CHECK-LABEL: @urem_urem(
; CHECK-NEXT: [[T3:%.*]] = urem <4 x i32> , [[V0:%.*]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = urem <4 x i32> , %v0
  %t2 = urem <4 x i32> , %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; This is folded by using a safe constant.
; Both shuffle inputs are the same binop with a constant as operand 0 and the
; same variable (%v0) as operand 1 (for fadd the constant is operand 1); the
; CHECK lines expect a single binop.

define <4 x i32> @urem_urem_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @urem_urem_undef_mask_elt(
; CHECK-NEXT: [[T3:%.*]] = urem <4 x i32> , [[V0:%.*]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = urem <4 x i32> , %v0
  %t2 = urem <4 x i32> , %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

define <4 x i32> @srem_srem(<4 x i32> %v0) {
; CHECK-LABEL: @srem_srem(
; CHECK-NEXT: [[T3:%.*]] = srem <4 x i32> , [[V0:%.*]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = srem <4 x i32> , %v0
  %t2 = srem <4 x i32> , %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; This is folded by using a safe constant.

define <4 x i32> @srem_srem_undef_mask_elt(<4 x i32> %v0) {
; CHECK-LABEL: @srem_srem_undef_mask_elt(
; CHECK-NEXT: [[T3:%.*]] = srem <4 x i32> , [[V0:%.*]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = srem <4 x i32> , %v0
  %t2 = srem <4 x i32> , %v0
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; Try FP ops/types.

define <4 x float> @fadd_fadd(<4 x float> %v0) {
; CHECK-LABEL: @fadd_fadd(
; CHECK-NEXT: [[T3:%.*]] = fadd <4 x float> [[V0:%.*]],
; CHECK-NEXT: ret <4 x float> [[T3]]
;
  %t1 = fadd <4 x float> %v0,
  %t2 = fadd <4 x float> %v0,
  %t3 = shufflevector <4 x float> %t1, <4 x float> %t2, <4 x i32>
  ret <4 x float> %t3
}

define <4 x double> @fsub_fsub(<4 x double> %v0) {
; CHECK-LABEL: @fsub_fsub(
; CHECK-NEXT: [[T3:%.*]] = fsub <4 x double> , [[V0:%.*]]
; CHECK-NEXT: ret <4 x double> [[T3]]
;
  %t1 = fsub <4 x double> , %v0
  %t2 = fsub <4 x double> , %v0
  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32>
  ret <4 x double> %t3
}

; Intersect any FMF.
; Fast-math flags on the merged FP binop: both fmuls carry 'nnan ninf' and the
; result keeps them; the fdivs carry 'fast' and 'nnan arcp' and the result
; keeps only 'nnan arcp'.

define <4 x float> @fmul_fmul(<4 x float> %v0) {
; CHECK-LABEL: @fmul_fmul(
; CHECK-NEXT: [[T3:%.*]] = fmul nnan ninf <4 x float> [[V0:%.*]],
; CHECK-NEXT: ret <4 x float> [[T3]]
;
  %t1 = fmul nnan ninf <4 x float> %v0,
  %t2 = fmul nnan ninf <4 x float> %v0,
  %t3 = shufflevector <4 x float> %t1, <4 x float> %t2, <4 x i32>
  ret <4 x float> %t3
}

define <4 x double> @fdiv_fdiv(<4 x double> %v0) {
; CHECK-LABEL: @fdiv_fdiv(
; CHECK-NEXT: [[T3:%.*]] = fdiv nnan arcp <4 x double> , [[V0:%.*]]
; CHECK-NEXT: ret <4 x double> [[T3]]
;
  %t1 = fdiv fast <4 x double> , %v0
  %t2 = fdiv nnan arcp <4 x double> , %v0
  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32>
  ret <4 x double> %t3
}

; The variable operand must be either the first operand or second operand in both binops.
; (Negative test: %t1 has %v0 as operand 1 and %t2 has it as operand 0, so
; both frems and the shuffle remain in the CHECK lines.)

define <4 x double> @frem_frem(<4 x double> %v0) {
; CHECK-LABEL: @frem_frem(
; CHECK-NEXT: [[T1:%.*]] = frem <4 x double> , [[V0:%.*]]
; CHECK-NEXT: [[T2:%.*]] = frem <4 x double> [[V0]],
; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x double> [[T1]], <4 x double> [[T2]], <4 x i32>
; CHECK-NEXT: ret <4 x double> [[T3]]
;
  %t1 = frem <4 x double> , %v0
  %t2 = frem <4 x double> %v0,
  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32>
  ret <4 x double> %t3
}

; From here on the two binops use different variables (%v0 and %v1). The
; CHECK lines expect a shuffle of the variables followed by one binop.

define <4 x i32> @add_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @add_2_vars(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32>
; CHECK-NEXT: [[T3:%.*]] = add <4 x i32> [[TMP1]],
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = add <4 x i32> %v0,
  %t2 = add <4 x i32> %v1,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; Constant operand 0 (LHS) also works.
; sub with a constant as operand 0 and two different variables (%v0, %v1).
; The CHECK lines expect a shuffle of the variables followed by one sub.

define <4 x i32> @sub_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sub_2_vars(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32>
; CHECK-NEXT: [[T3:%.*]] = sub <4 x i32> , [[TMP1]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = sub <4 x i32> , %v0
  %t2 = sub <4 x i32> , %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

define <4 x i32> @sub_2_vars_nsw(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sub_2_vars_nsw(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32>
; CHECK-NEXT: [[T3:%.*]] = sub nsw <4 x i32> , [[TMP1]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = sub nsw <4 x i32> , %v0
  %t2 = sub nsw <4 x i32> , %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

define <4 x i32> @sub_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sub_2_vars_undef_mask_elt(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32>
; CHECK-NEXT: [[T3:%.*]] = sub <4 x i32> , [[TMP1]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = sub <4 x i32> , %v0
  %t2 = sub <4 x i32> , %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; Poison flags must be dropped or undef must be replaced with safe constant.
; (Both input subs carry nsw; the expected sub carries no flags.)

define <4 x i32> @sub_2_vars_nsw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sub_2_vars_nsw_undef_mask_elt(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32>
; CHECK-NEXT: [[T3:%.*]] = sub <4 x i32> , [[TMP1]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = sub nsw <4 x i32> , %v0
  %t2 = sub nsw <4 x i32> , %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; If any element of the shuffle mask operand is undef, that element of the result is undef.
; The shuffle is eliminated in this transform, but we can replace a constant element with undef.
; mul of two different variables (%v0, %v1) by constants. The CHECK lines
; expect a shuffle of the variables followed by one mul.

define <4 x i32> @mul_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @mul_2_vars(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32>
; CHECK-NEXT: [[T3:%.*]] = mul <4 x i32> [[TMP1]],
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = mul <4 x i32> %v0,
  %t2 = mul <4 x i32> %v1,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

define <4 x i32> @mul_2_vars_nuw(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @mul_2_vars_nuw(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32>
; CHECK-NEXT: [[T3:%.*]] = mul nuw <4 x i32> [[TMP1]],
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = mul nuw <4 x i32> %v0,
  %t2 = mul nuw <4 x i32> %v1,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

define <4 x i32> @mul_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @mul_2_vars_undef_mask_elt(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32>
; CHECK-NEXT: [[T3:%.*]] = mul <4 x i32> [[TMP1]],
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = mul <4 x i32> %v0,
  %t2 = mul <4 x i32> %v1,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; Poison flags must be dropped or undef must be replaced with safe constant.
; (Both input muls carry nuw; the expected mul carries no flags.)

define <4 x i32> @mul_2_vars_nuw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @mul_2_vars_nuw_undef_mask_elt(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32>
; CHECK-NEXT: [[T3:%.*]] = mul <4 x i32> [[TMP1]],
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = mul nuw <4 x i32> %v0,
  %t2 = mul nuw <4 x i32> %v1,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; Preserve flags when possible.
; shl of two different variables (%v0, %v1) by constants. The CHECK lines
; expect a shuffle of the variables followed by one shl.

define <4 x i32> @shl_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @shl_2_vars(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32>
; CHECK-NEXT: [[T3:%.*]] = shl <4 x i32> [[TMP1]],
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = shl <4 x i32> %v0,
  %t2 = shl <4 x i32> %v1,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

define <4 x i32> @shl_2_vars_nsw(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @shl_2_vars_nsw(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32>
; CHECK-NEXT: [[T3:%.*]] = shl nsw <4 x i32> [[TMP1]],
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = shl nsw <4 x i32> %v0,
  %t2 = shl nsw <4 x i32> %v1,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; Shift by undef is poison. Undef is replaced by safe constant.

define <4 x i32> @shl_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @shl_2_vars_undef_mask_elt(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32>
; CHECK-NEXT: [[T3:%.*]] = shl <4 x i32> [[TMP1]],
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = shl <4 x i32> %v0,
  %t2 = shl <4 x i32> %v1,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; Shift by undef is poison. Undef is replaced by safe constant.
; (The nsw flag is kept in the expected output here.)

define <4 x i32> @shl_2_vars_nsw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @shl_2_vars_nsw_undef_mask_elt(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32>
; CHECK-NEXT: [[T3:%.*]] = shl nsw <4 x i32> [[TMP1]],
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = shl nsw <4 x i32> %v0,
  %t2 = shl nsw <4 x i32> %v1,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; Can't propagate the flag here.
; lshr with a constant as operand 0 and two different variables (%v0, %v1) as
; the shift amounts. In @lshr_2_vars only %t2 carries 'exact' and the expected
; lshr carries no flag.

define <4 x i32> @lshr_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @lshr_2_vars(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32>
; CHECK-NEXT: [[T3:%.*]] = lshr <4 x i32> , [[TMP1]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = lshr <4 x i32> , %v0
  %t2 = lshr exact <4 x i32> , %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

define <4 x i32> @lshr_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @lshr_2_vars_exact(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32>
; CHECK-NEXT: [[T3:%.*]] = lshr exact <4 x i32> , [[TMP1]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = lshr exact <4 x i32> , %v0
  %t2 = lshr exact <4 x i32> , %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; TODO: This would require a new shuffle mask (replace undef with op0 or op1 lane). Otherwise, we have shift-by-undef.
; (Currently not transformed: both lshrs and the shuffle remain in the CHECK lines.)

define <4 x i32> @lshr_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @lshr_2_vars_undef_mask_elt(
; CHECK-NEXT: [[T1:%.*]] = lshr <4 x i32> , [[V0:%.*]]
; CHECK-NEXT: [[T2:%.*]] = lshr <4 x i32> , [[V1:%.*]]
; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = lshr <4 x i32> , %v0
  %t2 = lshr <4 x i32> , %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; TODO: This would require a new shuffle mask (replace undef with op0 or op1 lane). Otherwise, we have shift-by-undef.
; Currently not transformed: both 'lshr exact' instructions and the shuffle
; remain in the CHECK lines.

define <4 x i32> @lshr_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @lshr_2_vars_exact_undef_mask_elt(
; CHECK-NEXT: [[T1:%.*]] = lshr exact <4 x i32> , [[V0:%.*]]
; CHECK-NEXT: [[T2:%.*]] = lshr exact <4 x i32> , [[V1:%.*]]
; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = lshr exact <4 x i32> , %v0
  %t2 = lshr exact <4 x i32> , %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; Try weird types.
; (3-element vectors, and i42 elements in @and_2_vars.)

define <3 x i32> @ashr_2_vars(<3 x i32> %v0, <3 x i32> %v1) {
; CHECK-LABEL: @ashr_2_vars(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[V1:%.*]], <3 x i32> [[V0:%.*]], <3 x i32>
; CHECK-NEXT: [[T3:%.*]] = ashr <3 x i32> [[TMP1]],
; CHECK-NEXT: ret <3 x i32> [[T3]]
;
  %t1 = ashr <3 x i32> %v0,
  %t2 = ashr <3 x i32> %v1,
  %t3 = shufflevector <3 x i32> %t1, <3 x i32> %t2, <3 x i32>
  ret <3 x i32> %t3
}

define <3 x i42> @and_2_vars(<3 x i42> %v0, <3 x i42> %v1) {
; CHECK-LABEL: @and_2_vars(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i42> [[V0:%.*]], <3 x i42> [[V1:%.*]], <3 x i32>
; CHECK-NEXT: [[T3:%.*]] = and <3 x i42> [[TMP1]],
; CHECK-NEXT: ret <3 x i42> [[T3]]
;
  %t1 = and <3 x i42> %v0,
  %t2 = and <3 x i42> %v1,
  %t3 = shufflevector <3 x i42> %t1, <3 x i42> %t2, <3 x i32>
  ret <3 x i42> %t3
}

; It doesn't matter if only one intermediate op has extra uses.
; NOTE(review): the constant vector operands and shuffle masks appear to be
; missing from the instructions below (e.g. `or <4 x i32> %v0,`); they need to
; be restored before this test can be parsed.
; %t1 has a second use (the call to @use_v4i32), %t2 does not. The assertions
; expect %t1 to be kept for the call while the returned value is rebuilt as a
; shuffle of the variables followed by one 'or'.
define <4 x i32> @or_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @or_2_vars(
; CHECK-NEXT: [[T1:%.*]] = or <4 x i32> [[V0:%.*]],
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0]], <4 x i32>
; CHECK-NEXT: [[T3:%.*]] = or <4 x i32> [[TMP1]],
; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T1]])
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = or <4 x i32> %v0,
  %t2 = or <4 x i32> %v1,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  call void @use_v4i32(<4 x i32> %t1)
  ret <4 x i32> %t3
}

; But we don't transform if both intermediate values have extra uses.
; Both %t1 and %t2 are passed to @use_v4i32; the assertions expect the two xor
; instructions and the shuffle to remain as written.
define <4 x i32> @xor_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @xor_2_vars(
; CHECK-NEXT: [[T1:%.*]] = xor <4 x i32> [[V0:%.*]],
; CHECK-NEXT: [[T2:%.*]] = xor <4 x i32> [[V1:%.*]],
; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32>
; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T1]])
; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T2]])
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = xor <4 x i32> %v0,
  %t2 = xor <4 x i32> %v1,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  call void @use_v4i32(<4 x i32> %t1)
  call void @use_v4i32(<4 x i32> %t2)
  ret <4 x i32> %t3
}

; Div/rem need special handling if the shuffle has undef elements.
; NOTE(review): the constant vector operands and shuffle masks appear to be
; missing from the instructions below (e.g. `udiv <4 x i32> , %v0`); they need
; to be restored before this test can be parsed.
; The variable is the divisor (operand 1) of each udiv. The assertions expect
; one shuffle of the variables followed by a single udiv.
define <4 x i32> @udiv_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @udiv_2_vars(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32>
; CHECK-NEXT: [[T3:%.*]] = udiv <4 x i32> , [[TMP1]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = udiv <4 x i32> , %v0
  %t2 = udiv <4 x i32> , %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; Both divisions are 'exact'; the expected single udiv keeps 'exact'.
define <4 x i32> @udiv_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @udiv_2_vars_exact(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32>
; CHECK-NEXT: [[T3:%.*]] = udiv exact <4 x i32> , [[TMP1]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = udiv exact <4 x i32> , %v0
  %t2 = udiv exact <4 x i32> , %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; TODO: This could be transformed using a safe constant.
; The assertions currently expect both udiv instructions and the shuffle to
; remain (no fold).
define <4 x i32> @udiv_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @udiv_2_vars_undef_mask_elt(
; CHECK-NEXT: [[T1:%.*]] = udiv <4 x i32> , [[V0:%.*]]
; CHECK-NEXT: [[T2:%.*]] = udiv <4 x i32> , [[V1:%.*]]
; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = udiv <4 x i32> , %v0
  %t2 = udiv <4 x i32> , %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; TODO: This could be transformed using a safe constant.
; NOTE(review): the constant vector operands and shuffle masks appear to be
; missing from the instructions below (e.g. `udiv exact <4 x i32> , %v0`); they
; need to be restored before this test can be parsed.
; Both divisions are 'exact'; the assertions currently expect both udiv
; instructions and the shuffle to remain (no fold).
define <4 x i32> @udiv_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @udiv_2_vars_exact_undef_mask_elt(
; CHECK-NEXT: [[T1:%.*]] = udiv exact <4 x i32> , [[V0:%.*]]
; CHECK-NEXT: [[T2:%.*]] = udiv exact <4 x i32> , [[V1:%.*]]
; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = udiv exact <4 x i32> , %v0
  %t2 = udiv exact <4 x i32> , %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; If the shuffle has no undefs, it's safe to shuffle the variables first.
; Here the variable is the dividend (operand 0) and the divisor is constant.
define <4 x i32> @sdiv_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sdiv_2_vars(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32>
; CHECK-NEXT: [[T3:%.*]] = sdiv <4 x i32> [[TMP1]],
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = sdiv <4 x i32> %v0,
  %t2 = sdiv <4 x i32> %v1,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; Both divisions are 'exact'; the expected single sdiv keeps 'exact'.
define <4 x i32> @sdiv_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sdiv_2_vars_exact(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32>
; CHECK-NEXT: [[T3:%.*]] = sdiv exact <4 x i32> [[TMP1]],
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = sdiv exact <4 x i32> %v0,
  %t2 = sdiv exact <4 x i32> %v1,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; Div by undef is UB. Undef is replaced by safe constant.
; The assertions expect the fold to happen: one shuffle of the variables
; followed by a single sdiv with a constant divisor.
define <4 x i32> @sdiv_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sdiv_2_vars_undef_mask_elt(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32>
; CHECK-NEXT: [[T3:%.*]] = sdiv <4 x i32> [[TMP1]],
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = sdiv <4 x i32> %v0,
  %t2 = sdiv <4 x i32> %v1,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; Div by undef is UB. Undef is replaced by safe constant.
; 'exact' variant: the expected single sdiv keeps 'exact'.
define <4 x i32> @sdiv_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @sdiv_2_vars_exact_undef_mask_elt(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32>
; CHECK-NEXT: [[T3:%.*]] = sdiv exact <4 x i32> [[TMP1]],
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = sdiv exact <4 x i32> %v0,
  %t2 = sdiv exact <4 x i32> %v1,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; If the shuffle has no undefs, it's safe to shuffle the variables first.
; Here the variable is operand 1 of each urem.
define <4 x i32> @urem_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @urem_2_vars(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32>
; CHECK-NEXT: [[T3:%.*]] = urem <4 x i32> , [[TMP1]]
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = urem <4 x i32> , %v0
  %t2 = urem <4 x i32> , %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; The assertions expect both srem instructions and the shuffle to remain.
; NOTE(review): the reason is not visible here because the shuffle mask is
; missing; presumably the mask has an undef element, as in the udiv TODO
; cases — confirm.
define <4 x i32> @srem_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @srem_2_vars(
; CHECK-NEXT: [[T1:%.*]] = srem <4 x i32> , [[V0:%.*]]
; CHECK-NEXT: [[T2:%.*]] = srem <4 x i32> , [[V1:%.*]]
; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = srem <4 x i32> , %v0
  %t2 = srem <4 x i32> , %v1
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; Try FP ops/types.
; NOTE(review): the constant vector operands and shuffle masks appear to be
; missing from the instructions below (e.g. `fadd <4 x float> %v0,`); they need
; to be restored before this test can be parsed.
; Float vectors, variable as operand 0. The assertions expect one shuffle of
; the variables followed by a single fadd.
define <4 x float> @fadd_2_vars(<4 x float> %v0, <4 x float> %v1) {
; CHECK-LABEL: @fadd_2_vars(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V0:%.*]], <4 x float> [[V1:%.*]], <4 x i32>
; CHECK-NEXT: [[T3:%.*]] = fadd <4 x float> [[TMP1]],
; CHECK-NEXT: ret <4 x float> [[T3]]
;
  %t1 = fadd <4 x float> %v0,
  %t2 = fadd <4 x float> %v1,
  %t3 = shufflevector <4 x float> %t1, <4 x float> %t2, <4 x i32>
  ret <4 x float> %t3
}

; Double vectors, variable as operand 1 of each fsub. The assertions expect one
; shuffle of the variables followed by a single fsub.
define <4 x double> @fsub_2_vars(<4 x double> %v0, <4 x double> %v1) {
; CHECK-LABEL: @fsub_2_vars(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <4 x i32>
; CHECK-NEXT: [[T3:%.*]] = fsub <4 x double> , [[TMP1]]
; CHECK-NEXT: ret <4 x double> [[T3]]
;
  %t1 = fsub <4 x double> , %v0
  %t2 = fsub <4 x double> , %v1
  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32>
  ret <4 x double> %t3
}

; Intersect any FMF.
; Both fmul instructions carry 'reassoc nsz', and so does the expected fmul.
define <4 x float> @fmul_2_vars(<4 x float> %v0, <4 x float> %v1) {
; CHECK-LABEL: @fmul_2_vars(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V0:%.*]], <4 x float> [[V1:%.*]], <4 x i32>
; CHECK-NEXT: [[T3:%.*]] = fmul reassoc nsz <4 x float> [[TMP1]],
; CHECK-NEXT: ret <4 x float> [[T3]]
;
  %t1 = fmul reassoc nsz <4 x float> %v0,
  %t2 = fmul reassoc nsz <4 x float> %v1,
  %t3 = shufflevector <4 x float> %t1, <4 x float> %t2, <4 x i32>
  ret <4 x float> %t3
}

; %t1 is 'nnan ninf' and %t2 is 'nnan arcp'; only the common flag 'nnan'
; appears on the expected frem.
define <4 x double> @frem_2_vars(<4 x double> %v0, <4 x double> %v1) {
; CHECK-LABEL: @frem_2_vars(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <4 x i32>
; CHECK-NEXT: [[T3:%.*]] = frem nnan <4 x double> , [[TMP1]]
; CHECK-NEXT: ret <4 x double> [[T3]]
;
  %t1 = frem nnan ninf <4 x double> , %v0
  %t2 = frem nnan arcp <4 x double> , %v1
  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32>
  ret <4 x double> %t3
}

; The variable operand must be either the first operand or second operand in both binops.
; NOTE(review): the constant vector operands and shuffle masks appear to be
; missing from the instructions below (e.g. `fdiv <4 x double> , %v0`); they
; need to be restored before this test can be parsed.
; %v0 is operand 1 of the first fdiv while %v1 is operand 0 of the second, so
; the variable positions differ. The assertions expect both fdiv instructions
; and the shuffle to remain.
define <4 x double> @fdiv_2_vars(<4 x double> %v0, <4 x double> %v1) {
; CHECK-LABEL: @fdiv_2_vars(
; CHECK-NEXT: [[T1:%.*]] = fdiv <4 x double> , [[V0:%.*]]
; CHECK-NEXT: [[T2:%.*]] = fdiv <4 x double> [[V1:%.*]],
; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x double> [[T1]], <4 x double> [[T2]], <4 x i32>
; CHECK-NEXT: ret <4 x double> [[T3]]
;
  %t1 = fdiv <4 x double> , %v0
  %t2 = fdiv <4 x double> %v1,
  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32>
  ret <4 x double> %t3
}

; Shift-left with constant shift amount can be converted to mul to enable the fold.
; Both inputs operate on the same %v0 and are 'nuw'; the assertions expect a
; single 'mul nuw' and no shuffle.
define <4 x i32> @mul_shl(<4 x i32> %v0) {
; CHECK-LABEL: @mul_shl(
; CHECK-NEXT: [[T3:%.*]] = mul nuw <4 x i32> [[V0:%.*]],
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = mul nuw <4 x i32> %v0,
  %t2 = shl nuw <4 x i32> %v0,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; Try with shift as operand 0 of the shuffle; 'nsw' is dropped for safety, but that could be improved.
define <4 x i32> @shl_mul(<4 x i32> %v0) {
; CHECK-LABEL: @shl_mul(
; CHECK-NEXT: [[T3:%.*]] = mul <4 x i32> [[V0:%.*]],
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = shl nsw <4 x i32> %v0,
  %t2 = mul nsw <4 x i32> %v0,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; Demanded elements + simplification can remove the mul alone, but that's not the best case.
; The assertions expect a single shl of %v0 and no shuffle.
define <4 x i32> @mul_is_nop_shl(<4 x i32> %v0) {
; CHECK-LABEL: @mul_is_nop_shl(
; CHECK-NEXT: [[T3:%.*]] = shl <4 x i32> [[V0:%.*]],
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = mul <4 x i32> %v0,
  %t2 = shl <4 x i32> %v0,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; Negative test: shift amount (operand 1) must be constant.
; NOTE(review): the constant vector operands and shuffle masks appear to be
; missing from the instructions below (e.g. `shl <4 x i32> , %v0`); they need
; to be restored before this test can be parsed.
; Here %v0 is the shift amount of the shl. The assertions expect the shl and
; the mul to remain, with the shuffle operands listed in swapped order
; (%t2 first, then %t1).
define <4 x i32> @shl_mul_not_constant_shift_amount(<4 x i32> %v0) {
; CHECK-LABEL: @shl_mul_not_constant_shift_amount(
; CHECK-NEXT: [[T1:%.*]] = shl <4 x i32> , [[V0:%.*]]
; CHECK-NEXT: [[T2:%.*]] = mul <4 x i32> [[V0]],
; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T2]], <4 x i32> [[T1]], <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = shl <4 x i32> , %v0
  %t2 = mul <4 x i32> %v0,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; Try with 2 variable inputs.
; Both inputs are 'nuw'; the assertions expect one shuffle of the variables
; followed by a single 'mul nuw'.
define <4 x i32> @mul_shl_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @mul_shl_2_vars(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32>
; CHECK-NEXT: [[T3:%.*]] = mul nuw <4 x i32> [[TMP1]],
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = mul nuw <4 x i32> %v0,
  %t2 = shl nuw <4 x i32> %v1,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; Both inputs are 'nsw', but the expected mul carries no 'nsw' flag.
define <4 x i32> @shl_mul_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @shl_mul_2_vars(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32>
; CHECK-NEXT: [[T3:%.*]] = mul <4 x i32> [[TMP1]],
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %t1 = shl nsw <4 x i32> %v0,
  %t2 = mul nsw <4 x i32> %v1,
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; Or with constant can be converted to add to enable the fold.
; The 'shl' is here to allow analysis to determine that the 'or' can be transformed to 'add'.
; TODO: The 'or' constant is limited to a splat.
; NOTE(review): the constant vector operands and shuffle masks appear to be
; missing from the instructions below (e.g. `shl <4 x i32> %v,`); they need to
; be restored before this test can be parsed.
; The assertions expect the shl to remain, followed by a single add and no
; shuffle.
define <4 x i32> @add_or(<4 x i32> %v) {
; CHECK-LABEL: @add_or(
; CHECK-NEXT: [[V0:%.*]] = shl <4 x i32> [[V:%.*]],
; CHECK-NEXT: [[T3:%.*]] = add <4 x i32> [[V0]],
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %v0 = shl <4 x i32> %v, ; clear the bottom bits
  %t1 = add <4 x i32> %v0, ; this can't be converted to 'or'
  %t2 = or <4 x i32> %v0, ; set the bottom bits
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; Try with 'or' as operand 0 of the shuffle.
; The add is 'nsw nuw' and the expected single add keeps both flags.
define <4 x i8> @or_add(<4 x i8> %v) {
; CHECK-LABEL: @or_add(
; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]],
; CHECK-NEXT: [[T3:%.*]] = add nuw nsw <4 x i8> [[V0]],
; CHECK-NEXT: ret <4 x i8> [[T3]]
;
  %v0 = lshr <4 x i8> %v, ; clear the top bits
  %t1 = or <4 x i8> %v0, ; set some top bits
  %t2 = add nsw nuw <4 x i8> %v0, ; this can't be converted to 'or'
  %t3 = shufflevector <4 x i8> %t1, <4 x i8> %t2, <4 x i32>
  ret <4 x i8> %t3
}

; Negative test: not all 'or' insts can be converted to 'add'.
; The assertions expect the 'or', the add and the shuffle to remain, with the
; shuffle operands listed in swapped order (%t2 first, then %t1). The expected
; add is written without the 'nsw nuw' flags of the input.
define <4 x i8> @or_add_not_enough_masking(<4 x i8> %v) {
; CHECK-LABEL: @or_add_not_enough_masking(
; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]],
; CHECK-NEXT: [[T1:%.*]] = or <4 x i8> [[V0]],
; CHECK-NEXT: [[T2:%.*]] = add <4 x i8> [[V0]],
; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i8> [[T2]], <4 x i8> [[T1]], <4 x i32>
; CHECK-NEXT: ret <4 x i8> [[T3]]
;
  %v0 = lshr <4 x i8> %v, ; clear not enough top bits
  %t1 = or <4 x i8> %v0, ; set some top bits
  %t2 = add nsw nuw <4 x i8> %v0, ; this can't be converted to 'or'
  %t3 = shufflevector <4 x i8> %t1, <4 x i8> %t2, <4 x i32>
  ret <4 x i8> %t3
}

; Try with 2 variable inputs.
; NOTE(review): the constant vector operands and shuffle masks appear to be
; missing from the instructions below (e.g. `shl <4 x i32> %v,`); they need to
; be restored before this test can be parsed.
; The add uses %v1 while the 'or' uses the shifted %v0. The assertions expect
; the shl to remain, then one shuffle of %v0 and %v1 followed by a single add.
define <4 x i32> @add_or_2_vars(<4 x i32> %v, <4 x i32> %v1) {
; CHECK-LABEL: @add_or_2_vars(
; CHECK-NEXT: [[V0:%.*]] = shl <4 x i32> [[V:%.*]],
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> [[V1:%.*]], <4 x i32>
; CHECK-NEXT: [[T3:%.*]] = add <4 x i32> [[TMP1]],
; CHECK-NEXT: ret <4 x i32> [[T3]]
;
  %v0 = shl <4 x i32> %v, ; clear the bottom bits
  %t1 = add <4 x i32> %v1, ; this can't be converted to 'or'
  %t2 = or <4 x i32> %v0, ; set the bottom bits
  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32>
  ret <4 x i32> %t3
}

; The 'or' uses the shifted %v0 while the add uses %v1. The assertions expect
; the lshr to remain, then one shuffle followed by a single 'add nuw nsw'.
define <4 x i8> @or_add_2_vars(<4 x i8> %v, <4 x i8> %v1) {
; CHECK-LABEL: @or_add_2_vars(
; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]],
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[V1:%.*]], <4 x i8> [[V0]], <4 x i32>
; CHECK-NEXT: [[T3:%.*]] = add nuw nsw <4 x i8> [[TMP1]],
; CHECK-NEXT: ret <4 x i8> [[T3]]
;
  %v0 = lshr <4 x i8> %v, ; clear the top bits
  %t1 = or <4 x i8> %v0, ; set some top bits
  %t2 = add nsw nuw <4 x i8> %v1, ; this can't be converted to 'or'
  %t3 = shufflevector <4 x i8> %t1, <4 x i8> %t2, <4 x i32>
  ret <4 x i8> %t3
}

; The undef operand is used to simplify the shuffle mask, but don't assert that too soon.
; No binop is involved: the input is a single shuffle of %v with undef, and the
; assertions expect a single shuffle of %v with undef.
define <4 x i32> @PR41419(<4 x i32> %v) {
; CHECK-LABEL: @PR41419(
; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[V:%.*]], <4 x i32> undef, <4 x i32>
; CHECK-NEXT: ret <4 x i32> [[S]]
;
  %s = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32>
  ret <4 x i32> %s
}