llvm-for-llvmta/test/Transforms/InstCombine/vector-reductions.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s

; Floating-point add reductions used by the tests below. Each takes a scalar
; float operand followed by the vector to reduce (per the LangRef, the scalar
; is the start value of the reduction).
; NOTE(review): the expected output refers to these intrinsics as
; @llvm.vector.reduce.fadd.v4f32 / .v8f32, i.e. without the ".f32" component
; spelled here - presumably opt re-mangles the intrinsic name when it reads
; the module; confirm against the auto-upgrade logic.
declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
declare float @llvm.vector.reduce.fadd.f32.v8f32(float, <8 x float>)
; External function with no body in this file; the tests call it to give a
; floating-point reduction result an additional use.
declare void @use_f32(float)

; Integer add reductions over 4- and 8-element i32 vectors.
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
; Integer counterpart of @use_f32.
declare void @use_i32(i32)

; Positive test: the difference of two fadd reductions over same-typed
; (<4 x float>) vectors. Only the final fsub carries fast-math flags
; (reassoc nsz); the two reduction calls in the input are unflagged.
; The expected output performs one vector fsub of %v0 and %v1, a single
; reduction that takes %a0 as its scalar operand, and then subtracts %a1 as a
; scalar, with each resulting instruction carrying 'reassoc nsz'.
define float @diff_of_sums_v4f32(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) {
; CHECK-LABEL: @diff_of_sums_v4f32(
; CHECK-NEXT:    [[TMP1:%.*]] = fsub reassoc nsz <4 x float> [[V0:%.*]], [[V1:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[TMP1]])
; CHECK-NEXT:    [[R:%.*]] = fsub reassoc nsz float [[TMP2]], [[A1:%.*]]
; CHECK-NEXT:    ret float [[R]]
;
  ; Two independent reductions, each with a single use (the fsub below).
  %r0 = call float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0)
  %r1 = call float @llvm.vector.reduce.fadd.f32.v4f32(float %a1, <4 x float> %v1)
  ; 'reassoc' on this fsub is what distinguishes this test from the _fmf
  ; negative test that follows in this file.
  %r = fsub reassoc nsz float %r0, %r1
  ret float %r
}

; negative test - fsub must allow reassociation
; Both reduction calls are 'fast', but the final fsub carries only
; ninf/nnan/nsz (no 'reassoc'). The expected output keeps both reduction calls
; and the scalar fsub; the only textual difference is the order in which the
; fsub's flags are printed (nnan ninf nsz).

define float @diff_of_sums_v4f32_fmf(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) {
; CHECK-LABEL: @diff_of_sums_v4f32_fmf(
; CHECK-NEXT:    [[R0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]])
; CHECK-NEXT:    [[R1:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]])
; CHECK-NEXT:    [[R:%.*]] = fsub nnan ninf nsz float [[R0]], [[R1]]
; CHECK-NEXT:    ret float [[R]]
;
  %r0 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0)
  %r1 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a1, <4 x float> %v1)
  ; Deliberately missing 'reassoc' on the subtraction.
  %r = fsub ninf nnan nsz float %r0, %r1
  ret float %r
}

; negative test - extra uses could create extra instructions
; Here the first reduction (%r0) is also passed to @use_f32, so it has a use
; besides the fsub. Everything is 'fast', yet the expected output is the input
; unchanged: both reductions, the call to @use_f32, and the scalar fsub.

define float @diff_of_sums_extra_use1(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) {
; CHECK-LABEL: @diff_of_sums_extra_use1(
; CHECK-NEXT:    [[R0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]])
; CHECK-NEXT:    call void @use_f32(float [[R0]])
; CHECK-NEXT:    [[R1:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]])
; CHECK-NEXT:    [[R:%.*]] = fsub fast float [[R0]], [[R1]]
; CHECK-NEXT:    ret float [[R]]
;
  %r0 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0)
  ; Extra use of the left-hand reduction.
  call void @use_f32(float %r0)
  %r1 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a1, <4 x float> %v1)
  %r = fsub fast float %r0, %r1
  ret float %r
}

; negative test - extra uses could create extra instructions
; Mirror of @diff_of_sums_extra_use1: this time the second reduction (%r1) is
; the one passed to @use_f32. The expected output is again the input
; unchanged.

define float @diff_of_sums_extra_use2(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) {
; CHECK-LABEL: @diff_of_sums_extra_use2(
; CHECK-NEXT:    [[R0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]])
; CHECK-NEXT:    [[R1:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]])
; CHECK-NEXT:    call void @use_f32(float [[R1]])
; CHECK-NEXT:    [[R:%.*]] = fsub fast float [[R0]], [[R1]]
; CHECK-NEXT:    ret float [[R]]
;
  %r0 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0)
  %r1 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a1, <4 x float> %v1)
  ; Extra use of the right-hand reduction.
  call void @use_f32(float %r1)
  %r = fsub fast float %r0, %r1
  ret float %r
}

; negative test - can't reassociate different vector types
; %v0 is <4 x float> while %v1 is <8 x float>, so the two reductions use
; different intrinsics (v4f32 vs v8f32). Despite 'fast' on every instruction
; and single uses, the expected output is the input unchanged.

define float @diff_of_sums_type_mismatch(float %a0, <4 x float> %v0, float %a1, <8 x float> %v1) {
; CHECK-LABEL: @diff_of_sums_type_mismatch(
; CHECK-NEXT:    [[R0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]])
; CHECK-NEXT:    [[R1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float [[A1:%.*]], <8 x float> [[V1:%.*]])
; CHECK-NEXT:    [[R:%.*]] = fsub fast float [[R0]], [[R1]]
; CHECK-NEXT:    ret float [[R]]
;
  %r0 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0)
  ; Note the 8-element reduction here versus the 4-element one above.
  %r1 = call fast float @llvm.vector.reduce.fadd.f32.v8f32(float %a1, <8 x float> %v1)
  %r = fsub fast float %r0, %r1
  ret float %r
}

; Positive test, integer version: the difference of two add reductions over
; same-typed (<4 x i32>) vectors. No flags appear on any instruction.
; The expected output is a vector sub of %v0 and %v1 followed by a single
; reduction whose result is returned directly.
define i32 @diff_of_sums_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @diff_of_sums_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = sub <4 x i32> [[V0:%.*]], [[V1:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])
; CHECK-NEXT:    ret i32 [[TMP2]]
;
  ; Each reduction is used only by the sub below.
  %r0 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v0)
  %r1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v1)
  %r = sub i32 %r0, %r1
  ret i32 %r
}

; negative test - extra uses could create extra instructions
; Integer version with the first reduction (%r0) also passed to @use_i32.
; The expected output is the input unchanged: both reductions, the call to
; @use_i32, and the scalar sub.

define i32 @diff_of_sums_v4i32_extra_use1(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @diff_of_sums_v4i32_extra_use1(
; CHECK-NEXT:    [[R0:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V0:%.*]])
; CHECK-NEXT:    call void @use_i32(i32 [[R0]])
; CHECK-NEXT:    [[R1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V1:%.*]])
; CHECK-NEXT:    [[R:%.*]] = sub i32 [[R0]], [[R1]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %r0 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v0)
  ; Extra use of the left-hand reduction.
  call void @use_i32(i32 %r0)
  %r1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v1)
  %r = sub i32 %r0, %r1
  ret i32 %r
}

; negative test - extra uses could create extra instructions
; Mirror of @diff_of_sums_v4i32_extra_use1: the second reduction (%r1) is the
; one passed to @use_i32. The expected output is again the input unchanged.

define i32 @diff_of_sums_v4i32_extra_use2(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @diff_of_sums_v4i32_extra_use2(
; CHECK-NEXT:    [[R0:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V0:%.*]])
; CHECK-NEXT:    [[R1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V1:%.*]])
; CHECK-NEXT:    call void @use_i32(i32 [[R1]])
; CHECK-NEXT:    [[R:%.*]] = sub i32 [[R0]], [[R1]]
; CHECK-NEXT:    ret i32 [[R]]
;
  %r0 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v0)
  %r1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v1)
  ; Extra use of the right-hand reduction.
  call void @use_i32(i32 %r1)
  %r = sub i32 %r0, %r1
  ret i32 %r
}

; negative test - can't reassociate different vector types
; Integer version: %v0 is <8 x i32> while %v1 is <4 x i32>, so the reductions
; use different intrinsics (v8i32 vs v4i32). The expected output is the input
; unchanged.

define i32 @diff_of_sums_type_mismatch2(<8 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @diff_of_sums_type_mismatch2(
; CHECK-NEXT:    [[R0:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[V0:%.*]])
; CHECK-NEXT:    [[R1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V1:%.*]])
; CHECK-NEXT:    [[R:%.*]] = sub i32 [[R0]], [[R1]]
; CHECK-NEXT:    ret i32 [[R]]
;
  ; Note the 8-element reduction here versus the 4-element one below.
  %r0 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %v0)
  %r1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v1)
  %r = sub i32 %r0, %r1
  ret i32 %r
}
first commit 2022-04-25 10:02:23 +02:00			`; NOTE: Assertions have been autogenerated by utils/update_test_checks.py`
			`; RUN: opt < %s -instcombine -S \| FileCheck %s`

			`declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)`
			`declare float @llvm.vector.reduce.fadd.f32.v8f32(float, <8 x float>)`
			`declare void @use_f32(float)`

			`declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)`
			`declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)`
			`declare void @use_i32(i32)`

			`define float @diff_of_sums_v4f32(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) {`
			`; CHECK-LABEL: @diff_of_sums_v4f32(`
			`; CHECK-NEXT: [[TMP1:%.*]] = fsub reassoc nsz <4 x float> [[V0:%.*]], [[V1:%.*]]`
			`; CHECK-NEXT: [[TMP2:%.*]] = call reassoc nsz float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[TMP1]])`
			`; CHECK-NEXT: [[R:%.*]] = fsub reassoc nsz float [[TMP2]], [[A1:%.*]]`
			`; CHECK-NEXT: ret float [[R]]`
			`;`
			`%r0 = call float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0)`
			`%r1 = call float @llvm.vector.reduce.fadd.f32.v4f32(float %a1, <4 x float> %v1)`
			`%r = fsub reassoc nsz float %r0, %r1`
			`ret float %r`
			`}`

			`; negative test - fsub must allow reassociation`

			`define float @diff_of_sums_v4f32_fmf(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) {`
			`; CHECK-LABEL: @diff_of_sums_v4f32_fmf(`
			`; CHECK-NEXT: [[R0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]])`
			`; CHECK-NEXT: [[R1:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]])`
			`; CHECK-NEXT: [[R:%.*]] = fsub nnan ninf nsz float [[R0]], [[R1]]`
			`; CHECK-NEXT: ret float [[R]]`
			`;`
			`%r0 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0)`
			`%r1 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a1, <4 x float> %v1)`
			`%r = fsub ninf nnan nsz float %r0, %r1`
			`ret float %r`
			`}`

			`; negative test - extra uses could create extra instructions`

			`define float @diff_of_sums_extra_use1(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) {`
			`; CHECK-LABEL: @diff_of_sums_extra_use1(`
			`; CHECK-NEXT: [[R0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]])`
			`; CHECK-NEXT: call void @use_f32(float [[R0]])`
			`; CHECK-NEXT: [[R1:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]])`
			`; CHECK-NEXT: [[R:%.*]] = fsub fast float [[R0]], [[R1]]`
			`; CHECK-NEXT: ret float [[R]]`
			`;`
			`%r0 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0)`
			`call void @use_f32(float %r0)`
			`%r1 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a1, <4 x float> %v1)`
			`%r = fsub fast float %r0, %r1`
			`ret float %r`
			`}`

			`; negative test - extra uses could create extra instructions`

			`define float @diff_of_sums_extra_use2(float %a0, <4 x float> %v0, float %a1, <4 x float> %v1) {`
			`; CHECK-LABEL: @diff_of_sums_extra_use2(`
			`; CHECK-NEXT: [[R0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]])`
			`; CHECK-NEXT: [[R1:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A1:%.*]], <4 x float> [[V1:%.*]])`
			`; CHECK-NEXT: call void @use_f32(float [[R1]])`
			`; CHECK-NEXT: [[R:%.*]] = fsub fast float [[R0]], [[R1]]`
			`; CHECK-NEXT: ret float [[R]]`
			`;`
			`%r0 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0)`
			`%r1 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a1, <4 x float> %v1)`
			`call void @use_f32(float %r1)`
			`%r = fsub fast float %r0, %r1`
			`ret float %r`
			`}`

			`; negative test - can't reassociate different vector types`

			`define float @diff_of_sums_type_mismatch(float %a0, <4 x float> %v0, float %a1, <8 x float> %v1) {`
			`; CHECK-LABEL: @diff_of_sums_type_mismatch(`
			`; CHECK-NEXT: [[R0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[A0:%.*]], <4 x float> [[V0:%.*]])`
			`; CHECK-NEXT: [[R1:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float [[A1:%.*]], <8 x float> [[V1:%.*]])`
			`; CHECK-NEXT: [[R:%.*]] = fsub fast float [[R0]], [[R1]]`
			`; CHECK-NEXT: ret float [[R]]`
			`;`
			`%r0 = call fast float @llvm.vector.reduce.fadd.f32.v4f32(float %a0, <4 x float> %v0)`
			`%r1 = call fast float @llvm.vector.reduce.fadd.f32.v8f32(float %a1, <8 x float> %v1)`
			`%r = fsub fast float %r0, %r1`
			`ret float %r`
			`}`

			`define i32 @diff_of_sums_v4i32(<4 x i32> %v0, <4 x i32> %v1) {`
			`; CHECK-LABEL: @diff_of_sums_v4i32(`
			`; CHECK-NEXT: [[TMP1:%.*]] = sub <4 x i32> [[V0:%.*]], [[V1:%.*]]`
			`; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP1]])`
			`; CHECK-NEXT: ret i32 [[TMP2]]`
			`;`
			`%r0 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v0)`
			`%r1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v1)`
			`%r = sub i32 %r0, %r1`
			`ret i32 %r`
			`}`

			`; negative test - extra uses could create extra instructions`

			`define i32 @diff_of_sums_v4i32_extra_use1(<4 x i32> %v0, <4 x i32> %v1) {`
			`; CHECK-LABEL: @diff_of_sums_v4i32_extra_use1(`
			`; CHECK-NEXT: [[R0:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V0:%.*]])`
			`; CHECK-NEXT: call void @use_i32(i32 [[R0]])`
			`; CHECK-NEXT: [[R1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V1:%.*]])`
			`; CHECK-NEXT: [[R:%.*]] = sub i32 [[R0]], [[R1]]`
			`; CHECK-NEXT: ret i32 [[R]]`
			`;`
			`%r0 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v0)`
			`call void @use_i32(i32 %r0)`
			`%r1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v1)`
			`%r = sub i32 %r0, %r1`
			`ret i32 %r`
			`}`

			`; negative test - extra uses could create extra instructions`

			`define i32 @diff_of_sums_v4i32_extra_use2(<4 x i32> %v0, <4 x i32> %v1) {`
			`; CHECK-LABEL: @diff_of_sums_v4i32_extra_use2(`
			`; CHECK-NEXT: [[R0:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V0:%.*]])`
			`; CHECK-NEXT: [[R1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V1:%.*]])`
			`; CHECK-NEXT: call void @use_i32(i32 [[R1]])`
			`; CHECK-NEXT: [[R:%.*]] = sub i32 [[R0]], [[R1]]`
			`; CHECK-NEXT: ret i32 [[R]]`
			`;`
			`%r0 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v0)`
			`%r1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v1)`
			`call void @use_i32(i32 %r1)`
			`%r = sub i32 %r0, %r1`
			`ret i32 %r`
			`}`

			`; negative test - can't reassociate different vector types`

			`define i32 @diff_of_sums_type_mismatch2(<8 x i32> %v0, <4 x i32> %v1) {`
			`; CHECK-LABEL: @diff_of_sums_type_mismatch2(`
			`; CHECK-NEXT: [[R0:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[V0:%.*]])`
			`; CHECK-NEXT: [[R1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[V1:%.*]])`
			`; CHECK-NEXT: [[R:%.*]] = sub i32 [[R0]], [[R1]]`
			`; CHECK-NEXT: ret i32 [[R]]`
			`;`
			`%r0 = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %v0)`
			`%r1 = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %v1)`
			`%r = sub i32 %r0, %r1`
			`ret i32 %r`
			`}`