; Listing metadata: 282 lines, 10 KiB, LLVM IR.
; Loop-vectorizer register-usage test for PowerPC. The two invocations below
; run the same pipeline with loop-vectorize debug output enabled and
; -vectorizer-maximize-bandwidth set: one for powerpc64 / pwr8 and one for
; powerpc64le / pwr9. Each uses its own extra FileCheck prefix. The test is
; gated on the "asserts" feature.
; RUN: opt < %s -debug-only=loop-vectorize -loop-vectorize -vectorizer-maximize-bandwidth -O2 -mtriple=powerpc64-unknown-linux -S -mcpu=pwr8 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-PWR8
; RUN: opt < %s -debug-only=loop-vectorize -loop-vectorize -vectorizer-maximize-bandwidth -O2 -mtriple=powerpc64le-unknown-linux -S -mcpu=pwr9 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-PWR9
; REQUIRES: asserts

; 1024-byte, 16-byte-aligned arrays read by @foo and @goo.
@a = global [1024 x i8] zeroinitializer, align 16
@b = global [1024 x i8] zeroinitializer, align 16

define i32 @foo() {
; Sums the absolute byte differences |a[i] - b[i]| for i in [0, 1024).
; Each byte of @a and @b is zero-extended to i32, the absolute value is formed
; with an icmp/sub/select sequence, and the result is accumulated in %s.015.
; The expected vectorization plan differs between the pwr8 and pwr9 runs.
; CHECK-LABEL: foo

; CHECK-PWR8: Setting best plan to VF=16, UF=4

; CHECK-PWR9: Setting best plan to VF=8, UF=8


entry:
  br label %for.body

for.cond.cleanup:
  %add.lcssa = phi i32 [ %add, %for.body ]
  ret i32 %add.lcssa

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %s.015 = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %indvars.iv
  %0 = load i8, i8* %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %arrayidx2 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %indvars.iv
  %1 = load i8, i8* %arrayidx2, align 1
  %conv3 = zext i8 %1 to i32
  %sub = sub nsw i32 %conv, %conv3
  %ispos = icmp sgt i32 %sub, -1
  %neg = sub nsw i32 0, %sub
  %2 = select i1 %ispos, i32 %sub, i32 %neg
  %add = add nsw i32 %2, %s.015
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

define i32 @goo() {
; Same absolute-difference reduction as @foo, except that the loads index @a
; at %indvars.iv + 3 and @b at %indvars.iv + 2, so the induction variable is
; only consumed through address computations and the exit compare.
;
; For indvars.iv used in a computation chain only feeding into getelementptr or cmp,
; it will not have vector version and the vector register usage will not exceed the
; available vector register number.

; CHECK-LABEL: goo

; CHECK: Setting best plan to VF=16, UF=4

entry:
  br label %for.body

for.cond.cleanup: ; preds = %for.body
  %add.lcssa = phi i32 [ %add, %for.body ]
  ret i32 %add.lcssa

for.body: ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %s.015 = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %tmp1 = add nsw i64 %indvars.iv, 3
  %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %tmp1
  %tmp = load i8, i8* %arrayidx, align 1
  %conv = zext i8 %tmp to i32
  %tmp2 = add nsw i64 %indvars.iv, 2
  %arrayidx2 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %tmp2
  %tmp3 = load i8, i8* %arrayidx2, align 1
  %conv3 = zext i8 %tmp3 to i32
  %sub = sub nsw i32 %conv, %conv3
  %ispos = icmp sgt i32 %sub, -1
  %neg = sub nsw i32 0, %sub
  %tmp4 = select i1 %ispos, i32 %sub, i32 %neg
  %add = add nsw i32 %tmp4, %s.015
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

define i64 @bar(i64* nocapture %a) {
; For i in [0, 1024): loads a[i], adds the index i to it, stores the sum back
; to a[i], and accumulates that sum into %s.011. Returns the accumulated total.
; CHECK-LABEL: bar

; CHECK: Setting best plan to VF=2, UF=12

entry:
  br label %for.body

for.cond.cleanup:
  %add2.lcssa = phi i64 [ %add2, %for.body ]
  ret i64 %add2.lcssa

for.body:
  %i.012 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %s.011 = phi i64 [ 0, %entry ], [ %add2, %for.body ]
  %arrayidx = getelementptr inbounds i64, i64* %a, i64 %i.012
  %0 = load i64, i64* %arrayidx, align 8
  %add = add nsw i64 %0, %i.012
  store i64 %add, i64* %arrayidx, align 8
  %add2 = add nsw i64 %add, %s.011
  %inc = add nuw nsw i64 %i.012, 1
  %exitcond = icmp eq i64 %inc, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; External arrays of unspecified length used by @hoo: @d supplies i64 indices,
; @e is read through those indices, and @c receives the loaded values.
@d = external global [0 x i64], align 8
@e = external global [0 x i32], align 4
@c = external global [0 x i32], align 4

define void @hoo(i32 %n) {
; For i in [0, 10000): loads an i64 index from d[i], uses it to load e[index],
; and stores that value to c[i]. The %n parameter is not used by the body.
; CHECK-LABEL: hoo
; CHECK: Setting best plan to VF=1, UF=12

entry:
  br label %for.body

for.body: ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds [0 x i64], [0 x i64]* @d, i64 0, i64 %indvars.iv
  %tmp = load i64, i64* %arrayidx, align 8
  %arrayidx1 = getelementptr inbounds [0 x i32], [0 x i32]* @e, i64 0, i64 %tmp
  %tmp1 = load i32, i32* %arrayidx1, align 4
  %arrayidx3 = getelementptr inbounds [0 x i32], [0 x i32]* @c, i64 0, i64 %indvars.iv
  store i32 %tmp1, i32* %arrayidx3, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end: ; preds = %for.body
  ret void
}

define float @float_(float* nocapture readonly %a, float* nocapture readonly %b, i32 %n) {
; Fast-math float reduction: when %n > 0, walks the index from 0 in steps of
; 32 while the next index is below %n, adding a[i] and b[i] into the running
; sum %s.02. Returns the sum, or 0.0 when the loop is skipped.
; The expectations below cover the register-usage report printed for VF = 1.
;CHECK-LABEL: float_
;CHECK: LV(REG): VF = 1
;CHECK: LV(REG): Found max usage: 2 item
;CHECK-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
;CHECK-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 3 registers
;CHECK: LV(REG): Found invariant usage: 1 item
;CHECK-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 1 registers

entry:
  %cmp = icmp sgt i32 %n, 0
  br i1 %cmp, label %preheader, label %for.end

preheader:
  %t0 = sext i32 %n to i64
  br label %for

for:
  %indvars.iv = phi i64 [ 0, %preheader ], [ %indvars.iv.next, %for ]
  %s.02 = phi float [ 0.0, %preheader ], [ %add4, %for ]
  %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
  %t1 = load float, float* %arrayidx, align 4
  %arrayidx3 = getelementptr inbounds float, float* %b, i64 %indvars.iv
  %t2 = load float, float* %arrayidx3, align 4
  %add = fadd fast float %t1, %s.02
  %add4 = fadd fast float %add, %t2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 32
  %cmp1 = icmp slt i64 %indvars.iv.next, %t0
  br i1 %cmp1, label %for, label %loopexit

loopexit:
  %add4.lcssa = phi float [ %add4, %for ]
  br label %for.end

for.end:
  %s.0.lcssa = phi float [ 0.0, %entry ], [ %add4.lcssa, %loopexit ]
  ret float %s.0.lcssa
}


define void @double_(double* nocapture %A, i32 %n) nounwind uwtable ssp {
; Counts the index down from sext(%n). Each iteration loads A[i], runs it
; through a long chain of double fadd/fmul/fsub/fdiv operations that keeps
; several intermediate values alive at once, and stores the result back to
; A[i]. The loop exits once the truncated current index equals 0.
; The pwr8 and pwr9 runs expect different VF and invariant register classes.
;CHECK-LABEL: double_
;CHECK-PWR8: LV(REG): VF = 2
;CHECK-PWR8: LV(REG): Found max usage: 2 item
;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 5 registers
;CHECK-PWR8: LV(REG): Found invariant usage: 1 item
;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 1 registers

;CHECK-PWR9: LV(REG): VF = 1
;CHECK-PWR9: LV(REG): Found max usage: 2 item
;CHECK-PWR9-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
;CHECK-PWR9-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 5 registers
;CHECK-PWR9: LV(REG): Found invariant usage: 1 item
;CHECK-PWR9-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 1 registers

  %1 = sext i32 %n to i64
  br label %2

; <label>:2 ; preds = %2, %0
  %indvars.iv = phi i64 [ %indvars.iv.next, %2 ], [ %1, %0 ]
  %3 = getelementptr inbounds double, double* %A, i64 %indvars.iv
  %4 = load double, double* %3, align 8
  %5 = fadd double %4, 3.000000e+00
  %6 = fmul double %4, 2.000000e+00
  %7 = fadd double %5, %6
  %8 = fadd double %7, 2.000000e+00
  %9 = fmul double %8, 5.000000e-01
  %10 = fadd double %6, %9
  %11 = fsub double %10, %5
  %12 = fadd double %4, %11
  %13 = fdiv double %8, %12
  %14 = fmul double %13, %8
  %15 = fmul double %6, %14
  %16 = fmul double %5, %15
  %17 = fadd double %16, -3.000000e+00
  %18 = fsub double %4, %5
  %19 = fadd double %6, %18
  %20 = fadd double %13, %19
  %21 = fadd double %20, %17
  %22 = fadd double %21, 3.000000e+00
  %23 = fmul double %4, %22
  store double %23, double* %3, align 8
  %indvars.iv.next = add i64 %indvars.iv, -1
  %24 = trunc i64 %indvars.iv to i32
  %25 = icmp eq i32 %24, 0
  br i1 %25, label %26, label %2

; <label>:26 ; preds = %2
  ret void
}

define ppc_fp128 @fp128_(ppc_fp128* nocapture %n, ppc_fp128 %d) nounwind readonly {
; Starting from %d, subtracts each of the 2048 ppc_fp128 elements n[0..2047]
; from the running value %x.05 and returns the final result.
; The expectations below cover the register-usage report printed for VF = 1.
;CHECK-LABEL: fp128_
;CHECK: LV(REG): VF = 1
;CHECK: LV(REG): Found max usage: 2 item
;CHECK: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
;CHECK: LV(REG): RegisterClass: PPC::VRRC, 2 registers
entry:
  br label %for.body

for.body: ; preds = %for.body, %entry
  %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %x.05 = phi ppc_fp128 [ %d, %entry ], [ %sub, %for.body ]
  %arrayidx = getelementptr inbounds ppc_fp128, ppc_fp128* %n, i32 %i.06
  %0 = load ppc_fp128, ppc_fp128* %arrayidx, align 8
  %sub = fsub ppc_fp128 %x.05, %0
  %inc = add nsw i32 %i.06, 1
  %exitcond = icmp eq i32 %inc, 2048
  br i1 %exitcond, label %for.end, label %for.body

for.end: ; preds = %for.body
  ret ppc_fp128 %sub
}


define void @fp16_(half* nocapture readonly %pIn, half* nocapture %pOut, i32 %numRows, i32 %numCols, i32 %scale.coerce) #0 {
; Scales half-precision data: the low 16 bits of %scale.coerce are bitcast to a
; half scale factor, and the loop runs (%numCols * %numRows) >> 2 times (it is
; skipped entirely when that count is 0). Each iteration loads two consecutive
; halves from the input pointer, multiplies both by the scale, stores them to
; the output pointer, and advances both pointers by two elements.
; NOTE(review): attribute group #0 is referenced here but no definition of it
; is visible in this listing - confirm against the complete file.
;CHECK-LABEL: fp16_
;CHECK: LV(REG): VF = 1
;CHECK: LV(REG): Found max usage: 2 item
;CHECK: LV(REG): RegisterClass: PPC::GPRRC, 4 registers
;CHECK: LV(REG): RegisterClass: PPC::VSXRC, 2 registers
entry:
  %tmp.0.extract.trunc = trunc i32 %scale.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %mul = mul i32 %numCols, %numRows
  %shr = lshr i32 %mul, 2
  %cmp26 = icmp eq i32 %shr, 0
  br i1 %cmp26, label %while.end, label %while.body

while.body: ; preds = %entry, %while.body
  %pIn.addr.029 = phi half* [ %add.ptr, %while.body ], [ %pIn, %entry ]
  %pOut.addr.028 = phi half* [ %add.ptr7, %while.body ], [ %pOut, %entry ]
  %blkCnt.027 = phi i32 [ %dec, %while.body ], [ %shr, %entry ]
  %1 = load half, half* %pIn.addr.029, align 2
  %arrayidx2 = getelementptr inbounds half, half* %pIn.addr.029, i32 1
  %2 = load half, half* %arrayidx2, align 2
  %mul3 = fmul half %1, %0
  %mul4 = fmul half %2, %0
  store half %mul3, half* %pOut.addr.028, align 2
  %arrayidx6 = getelementptr inbounds half, half* %pOut.addr.028, i32 1
  store half %mul4, half* %arrayidx6, align 2
  %add.ptr = getelementptr inbounds half, half* %pIn.addr.029, i32 2
  %add.ptr7 = getelementptr inbounds half, half* %pOut.addr.028, i32 2
  %dec = add nsw i32 %blkCnt.027, -1
  %cmp = icmp eq i32 %dec, 0
  br i1 %cmp, label %while.end, label %while.body

while.end: ; preds = %while.body, %entry
  ret void
}