; RUN: llc < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr9 \
; RUN:   -verify-machineinstrs -ppc-asm-full-reg-names | FileCheck %s

; Byte-swaps every i32 of %Arr[0 .. %Len) in place. The IR contains a
; <4 x i32> main loop (vector.body) and a scalar loop (for.body) that handles
; whatever the main loop did not cover.
;
; The directives inside the function require an lxvx load into a VSX register,
; no other mention of that register afterwards until it appears as the source
; operand of xxbrw.
define dso_local void @test(i32* %Arr, i32 signext %Len) {
; CHECK-LABEL: test:
; CHECK: lxvx [[REG:vs[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}}
; CHECK-NOT: [[REG]]
; CHECK: xxbrw vs{{[0-9]+}}, [[REG]]
entry:
  ; Nothing to do unless 0 < Len (signed compare).
  %cmp1 = icmp slt i32 0, %Len
  br i1 %cmp1, label %for.body.lr.ph, label %for.cond.cleanup

for.body.lr.ph:                                   ; preds = %entry
  ; Fewer than 4 elements (unsigned compare): go straight to the scalar loop.
  %min.iters.check = icmp ult i32 %Len, 4
  br i1 %min.iters.check, label %scalar.ph, label %vector.ph

vector.ph:                                        ; preds = %for.body.lr.ph
  ; n.vec = Len - (Len % 4): the exit bound of the 4-wide loop.
  %n.mod.vf = urem i32 %Len, 4
  %n.vec = sub i32 %Len, %n.mod.vf
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  ; The splat / induction vector built here is not read by any later
  ; instruction in this function.
  %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
  %induction = add <4 x i32> %broadcast.splat, <i32 0, i32 1, i32 2, i32 3>
  %0 = add i32 %index, 0
  ; Load the four i32 elements starting at Arr[index] as one <4 x i32>.
  %1 = sext i32 %0 to i64
  %2 = getelementptr inbounds i32, i32* %Arr, i64 %1
  %3 = getelementptr inbounds i32, i32* %2, i32 0
  %4 = bitcast i32* %3 to <4 x i32>*
  %wide.load = load <4 x i32>, <4 x i32>* %4, align 4
  ; Byte-swap each 32-bit lane, then store back through a pointer recomputed
  ; from the same base and index as the load.
  %5 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %wide.load)
  %6 = sext i32 %0 to i64
  %7 = getelementptr inbounds i32, i32* %Arr, i64 %6
  %8 = getelementptr inbounds i32, i32* %7, i32 0
  %9 = bitcast i32* %8 to <4 x i32>*
  store <4 x i32> %5, <4 x i32>* %9, align 4
  %index.next = add i32 %index, 4
  %10 = icmp eq i32 %index.next, %n.vec
  br i1 %10, label %middle.block, label %vector.body

middle.block:                                     ; preds = %vector.body
  ; If n.vec == Len the 4-wide loop covered everything; skip the scalar loop.
  %cmp.n = icmp eq i32 %Len, %n.vec
  br i1 %cmp.n, label %for.cond.for.cond.cleanup_crit_edge, label %scalar.ph

scalar.ph:                                        ; preds = %middle.block, %for.body.lr.ph
  ; Scalar loop starts at n.vec after the vector loop, or at 0 if it was bypassed.
  %bc.resume.val = phi i32 [ %n.vec, %middle.block ], [ 0, %for.body.lr.ph ]
  br label %for.body

for.cond.for.cond.cleanup_crit_edge:              ; preds = %middle.block, %for.inc
  br label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.cond.for.cond.cleanup_crit_edge, %entry
  br label %for.end

for.body:                                         ; preds = %for.inc, %scalar.ph
  ; Scalar step: Arr[i] = bswap(Arr[i]).
  %i.02 = phi i32 [ %bc.resume.val, %scalar.ph ], [ %inc, %for.inc ]
  %idxprom = sext i32 %i.02 to i64
  %arrayidx = getelementptr inbounds i32, i32* %Arr, i64 %idxprom
  %11 = load i32, i32* %arrayidx, align 4
  %12 = call i32 @llvm.bswap.i32(i32 %11)
  %idxprom1 = sext i32 %i.02 to i64
  %arrayidx2 = getelementptr inbounds i32, i32* %Arr, i64 %idxprom1
  store i32 %12, i32* %arrayidx2, align 4
  br label %for.inc

for.inc:                                          ; preds = %for.body
  ; Advance i and loop while i < Len (signed compare).
  %inc = add nsw i32 %i.02, 1
  %cmp = icmp slt i32 %inc, %Len
  br i1 %cmp, label %for.body, label %for.cond.for.cond.cleanup_crit_edge

for.end:                                          ; preds = %for.cond.cleanup
  ret void
}

; Returns llvm.bswap.v8i16 of the argument (byte swap within each i16 lane).
; The directives expect `xxbrh vs34, vs34` immediately followed by `blr`.
define dso_local <8 x i16> @test_halfword(<8 x i16> %a) local_unnamed_addr {
; CHECK-LABEL: test_halfword:
; CHECK: xxbrh vs34, vs34
; CHECK-NEXT: blr
entry:
  %0 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a)
  ret <8 x i16> %0
}

; Returns llvm.bswap.v2i64 of the argument (byte swap within each i64 lane).
; The directives expect `xxbrd vs34, vs34` immediately followed by `blr`.
define dso_local <2 x i64> @test_doubleword(<2 x i64> %a) local_unnamed_addr {
; CHECK-LABEL: test_doubleword:
; CHECK: xxbrd vs34, vs34
; CHECK-NEXT: blr
entry:
  %0 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a)
  ret <2 x i64> %0
}

; Returns llvm.bswap.v1i128 of the argument (byte swap of the single i128 lane).
; The directives expect `xxbrq vs34, vs34` immediately followed by `blr`.
define dso_local <1 x i128> @test_quadword(<1 x i128> %a) local_unnamed_addr {
; CHECK-LABEL: test_quadword:
; CHECK: xxbrq vs34, vs34
; CHECK-NEXT: blr
entry:
  %0 = call <1 x i128> @llvm.bswap.v1i128(<1 x i128> %a)
  ret <1 x i128> %0
}

; Declarations of the llvm.bswap intrinsic overloads called in this file:
; one scalar (i32) and four vector forms (v1i128, v2i64, v8i16, v4i32).

; Function Attrs: nounwind readnone speculatable willreturn
declare <1 x i128> @llvm.bswap.v1i128(<1 x i128>)

; Function Attrs: nounwind readnone speculatable willreturn
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)

; Function Attrs: nounwind readnone speculatable willreturn
declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)

; Function Attrs: nounwind readnone speculatable willreturn
declare i32 @llvm.bswap.i32(i32)

; Function Attrs: nounwind readnone speculatable willreturn
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)