; RUN: llc < %s -mtriple=powerpc64le-unknown-unknown -mcpu=pwr9 \ ; RUN: -verify-machineinstrs -ppc-asm-full-reg-names | FileCheck %s define dso_local void @test(i32* %Arr, i32 signext %Len) { ; CHECK-LABEL: test: ; CHECK: lxvx [[REG:vs[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}} ; CHECK-NOT: [[REG]] ; CHECK: xxbrw vs{{[0-9]+}}, [[REG]] entry: %cmp1 = icmp slt i32 0, %Len br i1 %cmp1, label %for.body.lr.ph, label %for.cond.cleanup for.body.lr.ph: ; preds = %entry %min.iters.check = icmp ult i32 %Len, 4 br i1 %min.iters.check, label %scalar.ph, label %vector.ph vector.ph: ; preds = %for.body.lr.ph %n.mod.vf = urem i32 %Len, 4 %n.vec = sub i32 %Len, %n.mod.vf br label %vector.body vector.body: ; preds = %vector.body, %vector.ph %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %index, i32 0 %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer %induction = add <4 x i32> %broadcast.splat, %0 = add i32 %index, 0 %1 = sext i32 %0 to i64 %2 = getelementptr inbounds i32, i32* %Arr, i64 %1 %3 = getelementptr inbounds i32, i32* %2, i32 0 %4 = bitcast i32* %3 to <4 x i32>* %wide.load = load <4 x i32>, <4 x i32>* %4, align 4 %5 = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %wide.load) %6 = sext i32 %0 to i64 %7 = getelementptr inbounds i32, i32* %Arr, i64 %6 %8 = getelementptr inbounds i32, i32* %7, i32 0 %9 = bitcast i32* %8 to <4 x i32>* store <4 x i32> %5, <4 x i32>* %9, align 4 %index.next = add i32 %index, 4 %10 = icmp eq i32 %index.next, %n.vec br i1 %10, label %middle.block, label %vector.body middle.block: ; preds = %vector.body %cmp.n = icmp eq i32 %Len, %n.vec br i1 %cmp.n, label %for.cond.for.cond.cleanup_crit_edge, label %scalar.ph scalar.ph: ; preds = %middle.block, %for.body.lr.ph %bc.resume.val = phi i32 [ %n.vec, %middle.block ], [ 0, %for.body.lr.ph ] br label %for.body for.cond.for.cond.cleanup_crit_edge: ; preds = %middle.block, %for.inc br label %for.cond.cleanup for.cond.cleanup: ; preds = %for.cond.for.cond.cleanup_crit_edge, %entry br label %for.end for.body: ; preds = %for.inc, %scalar.ph %i.02 = phi i32 [ %bc.resume.val, %scalar.ph ], [ %inc, %for.inc ] %idxprom = sext i32 %i.02 to i64 %arrayidx = getelementptr inbounds i32, i32* %Arr, i64 %idxprom %11 = load i32, i32* %arrayidx, align 4 %12 = call i32 @llvm.bswap.i32(i32 %11) %idxprom1 = sext i32 %i.02 to i64 %arrayidx2 = getelementptr inbounds i32, i32* %Arr, i64 %idxprom1 store i32 %12, i32* %arrayidx2, align 4 br label %for.inc for.inc: ; preds = %for.body %inc = add nsw i32 %i.02, 1 %cmp = icmp slt i32 %inc, %Len br i1 %cmp, label %for.body, label %for.cond.for.cond.cleanup_crit_edge for.end: ; preds = %for.cond.cleanup ret void } define dso_local <8 x i16> @test_halfword(<8 x i16> %a) local_unnamed_addr { ; CHECK-LABEL: test_halfword: ; CHECK: xxbrh vs34, vs34 ; CHECK-NEXT: blr entry: %0 = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %a) ret <8 x i16> %0 } define dso_local <2 x i64> @test_doubleword(<2 x i64> %a) local_unnamed_addr { ; CHECK-LABEL: test_doubleword: ; CHECK: xxbrd vs34, vs34 ; CHECK-NEXT: blr entry: %0 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %a) ret <2 x i64> %0 } define dso_local <1 x i128> @test_quadword(<1 x i128> %a) local_unnamed_addr { ; CHECK-LABEL: test_quadword: ; CHECK: xxbrq vs34, vs34 ; CHECK-NEXT: blr entry: %0 = call <1 x i128> @llvm.bswap.v1i128(<1 x i128> %a) ret <1 x i128> %0 } ; Function Attrs: nounwind readnone speculatable willreturn declare <1 x i128> @llvm.bswap.v1i128(<1 x i128>) ; Function Attrs: nounwind readnone speculatable willreturn declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>) ; Function Attrs: nounwind readnone speculatable willreturn declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>) ; Function Attrs: nounwind readnone speculatable willreturn declare i32 @llvm.bswap.i32(i32) ; Function Attrs: nounwind readnone speculatable willreturn declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)