259 lines
13 KiB
LLVM
259 lines
13 KiB
LLVM
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
|
; RUN: llc -mcpu=pwr9 -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s
|
|
|
|
; void foo(float *data, float d) {
|
|
; long i;
|
|
; for (i = 0; i < 8000; i++)
|
|
; data[i] = d;
|
|
; }
|
|
;
|
|
; This loop will be unrolled by 96 and vectorized on power9.
|
|
; The icmp on the loop iteration index and the loop trip count (384) has an LSRUse for 'reg({0,+,384})'.
|
|
; Make sure the above icmp does not stop LSR from choosing the best formulae set, which is based on 'reg({(192 + %0),+,384})'.
|
|
|
|
; @foo stores %d to data[0] .. data[7999]: the vector.body loop covers elements
; 0..7967 in steps of 96 floats (24 <4 x float> stores per iteration), and the
; for.body block stores the remaining 32 elements one float at a time.
define void @foo(float* nocapture %data, float %d) {
|
|
; The assertions below expect a counted loop of 83 iterations (li/mtctr ... bdnz)
; whose 24 vector stores all address memory through r4, which starts at
; r3 + 192 and advances by 384 bytes per iteration; the 32 scalar stores that
; follow the loop use fixed offsets from r3.
; CHECK-LABEL: foo:
|
|
; CHECK: # %bb.0: # %entry
|
|
; CHECK-NEXT: xscvdpspn 0, 1
|
|
; CHECK-NEXT: li 5, 83
|
|
; CHECK-NEXT: addi 4, 3, 192
|
|
; CHECK-NEXT: mtctr 5
|
|
; CHECK-NEXT: xxspltw 0, 0, 0
|
|
; CHECK-NEXT: .p2align 4
|
|
; CHECK-NEXT: .LBB0_1: # %vector.body
|
|
; CHECK-NEXT: #
|
|
; CHECK-NEXT: stxv 0, -192(4)
|
|
; CHECK-NEXT: stxv 0, -176(4)
|
|
; CHECK-NEXT: stxv 0, -160(4)
|
|
; CHECK-NEXT: stxv 0, -144(4)
|
|
; CHECK-NEXT: stxv 0, -128(4)
|
|
; CHECK-NEXT: stxv 0, -112(4)
|
|
; CHECK-NEXT: stxv 0, -96(4)
|
|
; CHECK-NEXT: stxv 0, -80(4)
|
|
; CHECK-NEXT: stxv 0, -64(4)
|
|
; CHECK-NEXT: stxv 0, -48(4)
|
|
; CHECK-NEXT: stxv 0, -32(4)
|
|
; CHECK-NEXT: stxv 0, -16(4)
|
|
; CHECK-NEXT: stxv 0, 0(4)
|
|
; CHECK-NEXT: stxv 0, 16(4)
|
|
; CHECK-NEXT: stxv 0, 32(4)
|
|
; CHECK-NEXT: stxv 0, 48(4)
|
|
; CHECK-NEXT: stxv 0, 64(4)
|
|
; CHECK-NEXT: stxv 0, 80(4)
|
|
; CHECK-NEXT: stxv 0, 96(4)
|
|
; CHECK-NEXT: stxv 0, 112(4)
|
|
; CHECK-NEXT: stxv 0, 128(4)
|
|
; CHECK-NEXT: stxv 0, 144(4)
|
|
; CHECK-NEXT: stxv 0, 160(4)
|
|
; CHECK-NEXT: stxv 0, 176(4)
|
|
; CHECK-NEXT: addi 4, 4, 384
|
|
; CHECK-NEXT: bdnz .LBB0_1
|
|
; CHECK-NEXT: # %bb.2: # %for.body
|
|
; CHECK-NEXT: stfs 1, 31872(3)
|
|
; CHECK-NEXT: stfs 1, 31876(3)
|
|
; CHECK-NEXT: stfs 1, 31880(3)
|
|
; CHECK-NEXT: stfs 1, 31884(3)
|
|
; CHECK-NEXT: stfs 1, 31888(3)
|
|
; CHECK-NEXT: stfs 1, 31892(3)
|
|
; CHECK-NEXT: stfs 1, 31896(3)
|
|
; CHECK-NEXT: stfs 1, 31900(3)
|
|
; CHECK-NEXT: stfs 1, 31904(3)
|
|
; CHECK-NEXT: stfs 1, 31908(3)
|
|
; CHECK-NEXT: stfs 1, 31912(3)
|
|
; CHECK-NEXT: stfs 1, 31916(3)
|
|
; CHECK-NEXT: stfs 1, 31920(3)
|
|
; CHECK-NEXT: stfs 1, 31924(3)
|
|
; CHECK-NEXT: stfs 1, 31928(3)
|
|
; CHECK-NEXT: stfs 1, 31932(3)
|
|
; CHECK-NEXT: stfs 1, 31936(3)
|
|
; CHECK-NEXT: stfs 1, 31940(3)
|
|
; CHECK-NEXT: stfs 1, 31944(3)
|
|
; CHECK-NEXT: stfs 1, 31948(3)
|
|
; CHECK-NEXT: stfs 1, 31952(3)
|
|
; CHECK-NEXT: stfs 1, 31956(3)
|
|
; CHECK-NEXT: stfs 1, 31960(3)
|
|
; CHECK-NEXT: stfs 1, 31964(3)
|
|
; CHECK-NEXT: stfs 1, 31968(3)
|
|
; CHECK-NEXT: stfs 1, 31972(3)
|
|
; CHECK-NEXT: stfs 1, 31976(3)
|
|
; CHECK-NEXT: stfs 1, 31980(3)
|
|
; CHECK-NEXT: stfs 1, 31984(3)
|
|
; CHECK-NEXT: stfs 1, 31988(3)
|
|
; CHECK-NEXT: stfs 1, 31992(3)
|
|
; CHECK-NEXT: stfs 1, 31996(3)
|
|
; CHECK-NEXT: blr
|
|
|
|
entry:
|
|
; Twelve textually identical splats of %d into a <4 x float>; each splat value
; is used by exactly one store in each half of the loop body below.
%broadcast.splatinsert16 = insertelement <4 x float> undef, float %d, i32 0
|
|
%broadcast.splat17 = shufflevector <4 x float> %broadcast.splatinsert16, <4 x float> undef, <4 x i32> zeroinitializer
|
|
%broadcast.splatinsert18 = insertelement <4 x float> undef, float %d, i32 0
|
|
%broadcast.splat19 = shufflevector <4 x float> %broadcast.splatinsert18, <4 x float> undef, <4 x i32> zeroinitializer
|
|
%broadcast.splatinsert20 = insertelement <4 x float> undef, float %d, i32 0
|
|
%broadcast.splat21 = shufflevector <4 x float> %broadcast.splatinsert20, <4 x float> undef, <4 x i32> zeroinitializer
|
|
%broadcast.splatinsert22 = insertelement <4 x float> undef, float %d, i32 0
|
|
%broadcast.splat23 = shufflevector <4 x float> %broadcast.splatinsert22, <4 x float> undef, <4 x i32> zeroinitializer
|
|
%broadcast.splatinsert24 = insertelement <4 x float> undef, float %d, i32 0
|
|
%broadcast.splat25 = shufflevector <4 x float> %broadcast.splatinsert24, <4 x float> undef, <4 x i32> zeroinitializer
|
|
%broadcast.splatinsert26 = insertelement <4 x float> undef, float %d, i32 0
|
|
%broadcast.splat27 = shufflevector <4 x float> %broadcast.splatinsert26, <4 x float> undef, <4 x i32> zeroinitializer
|
|
%broadcast.splatinsert28 = insertelement <4 x float> undef, float %d, i32 0
|
|
%broadcast.splat29 = shufflevector <4 x float> %broadcast.splatinsert28, <4 x float> undef, <4 x i32> zeroinitializer
|
|
%broadcast.splatinsert30 = insertelement <4 x float> undef, float %d, i32 0
|
|
%broadcast.splat31 = shufflevector <4 x float> %broadcast.splatinsert30, <4 x float> undef, <4 x i32> zeroinitializer
|
|
%broadcast.splatinsert32 = insertelement <4 x float> undef, float %d, i32 0
|
|
%broadcast.splat33 = shufflevector <4 x float> %broadcast.splatinsert32, <4 x float> undef, <4 x i32> zeroinitializer
|
|
%broadcast.splatinsert34 = insertelement <4 x float> undef, float %d, i32 0
|
|
%broadcast.splat35 = shufflevector <4 x float> %broadcast.splatinsert34, <4 x float> undef, <4 x i32> zeroinitializer
|
|
%broadcast.splatinsert36 = insertelement <4 x float> undef, float %d, i32 0
|
|
%broadcast.splat37 = shufflevector <4 x float> %broadcast.splatinsert36, <4 x float> undef, <4 x i32> zeroinitializer
|
|
%broadcast.splatinsert38 = insertelement <4 x float> undef, float %d, i32 0
|
|
%broadcast.splat39 = shufflevector <4 x float> %broadcast.splatinsert38, <4 x float> undef, <4 x i32> zeroinitializer
|
|
br label %vector.body
|
|
|
|
vector.body: ; preds = %vector.body, %entry
|
|
; %index is the float index of the first element written by this iteration.
%index = phi i64 [ 0, %entry ], [ %index.next.1, %vector.body ]
|
|
; First half: 12 vector stores at float offsets 0, 4, ..., 44 from &data[%index].
%0 = getelementptr inbounds float, float* %data, i64 %index
|
|
%1 = bitcast float* %0 to <4 x float>*
|
|
store <4 x float> %broadcast.splat17, <4 x float>* %1, align 4
|
|
%2 = getelementptr inbounds float, float* %0, i64 4
|
|
%3 = bitcast float* %2 to <4 x float>*
|
|
store <4 x float> %broadcast.splat19, <4 x float>* %3, align 4
|
|
%4 = getelementptr inbounds float, float* %0, i64 8
|
|
%5 = bitcast float* %4 to <4 x float>*
|
|
store <4 x float> %broadcast.splat21, <4 x float>* %5, align 4
|
|
%6 = getelementptr inbounds float, float* %0, i64 12
|
|
%7 = bitcast float* %6 to <4 x float>*
|
|
store <4 x float> %broadcast.splat23, <4 x float>* %7, align 4
|
|
%8 = getelementptr inbounds float, float* %0, i64 16
|
|
%9 = bitcast float* %8 to <4 x float>*
|
|
store <4 x float> %broadcast.splat25, <4 x float>* %9, align 4
|
|
%10 = getelementptr inbounds float, float* %0, i64 20
|
|
%11 = bitcast float* %10 to <4 x float>*
|
|
store <4 x float> %broadcast.splat27, <4 x float>* %11, align 4
|
|
%12 = getelementptr inbounds float, float* %0, i64 24
|
|
%13 = bitcast float* %12 to <4 x float>*
|
|
store <4 x float> %broadcast.splat29, <4 x float>* %13, align 4
|
|
%14 = getelementptr inbounds float, float* %0, i64 28
|
|
%15 = bitcast float* %14 to <4 x float>*
|
|
store <4 x float> %broadcast.splat31, <4 x float>* %15, align 4
|
|
%16 = getelementptr inbounds float, float* %0, i64 32
|
|
%17 = bitcast float* %16 to <4 x float>*
|
|
store <4 x float> %broadcast.splat33, <4 x float>* %17, align 4
|
|
%18 = getelementptr inbounds float, float* %0, i64 36
|
|
%19 = bitcast float* %18 to <4 x float>*
|
|
store <4 x float> %broadcast.splat35, <4 x float>* %19, align 4
|
|
%20 = getelementptr inbounds float, float* %0, i64 40
|
|
%21 = bitcast float* %20 to <4 x float>*
|
|
store <4 x float> %broadcast.splat37, <4 x float>* %21, align 4
|
|
%22 = getelementptr inbounds float, float* %0, i64 44
|
|
%23 = bitcast float* %22 to <4 x float>*
|
|
store <4 x float> %broadcast.splat39, <4 x float>* %23, align 4
|
|
; Second half: the same 12 stores, now based at &data[%index + 48].
%index.next = add nuw nsw i64 %index, 48
|
|
%24 = getelementptr inbounds float, float* %data, i64 %index.next
|
|
%25 = bitcast float* %24 to <4 x float>*
|
|
store <4 x float> %broadcast.splat17, <4 x float>* %25, align 4
|
|
%26 = getelementptr inbounds float, float* %24, i64 4
|
|
%27 = bitcast float* %26 to <4 x float>*
|
|
store <4 x float> %broadcast.splat19, <4 x float>* %27, align 4
|
|
%28 = getelementptr inbounds float, float* %24, i64 8
|
|
%29 = bitcast float* %28 to <4 x float>*
|
|
store <4 x float> %broadcast.splat21, <4 x float>* %29, align 4
|
|
%30 = getelementptr inbounds float, float* %24, i64 12
|
|
%31 = bitcast float* %30 to <4 x float>*
|
|
store <4 x float> %broadcast.splat23, <4 x float>* %31, align 4
|
|
%32 = getelementptr inbounds float, float* %24, i64 16
|
|
%33 = bitcast float* %32 to <4 x float>*
|
|
store <4 x float> %broadcast.splat25, <4 x float>* %33, align 4
|
|
%34 = getelementptr inbounds float, float* %24, i64 20
|
|
%35 = bitcast float* %34 to <4 x float>*
|
|
store <4 x float> %broadcast.splat27, <4 x float>* %35, align 4
|
|
%36 = getelementptr inbounds float, float* %24, i64 24
|
|
%37 = bitcast float* %36 to <4 x float>*
|
|
store <4 x float> %broadcast.splat29, <4 x float>* %37, align 4
|
|
%38 = getelementptr inbounds float, float* %24, i64 28
|
|
%39 = bitcast float* %38 to <4 x float>*
|
|
store <4 x float> %broadcast.splat31, <4 x float>* %39, align 4
|
|
%40 = getelementptr inbounds float, float* %24, i64 32
|
|
%41 = bitcast float* %40 to <4 x float>*
|
|
store <4 x float> %broadcast.splat33, <4 x float>* %41, align 4
|
|
%42 = getelementptr inbounds float, float* %24, i64 36
|
|
%43 = bitcast float* %42 to <4 x float>*
|
|
store <4 x float> %broadcast.splat35, <4 x float>* %43, align 4
|
|
%44 = getelementptr inbounds float, float* %24, i64 40
|
|
%45 = bitcast float* %44 to <4 x float>*
|
|
store <4 x float> %broadcast.splat37, <4 x float>* %45, align 4
|
|
%46 = getelementptr inbounds float, float* %24, i64 44
|
|
%47 = bitcast float* %46 to <4 x float>*
|
|
store <4 x float> %broadcast.splat39, <4 x float>* %47, align 4
|
|
; Advance by 96 floats and leave the loop once index 7968 is reached
; (7968 / 96 = 83 iterations).
%index.next.1 = add nuw nsw i64 %index, 96
|
|
%48 = icmp eq i64 %index.next.1, 7968
|
|
br i1 %48, label %for.body, label %vector.body
|
|
|
|
for.body: ; preds = %vector.body
|
|
; Scalar tail: store %d to data[7968] .. data[7999] (32 individual float stores).
%arrayidx = getelementptr inbounds float, float* %data, i64 7968
|
|
store float %d, float* %arrayidx, align 4
|
|
%arrayidx.1 = getelementptr inbounds float, float* %data, i64 7969
|
|
store float %d, float* %arrayidx.1, align 4
|
|
%arrayidx.2 = getelementptr inbounds float, float* %data, i64 7970
|
|
store float %d, float* %arrayidx.2, align 4
|
|
%arrayidx.3 = getelementptr inbounds float, float* %data, i64 7971
|
|
store float %d, float* %arrayidx.3, align 4
|
|
%arrayidx.4 = getelementptr inbounds float, float* %data, i64 7972
|
|
store float %d, float* %arrayidx.4, align 4
|
|
%arrayidx.5 = getelementptr inbounds float, float* %data, i64 7973
|
|
store float %d, float* %arrayidx.5, align 4
|
|
%arrayidx.6 = getelementptr inbounds float, float* %data, i64 7974
|
|
store float %d, float* %arrayidx.6, align 4
|
|
%arrayidx.7 = getelementptr inbounds float, float* %data, i64 7975
|
|
store float %d, float* %arrayidx.7, align 4
|
|
%arrayidx.8 = getelementptr inbounds float, float* %data, i64 7976
|
|
store float %d, float* %arrayidx.8, align 4
|
|
%arrayidx.9 = getelementptr inbounds float, float* %data, i64 7977
|
|
store float %d, float* %arrayidx.9, align 4
|
|
%arrayidx.10 = getelementptr inbounds float, float* %data, i64 7978
|
|
store float %d, float* %arrayidx.10, align 4
|
|
%arrayidx.11 = getelementptr inbounds float, float* %data, i64 7979
|
|
store float %d, float* %arrayidx.11, align 4
|
|
%arrayidx.12 = getelementptr inbounds float, float* %data, i64 7980
|
|
store float %d, float* %arrayidx.12, align 4
|
|
%arrayidx.13 = getelementptr inbounds float, float* %data, i64 7981
|
|
store float %d, float* %arrayidx.13, align 4
|
|
%arrayidx.14 = getelementptr inbounds float, float* %data, i64 7982
|
|
store float %d, float* %arrayidx.14, align 4
|
|
%arrayidx.15 = getelementptr inbounds float, float* %data, i64 7983
|
|
store float %d, float* %arrayidx.15, align 4
|
|
%arrayidx.16 = getelementptr inbounds float, float* %data, i64 7984
|
|
store float %d, float* %arrayidx.16, align 4
|
|
%arrayidx.17 = getelementptr inbounds float, float* %data, i64 7985
|
|
store float %d, float* %arrayidx.17, align 4
|
|
%arrayidx.18 = getelementptr inbounds float, float* %data, i64 7986
|
|
store float %d, float* %arrayidx.18, align 4
|
|
%arrayidx.19 = getelementptr inbounds float, float* %data, i64 7987
|
|
store float %d, float* %arrayidx.19, align 4
|
|
%arrayidx.20 = getelementptr inbounds float, float* %data, i64 7988
|
|
store float %d, float* %arrayidx.20, align 4
|
|
%arrayidx.21 = getelementptr inbounds float, float* %data, i64 7989
|
|
store float %d, float* %arrayidx.21, align 4
|
|
%arrayidx.22 = getelementptr inbounds float, float* %data, i64 7990
|
|
store float %d, float* %arrayidx.22, align 4
|
|
%arrayidx.23 = getelementptr inbounds float, float* %data, i64 7991
|
|
store float %d, float* %arrayidx.23, align 4
|
|
%arrayidx.24 = getelementptr inbounds float, float* %data, i64 7992
|
|
store float %d, float* %arrayidx.24, align 4
|
|
%arrayidx.25 = getelementptr inbounds float, float* %data, i64 7993
|
|
store float %d, float* %arrayidx.25, align 4
|
|
%arrayidx.26 = getelementptr inbounds float, float* %data, i64 7994
|
|
store float %d, float* %arrayidx.26, align 4
|
|
%arrayidx.27 = getelementptr inbounds float, float* %data, i64 7995
|
|
store float %d, float* %arrayidx.27, align 4
|
|
%arrayidx.28 = getelementptr inbounds float, float* %data, i64 7996
|
|
store float %d, float* %arrayidx.28, align 4
|
|
%arrayidx.29 = getelementptr inbounds float, float* %data, i64 7997
|
|
store float %d, float* %arrayidx.29, align 4
|
|
%arrayidx.30 = getelementptr inbounds float, float* %data, i64 7998
|
|
store float %d, float* %arrayidx.30, align 4
|
|
%arrayidx.31 = getelementptr inbounds float, float* %data, i64 7999
|
|
store float %d, float* %arrayidx.31, align 4
|
|
ret void
|
|
}
|