llvm-for-llvmta/test/Transforms/LoopVectorize/vect-phiscev-sext-trunc.ll

; RUN: opt -S -loop-vectorize -force-vector-width=8 -force-vector-interleave=1 < %s | FileCheck %s -check-prefix=VF8
; RUN: opt -S -loop-vectorize -force-vector-width=1 -force-vector-interleave=4 < %s | FileCheck %s -check-prefix=VF1

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

; Given a loop with an induction variable which is being
; truncated/extended using casts that had been proven to
; be redundant under a runtime test, we want to make sure
; that these casts do not get vectorized/scalarized/widened.
; This is the case for inductions whose SCEV expression is
; of the form "ExtTrunc(%phi) + %step", where "ExtTrunc"
; can be a result of the IR sequences we check below.
; 
; See also pr30654.
;

; Case1: Check the following induction pattern:
;
;  %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
;  %sext = shl i32 %p.09, 24
;  %conv = ashr exact i32 %sext, 24
;  %add = add nsw i32 %conv, %step
; 
; This is the case in the following code:
;
; void doit1(int n, int step) {
;   int i;
;   char p = 0;
;   for (i = 0; i < n; i++) {
;      a[i] = p;
;      p = p + step;
;   }
; }
;
; The "ExtTrunc" IR sequence here is:
;  "%sext = shl i32 %p.09, 24"
;  "%conv = ashr exact i32 %sext, 24"
; We check that it does not appear in the vector loop body, whether
; we vectorize or scalarize the induction.
; In the case of widened induction, this means that the induction phi
; is directly used, without shl/ashr on the way.

; VF8-LABEL: @doit1
; VF8: vector.body:
; VF8: %vec.ind = phi <8 x i32>
; VF8: store <8 x i32> %vec.ind
; VF8: middle.block:            

; VF1-LABEL: @doit1
; VF1: vector.body:
; VF1-NOT: %{{.*}} = shl i32
; VF1: middle.block:            

@a = common local_unnamed_addr global [250 x i32] zeroinitializer, align 16

; doit1: for each i in [0, %n), stores the sign-extended low 8 bits of a
; running value into @a[i], then advances the running value by %step.
;   %n    - signed trip count; the loop is bypassed when %n <= 0.
;   %step - loop-invariant amount added to the running value every iteration.
define void @doit1(i32 %n, i32 %step) {
entry:
  ; Guard: only enter the loop when the signed trip count is positive.
  %cmp7 = icmp sgt i32 %n, 0
  br i1 %cmp7, label %for.body.lr.ph, label %for.end

for.body.lr.ph:
  ; The guard above established %n > 0, so zero-extending it yields the
  ; i64 trip count that the i64 index below is compared against.
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.body:
  ; %indvars.iv is the array index; %p.09 is the running value (the 'char p'
  ; of the C source quoted above), carried in a 32-bit register.
  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
  %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
  ; Shift left then arithmetic-shift right by 24: sign-extends the low 8 bits
  ; of %p.09 to 32 bits. This pair is the cast sequence under test.
  %sext = shl i32 %p.09, 24
  %conv = ashr exact i32 %sext, 24
  %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv
  store i32 %conv, i32* %arrayidx, align 4
  ; The backedge value is built from the cast result, not from the phi itself.
  %add = add nsw i32 %conv, %step
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Leave the loop once the incremented index equals the trip count.
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.end.loopexit, label %for.body

for.end.loopexit:
  ; Dedicated exit block for the loop; merges with the bypass path below.
  br label %for.end

for.end:
  ret void
}


; Case2: Another variant of the above pattern is where the induction variable
; is used only for address computation (i.e. it is a GEP index) and therefore
; the induction is not vectorized but rather only the step is widened. 
;
; This is the case in the following code, where the induction variable 'w_ix' 
; is only used to access the array 'in':
;
; void doit2(int *in, int *out, size_t size, size_t step)
; {
;    int w_ix = 0;
;    for (size_t offset = 0; offset < size; ++offset)
;     {
;        int w = in[w_ix];
;        out[offset] = w;
;        w_ix += step;
;     }
; }
;
; The "ExtTrunc" IR sequence here is similar to the previous case:
;  "%sext = shl i64 %w_ix.011, 32
;  %idxprom = ashr exact i64 %sext, 32"
; We check that it does not appear in the vector loop body, whether
; we widen or scalarize the induction.
; In the case of widened induction, this means that the induction phi
; is directly used, without shl/ashr on the way.

; VF8-LABEL: @doit2
; VF8: vector.body:
; VF8: %vec.ind = phi <8 x i64> 
; VF8: %{{.*}} = extractelement <8 x i64> %vec.ind
; VF8: middle.block:

; VF1-LABEL: @doit2
; VF1: vector.body:
; VF1-NOT: %{{.*}} = shl i64
; VF1: middle.block:
;

; doit2: copies %size elements into %out[0 .. %size), reading each one from
; %in at a running index that advances by %step per iteration.
;   %in   - source array (only read).
;   %out  - destination array, written at consecutive offsets.
;   %size - trip count; the loop is bypassed when it is zero.
;   %step - loop-invariant amount added to the running index every iteration.
define void @doit2(i32* nocapture readonly %in, i32* nocapture %out, i64 %size, i64 %step)  {
entry:
  ; Guard: nothing to do for an empty range.
  %cmp9 = icmp eq i64 %size, 0
  br i1 %cmp9, label %for.cond.cleanup, label %for.body.lr.ph

for.body.lr.ph:
  br label %for.body

for.cond.cleanup.loopexit:
  ; Dedicated exit block for the loop; merges with the bypass path below.
  br label %for.cond.cleanup

for.cond.cleanup:
  ret void

for.body:
  ; %w_ix.011 is the running read index (the 'int w_ix' of the C source quoted
  ; above, carried in a 64-bit register); %offset.010 is the write offset.
  %w_ix.011 = phi i64 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
  %offset.010 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
  ; Shift left then arithmetic-shift right by 32: sign-extends the low 32 bits
  ; of %w_ix.011 to 64 bits. This pair is the cast sequence under test.
  %sext = shl i64 %w_ix.011, 32
  %idxprom = ashr exact i64 %sext, 32
  ; The cast result is used only as an address index into %in.
  %arrayidx = getelementptr inbounds i32, i32* %in, i64 %idxprom
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds i32, i32* %out, i64 %offset.010
  store i32 %0, i32* %arrayidx1, align 4
  ; The backedge value is built from the cast result, not from the phi itself.
  %add = add i64 %idxprom, %step
  %inc = add nuw i64 %offset.010, 1
  ; Leave the loop once the incremented offset equals %size.
  %exitcond = icmp eq i64 %inc, %size
  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}

; Case3: Lastly, check also the following induction pattern:
; 
;  %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
;  %conv = and i32 %p.09, 255
;  %add = add nsw i32 %conv, %step
; 
; This is the case in the following code:
;
; int a[N];
; void doit3(int n, int step) {
;   int i;
;   unsigned char p = 0;
;   for (i = 0; i < n; i++) {
;      a[i] = p;
;      p = p + step;
;   }
; }
; 
; The "ExtTrunc" IR sequence here is:
;  "%conv = and i32 %p.09, 255".
; We check that it does not appear in the vector loop body, whether
; we vectorize or scalarize the induction.

; VF8-LABEL: @doit3
; VF8: vector.body:
; VF8: %vec.ind = phi <8 x i32>
; VF8: store <8 x i32> %vec.ind
; VF8: middle.block:            

; VF1-LABEL: @doit3
; VF1: vector.body:
; VF1-NOT: %{{.*}} = and i32 
; VF1: middle.block:            

; doit3: for each i in [0, %n), stores the low 8 bits (zero-extended) of a
; running value into @a[i], then advances the running value by %step.
;   %n    - signed trip count; the loop is bypassed when %n <= 0.
;   %step - loop-invariant amount added to the running value every iteration.
define void @doit3(i32 %n, i32 %step) {
entry:
  ; Guard: only enter the loop when the signed trip count is positive.
  %cmp7 = icmp sgt i32 %n, 0
  br i1 %cmp7, label %for.body.lr.ph, label %for.end

for.body.lr.ph:
  ; The guard above established %n > 0, so zero-extending it yields the
  ; i64 trip count that the i64 index below is compared against.
  %wide.trip.count = zext i32 %n to i64
  br label %for.body

for.body:
  ; %indvars.iv is the array index; %p.09 is the running value (the
  ; 'unsigned char p' of the C source quoted above), carried in 32 bits.
  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
  %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
  ; Masking with 255 keeps only the low 8 bits, i.e. a truncate to 8 bits
  ; followed by a zero-extend. This mask is the cast sequence under test.
  %conv = and i32 %p.09, 255
  %arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv
  store i32 %conv, i32* %arrayidx, align 4
  ; The backedge value is built from the masked result, not from the phi itself.
  %add = add nsw i32 %conv, %step
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Leave the loop once the incremented index equals the trip count.
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.end.loopexit, label %for.body

for.end.loopexit:
  ; Dedicated exit block for the loop; merges with the bypass path below.
  br label %for.end

for.end:
  ret void
}
first commit 2022-04-25 10:02:23 +02:00			`; RUN: opt -S -loop-vectorize -force-vector-width=8 -force-vector-interleave=1 < %s \| FileCheck %s -check-prefix=VF8`
			`; RUN: opt -S -loop-vectorize -force-vector-width=1 -force-vector-interleave=4 < %s \| FileCheck %s -check-prefix=VF1`

			`target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"`

			`; Given a loop with an induction variable which is being`
			`; truncated/extended using casts that had been proven to`
			`; be redundant under a runtime test, we want to make sure`
			`; that these casts do not get vectorized/scalarized/widened.`
			`; This is the case for inductions whose SCEV expression is`
			`; of the form "ExtTrunc(%phi) + %step", where "ExtTrunc"`
			`; can be a result of the IR sequences we check below.`
			`;`
			`; See also pr30654.`
			`;`

			`; Case1: Check the following induction pattern:`
			`;`
			`; %p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]`
			`; %sext = shl i32 %p.09, 24`
			`; %conv = ashr exact i32 %sext, 24`
			`; %add = add nsw i32 %conv, %step`
			`;`
			`; This is the case in the following code:`
			`;`
			`; void doit1(int n, int step) {`
			`; int i;`
			`; char p = 0;`
			`; for (i = 0; i < n; i++) {`
			`; a[i] = p;`
			`; p = p + step;`
			`; }`
			`; }`
			`;`
			`; The "ExtTrunc" IR sequence here is:`
			`; "%sext = shl i32 %p.09, 24"`
			`; "%conv = ashr exact i32 %sext, 24"`
			`; We check that it does not appear in the vector loop body, whether`
			`; we vectorize or scalarize the induction.`
			`; In the case of widened induction, this means that the induction phi`
			`; is directly used, without shl/ashr on the way.`

			`; VF8-LABEL: @doit1`
			`; VF8: vector.body:`
			`; VF8: %vec.ind = phi <8 x i32>`
			`; VF8: store <8 x i32> %vec.ind`
			`; VF8: middle.block:`

			`; VF1-LABEL: @doit1`
			`; VF1: vector.body:`
			`; VF1-NOT: %{{.*}} = shl i32`
			`; VF1: middle.block:`

			`@a = common local_unnamed_addr global [250 x i32] zeroinitializer, align 16`

			`define void @doit1(i32 %n, i32 %step) {`
			`entry:`
			`%cmp7 = icmp sgt i32 %n, 0`
			`br i1 %cmp7, label %for.body.lr.ph, label %for.end`

			`for.body.lr.ph:`
			`%wide.trip.count = zext i32 %n to i64`
			`br label %for.body`

			`for.body:`
			`%indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]`
			`%p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]`
			`%sext = shl i32 %p.09, 24`
			`%conv = ashr exact i32 %sext, 24`
			`%arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv`
			`store i32 %conv, i32* %arrayidx, align 4`
			`%add = add nsw i32 %conv, %step`
			`%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1`
			`%exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count`
			`br i1 %exitcond, label %for.end.loopexit, label %for.body`

			`for.end.loopexit:`
			`br label %for.end`

			`for.end:`
			`ret void`
			`}`


			`; Case2: Another variant of the above pattern is where the induction variable`
			`; is used only for address computation (i.e. it is a GEP index) and therefore`
			`; the induction is not vectorized but rather only the step is widened.`
			`;`
			`; This is the case in the following code, where the induction variable 'w_ix'`
			`; is only used to access the array 'in':`
			`;`
			`; void doit2(int *in, int *out, size_t size, size_t step)`
			`; {`
			`; int w_ix = 0;`
			`; for (size_t offset = 0; offset < size; ++offset)`
			`; {`
			`; int w = in[w_ix];`
			`; out[offset] = w;`
			`; w_ix += step;`
			`; }`
			`; }`
			`;`
			`; The "ExtTrunc" IR sequence here is similar to the previous case:`
			`; "%sext = shl i64 %w_ix.011, 32`
			`; %idxprom = ashr exact i64 %sext, 32"`
			`; We check that it does not appear in the vector loop body, whether`
			`; we widen or scalarize the induction.`
			`; In the case of widened induction, this means that the induction phi`
			`; is directly used, without shl/ashr on the way.`

			`; VF8-LABEL: @doit2`
			`; VF8: vector.body:`
			`; VF8: %vec.ind = phi <8 x i64>`
			`; VF8: %{{.*}} = extractelement <8 x i64> %vec.ind`
			`; VF8: middle.block:`

			`; VF1-LABEL: @doit2`
			`; VF1: vector.body:`
			`; VF1-NOT: %{{.*}} = shl i64`
			`; VF1: middle.block:`
			`;`

			`define void @doit2(i32* nocapture readonly %in, i32* nocapture %out, i64 %size, i64 %step) {`
			`entry:`
			`%cmp9 = icmp eq i64 %size, 0`
			`br i1 %cmp9, label %for.cond.cleanup, label %for.body.lr.ph`

			`for.body.lr.ph:`
			`br label %for.body`

			`for.cond.cleanup.loopexit:`
			`br label %for.cond.cleanup`

			`for.cond.cleanup:`
			`ret void`

			`for.body:`
			`%w_ix.011 = phi i64 [ 0, %for.body.lr.ph ], [ %add, %for.body ]`
			`%offset.010 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]`
			`%sext = shl i64 %w_ix.011, 32`
			`%idxprom = ashr exact i64 %sext, 32`
			`%arrayidx = getelementptr inbounds i32, i32* %in, i64 %idxprom`
			`%0 = load i32, i32* %arrayidx, align 4`
			`%arrayidx1 = getelementptr inbounds i32, i32* %out, i64 %offset.010`
			`store i32 %0, i32* %arrayidx1, align 4`
			`%add = add i64 %idxprom, %step`
			`%inc = add nuw i64 %offset.010, 1`
			`%exitcond = icmp eq i64 %inc, %size`
			`br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body`
			`}`

			`; Case3: Lastly, check also the following induction pattern:`
			`;`
			`; %p.09 = phi i32 [ %val0, %scalar.ph ], [ %add, %for.body ]`
			`; %conv = and i32 %p.09, 255`
			`; %add = add nsw i32 %conv, %step`
			`;`
			`; This is the case in the following code:`
			`;`
			`; int a[N];`
			`; void doit3(int n, int step) {`
			`; int i;`
			`; unsigned char p = 0;`
			`; for (i = 0; i < n; i++) {`
			`; a[i] = p;`
			`; p = p + step;`
			`; }`
			`; }`
			`;`
			`; The "ExtTrunc" IR sequence here is:`
			`; "%conv = and i32 %p.09, 255".`
			`; We check that it does not appear in the vector loop body, whether`
			`; we vectorize or scalarize the induction.`

			`; VF8-LABEL: @doit3`
			`; VF8: vector.body:`
			`; VF8: %vec.ind = phi <8 x i32>`
			`; VF8: store <8 x i32> %vec.ind`
			`; VF8: middle.block:`

			`; VF1-LABEL: @doit3`
			`; VF1: vector.body:`
			`; VF1-NOT: %{{.*}} = and i32`
			`; VF1: middle.block:`

			`define void @doit3(i32 %n, i32 %step) {`
			`entry:`
			`%cmp7 = icmp sgt i32 %n, 0`
			`br i1 %cmp7, label %for.body.lr.ph, label %for.end`

			`for.body.lr.ph:`
			`%wide.trip.count = zext i32 %n to i64`
			`br label %for.body`

			`for.body:`
			`%indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]`
			`%p.09 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]`
			`%conv = and i32 %p.09, 255`
			`%arrayidx = getelementptr inbounds [250 x i32], [250 x i32]* @a, i64 0, i64 %indvars.iv`
			`store i32 %conv, i32* %arrayidx, align 4`
			`%add = add nsw i32 %conv, %step`
			`%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1`
			`%exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count`
			`br i1 %exitcond, label %for.end.loopexit, label %for.body`

			`for.end.loopexit:`
			`br label %for.end`

			`for.end:`
			`ret void`
			`}`