llvm-for-llvmta/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll

; REQUIRES: asserts
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -loop-vectorize -S < %s 2>&1 | FileCheck %s
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck --check-prefix=CHECK-DBG %s
; RUN: opt -mtriple=aarch64-none-linux-gnu -loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck --check-prefix=CHECK-NO-SVE %s
; RUN: opt -mtriple=aarch64-none-linux-gnu -loop-vectorize -force-target-supports-scalable-vectors=true -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck --check-prefix=CHECK-NO-MAX-VSCALE %s

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"

; These tests validate the behaviour of scalable vectorization factor hints,
; where the following applies:
;
; * If the backend does not support scalable vectors, ignore the hint and let
;   the vectorizer pick a VF.
; * If there are no dependencies and assuming the VF is a power of 2 the VF
;   should be accepted. This applies to both fixed and scalable VFs.
; * If the dependency is too small to use scalable vectors, change the VF to
;   fixed, where existing behavior applies (clamping).
; * If scalable vectorization is feasible given the dependency and the VF is
;   valid, accept it. Otherwise, clamp to the max scalable VF.

; test1
;
; Scalable vectorization unfeasible, clamp VF from (4, scalable) -> (4, fixed).
;
; The pragma applied to this loop implies a scalable vector <vscale x 4 x i32>
; be used for vectorization. For fixed vectors the MaxVF=8, otherwise there
; would be a dependence between vector lanes for vectors greater than 256 bits.
;
; void test1(int *a, int *b, int N) {
;   #pragma clang loop vectorize(enable) vectorize_width(4, scalable)
;   for (int i=0; i<N; ++i) {
;     a[i + 8] = a[i] + b[i];
;   }
; }
;
; For scalable vectorization 'vscale' has to be considered, for this example
; unless max(vscale)=2 it's unsafe to vectorize. For SVE max(vscale)=16, check
; fixed-width vectorization is used instead.

; CHECK-DBG: LV: Max legal vector width too small, scalable vectorization unfeasible. Using fixed-width vectorization instead.
; CHECK-DBG: remark: <unknown>:0:0: Max legal vector width too small, scalable vectorization unfeasible. Using fixed-width vectorization instead.
; CHECK-DBG: LV: The max safe VF is: 8.
; CHECK-DBG: LV: Selecting VF: 4.
; CHECK-LABEL: @test1
; CHECK: <4 x i32>
; Loop body: a[i + 8] = a[i] + b[i] for i = 0 .. 1023.
; The store lands 8 elements ahead of the load from %a.
define void @test1(i32* %a, i32* %b) {
entry:
  br label %for.body

for.body:
  ; Induction variable, starts at 0 and steps by 1.
  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Read a[i] and b[i].
  %a.src = getelementptr inbounds i32, i32* %a, i64 %i
  %a.val = load i32, i32* %a.src, align 4
  %b.src = getelementptr inbounds i32, i32* %b, i64 %i
  %b.val = load i32, i32* %b.src, align 4
  %sum = add nsw i32 %b.val, %a.val
  ; Write the sum to a[i + 8].
  %i.off = add nuw nsw i64 %i, 8
  %a.dst = getelementptr inbounds i32, i32* %a, i64 %i.off
  store i32 %sum, i32* %a.dst, align 4
  ; Advance and leave after 1024 iterations.
  %i.next = add nuw nsw i64 %i, 1
  %finished = icmp eq i64 %i.next, 1024
  br i1 %finished, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret void
}

; Loop hint attached to the back-edge above: width 4, scalable enabled.
!0 = !{!0, !1, !2}
!1 = !{!"llvm.loop.vectorize.width", i32 4}
!2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}

; test2
;
; Scalable vectorization unfeasible, clamp VF from (8, scalable) -> (4, fixed).
;
; void test2(int *a, int *b, int N) {
;   #pragma clang loop vectorize(enable) vectorize_width(8, scalable)
;   for (int i=0; i<N; ++i) {
;     a[i + 4] = a[i] + b[i];
;   }
; }

; CHECK-DBG: LV: Max legal vector width too small, scalable vectorization unfeasible. Using fixed-width vectorization instead.
; CHECK-DBG: LV: The max safe VF is: 4.
; CHECK-DBG: LV: User VF=8 is unsafe, clamping to max safe VF=4.
; CHECK-DBG: LV: Selecting VF: 4.
; CHECK-LABEL: @test2
; CHECK: <4 x i32>
; Loop body: a[i + 4] = a[i] + b[i] for i = 0 .. 1023.
; The store lands 4 elements ahead of the load from %a.
define void @test2(i32* %a, i32* %b) {
entry:
  br label %for.body

for.body:
  ; Induction variable, starts at 0 and steps by 1.
  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Read a[i] and b[i].
  %a.src = getelementptr inbounds i32, i32* %a, i64 %i
  %a.val = load i32, i32* %a.src, align 4
  %b.src = getelementptr inbounds i32, i32* %b, i64 %i
  %b.val = load i32, i32* %b.src, align 4
  %sum = add nsw i32 %b.val, %a.val
  ; Write the sum to a[i + 4].
  %i.off = add nuw nsw i64 %i, 4
  %a.dst = getelementptr inbounds i32, i32* %a, i64 %i.off
  store i32 %sum, i32* %a.dst, align 4
  ; Advance and leave after 1024 iterations.
  %i.next = add nuw nsw i64 %i, 1
  %finished = icmp eq i64 %i.next, 1024
  br i1 %finished, label %for.end, label %for.body, !llvm.loop !3

for.end:
  ret void
}

; Loop hint attached to the back-edge above: width 8, scalable enabled.
!3 = !{!3, !4, !5}
!4 = !{!"llvm.loop.vectorize.width", i32 8}
!5 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}

; test3
;
; Scalable vectorization feasible and the VF is valid.
;
; Specifies a vector of <vscale x 2 x i32>, i.e. maximum of 32 x i32 with 2
; words per 128-bits (unpacked).
;
; void test3(int *a, int *b, int N) {
;   #pragma clang loop vectorize(enable) vectorize_width(2, scalable)
;   for (int i=0; i<N; ++i) {
;     a[i + 32] = a[i] + b[i];
;   }
; }
;
; Max fixed VF=32, Max scalable VF=2, safe to vectorize.

; CHECK-DBG-LABEL: LV: Checking a loop in "test3"
; CHECK-DBG: LV: The max safe VF is: vscale x 2.
; CHECK-DBG: LV: Using user VF vscale x 2.
; CHECK-LABEL: @test3
; CHECK: <vscale x 2 x i32>
; Loop body: a[i + 32] = a[i] + b[i] for i = 0 .. 1023.
; The store lands 32 elements ahead of the load from %a.
define void @test3(i32* %a, i32* %b) {
entry:
  br label %for.body

for.body:
  ; Induction variable, starts at 0 and steps by 1.
  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Read a[i] and b[i].
  %a.src = getelementptr inbounds i32, i32* %a, i64 %i
  %a.val = load i32, i32* %a.src, align 4
  %b.src = getelementptr inbounds i32, i32* %b, i64 %i
  %b.val = load i32, i32* %b.src, align 4
  %sum = add nsw i32 %b.val, %a.val
  ; Write the sum to a[i + 32].
  %i.off = add nuw nsw i64 %i, 32
  %a.dst = getelementptr inbounds i32, i32* %a, i64 %i.off
  store i32 %sum, i32* %a.dst, align 4
  ; Advance and leave after 1024 iterations.
  %i.next = add nuw nsw i64 %i, 1
  %finished = icmp eq i64 %i.next, 1024
  br i1 %finished, label %for.end, label %for.body, !llvm.loop !6

for.end:
  ret void
}

; Loop hint attached to the back-edge above: width 2, scalable enabled.
!6 = !{!6, !7, !8}
!7 = !{!"llvm.loop.vectorize.width", i32 2}
!8 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}

; test4
;
; Scalable vectorization feasible, but the VF is unsafe. Should clamp.
;
; Specifies a vector of <vscale x 4 x i32>, i.e. maximum of 64 x i32 with 4
; words per 128-bits (packed).
;
; void test4(int *a, int *b, int N) {
;   #pragma clang loop vectorize(enable) vectorize_width(4, scalable)
;   for (int i=0; i<N; ++i) {
;     a[i + 32] = a[i] + b[i];
;   }
; }
;
; Max fixed VF=32, Max scalable VF=2, unsafe to vectorize. Should clamp to 2.

; CHECK-DBG-LABEL: LV: Checking a loop in "test4"
; CHECK-DBG: LV: The max safe VF is: vscale x 2.
; CHECK-DBG: LV: User VF=vscale x 4 is unsafe, clamping to max safe VF=vscale x 2.
; CHECK-DBG: remark: <unknown>:0:0: User-specified vectorization factor vscale x 4 is unsafe, clamping to maximum safe vectorization factor vscale x 2
; CHECK-DBG: LV: Using max VF vscale x 2
; CHECK-LABEL: @test4
; CHECK: <vscale x 2 x i32>
; Loop body: a[i + 32] = a[i] + b[i] for i = 0 .. 1023.
; Same loop shape as @test3; only the attached width hint differs.
define void @test4(i32* %a, i32* %b) {
entry:
  br label %for.body

for.body:
  ; Induction variable, starts at 0 and steps by 1.
  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Read a[i] and b[i].
  %a.src = getelementptr inbounds i32, i32* %a, i64 %i
  %a.val = load i32, i32* %a.src, align 4
  %b.src = getelementptr inbounds i32, i32* %b, i64 %i
  %b.val = load i32, i32* %b.src, align 4
  %sum = add nsw i32 %b.val, %a.val
  ; Write the sum to a[i + 32].
  %i.off = add nuw nsw i64 %i, 32
  %a.dst = getelementptr inbounds i32, i32* %a, i64 %i.off
  store i32 %sum, i32* %a.dst, align 4
  ; Advance and leave after 1024 iterations.
  %i.next = add nuw nsw i64 %i, 1
  %finished = icmp eq i64 %i.next, 1024
  br i1 %finished, label %for.end, label %for.body, !llvm.loop !9

for.end:
  ret void
}

; Loop hint attached to the back-edge above: width 4, scalable enabled.
!9 = !{!9, !10, !11}
!10 = !{!"llvm.loop.vectorize.width", i32 4}
!11 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}

; test5
;
; Scalable vectorization feasible and the VF is valid.
;
; Specifies a vector of <vscale x 4 x i32>, i.e. maximum of 64 x i32 with 4
; words per 128-bits (packed).
;
; void test5(int *a, int *b, int N) {
;   #pragma clang loop vectorize(enable) vectorize_width(4, scalable)
;   for (int i=0; i<N; ++i) {
;     a[i + 128] = a[i] + b[i];
;   }
; }
;
; Max fixed VF=128, Max scalable VF=8, safe to vectorize.

; CHECK-DBG-LABEL: LV: Checking a loop in "test5"
; CHECK-DBG: LV: The max safe VF is: vscale x 8.
; CHECK-DBG: LV: Using user VF vscale x 4
; CHECK-LABEL: @test5
; CHECK: <vscale x 4 x i32>
; Loop body: a[i + 128] = a[i] + b[i] for i = 0 .. 1023.
; The store lands 128 elements ahead of the load from %a.
define void @test5(i32* %a, i32* %b) {
entry:
  br label %for.body

for.body:
  ; Induction variable, starts at 0 and steps by 1.
  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Read a[i] and b[i].
  %a.src = getelementptr inbounds i32, i32* %a, i64 %i
  %a.val = load i32, i32* %a.src, align 4
  %b.src = getelementptr inbounds i32, i32* %b, i64 %i
  %b.val = load i32, i32* %b.src, align 4
  %sum = add nsw i32 %b.val, %a.val
  ; Write the sum to a[i + 128].
  %i.off = add nuw nsw i64 %i, 128
  %a.dst = getelementptr inbounds i32, i32* %a, i64 %i.off
  store i32 %sum, i32* %a.dst, align 4
  ; Advance and leave after 1024 iterations.
  %i.next = add nuw nsw i64 %i, 1
  %finished = icmp eq i64 %i.next, 1024
  br i1 %finished, label %for.end, label %for.body, !llvm.loop !12

for.end:
  ret void
}

; Loop hint attached to the back-edge above: width 4, scalable enabled.
!12 = !{!12, !13, !14}
!13 = !{!"llvm.loop.vectorize.width", i32 4}
!14 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}

; test6
;
; Scalable vectorization feasible, but the VF is unsafe. Should clamp.
;
; Specifies a vector of <vscale x 16 x i32>, i.e. maximum of 256 x i32.
;
; void test6(int *a, int *b, int N) {
;   #pragma clang loop vectorize(enable) vectorize_width(16, scalable)
;   for (int i=0; i<N; ++i) {
;     a[i + 128] = a[i] + b[i];
;   }
; }
;
; Max fixed VF=128, Max scalable VF=8, unsafe to vectorize. Should clamp to 8.

; CHECK-DBG-LABEL: LV: Checking a loop in "test6"
; CHECK-DBG: LV: The max safe VF is: vscale x 8.
; CHECK-DBG: LV: User VF=vscale x 16 is unsafe, clamping to max safe VF=vscale x 8.
; CHECK-DBG: remark: <unknown>:0:0: User-specified vectorization factor vscale x 16 is unsafe, clamping to maximum safe vectorization factor vscale x 8
; CHECK-DBG: LV: Using max VF vscale x 8
; CHECK-LABEL: @test6
; CHECK: <vscale x 8 x i32>
; Loop body: a[i + 128] = a[i] + b[i] for i = 0 .. 1023.
; Same loop shape as @test5; only the attached width hint differs.
define void @test6(i32* %a, i32* %b) {
entry:
  br label %for.body

for.body:
  ; Induction variable, starts at 0 and steps by 1.
  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Read a[i] and b[i].
  %a.src = getelementptr inbounds i32, i32* %a, i64 %i
  %a.val = load i32, i32* %a.src, align 4
  %b.src = getelementptr inbounds i32, i32* %b, i64 %i
  %b.val = load i32, i32* %b.src, align 4
  %sum = add nsw i32 %b.val, %a.val
  ; Write the sum to a[i + 128].
  %i.off = add nuw nsw i64 %i, 128
  %a.dst = getelementptr inbounds i32, i32* %a, i64 %i.off
  store i32 %sum, i32* %a.dst, align 4
  ; Advance and leave after 1024 iterations.
  %i.next = add nuw nsw i64 %i, 1
  %finished = icmp eq i64 %i.next, 1024
  br i1 %finished, label %for.end, label %for.body, !llvm.loop !15

for.end:
  ret void
}

; Loop hint attached to the back-edge above: width 16, scalable enabled.
!15 = !{!15, !16, !17}
!16 = !{!"llvm.loop.vectorize.width", i32 16}
!17 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}

; CHECK-NO-SVE-LABEL: LV: Checking a loop in "test_no_sve"
; CHECK-NO-SVE: LV: Ignoring VF=vscale x 4 because target does not support scalable vectors.
; CHECK-NO-SVE: remark: <unknown>:0:0: Ignoring VF=vscale x 4 because target does not support scalable vectors.
; CHECK-NO-SVE: LV: Selecting VF: 4.
; CHECK-NO-SVE: <4 x i32>
; CHECK-NO-SVE-NOT: <vscale x 4 x i32>
; Loop body: a[i] = a[i] + b[i] for i = 0 .. 1023.
; The store goes back to the same element of %a that was loaded.
define void @test_no_sve(i32* %a, i32* %b) {
entry:
  br label %for.body

for.body:
  ; Induction variable, starts at 0 and steps by 1.
  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; Read a[i] and b[i].
  %a.slot = getelementptr inbounds i32, i32* %a, i64 %i
  %a.val = load i32, i32* %a.slot, align 4
  %b.slot = getelementptr inbounds i32, i32* %b, i64 %i
  %b.val = load i32, i32* %b.slot, align 4
  %sum = add nsw i32 %b.val, %a.val
  ; Write the sum back to a[i].
  store i32 %sum, i32* %a.slot, align 4
  ; Advance and leave after 1024 iterations.
  %i.next = add nuw nsw i64 %i, 1
  %finished = icmp eq i64 %i.next, 1024
  br i1 %finished, label %for.end, label %for.body, !llvm.loop !18

for.end:
  ret void
}

; Loop hint attached to the back-edge above: width 4, scalable enabled.
!18 = !{!18, !19, !20}
!19 = !{!"llvm.loop.vectorize.width", i32 4}
!20 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}

; Test the LV falls back to fixed-width vectorization if scalable vectors are
; supported but max vscale is undefined.
;
; The loop computes a[i + 4] = a[i] + b[i] for 1024 iterations and carries the
; hint (4, scalable) through !21. The expectations below are only exercised by
; the RUN line that passes -force-target-supports-scalable-vectors=true without
; -mattr=+sve.
;
; CHECK-NO-MAX-VSCALE-LABEL: LV: Checking a loop in "test_no_max_vscale"
; CHECK-NO-MAX-VSCALE: LV: Max legal vector width too small, scalable vectorization unfeasible. Using fixed-width vectorization instead.
; CHECK-NO-MAX-VSCALE: LV: The max safe VF is: 4.
; CHECK-NO-MAX-VSCALE: LV: Selecting VF: 4.
; CHECK-NO-MAX-VSCALE: <4 x i32>
define void @test_no_max_vscale(i32* %a, i32* %b) {
entry:
  br label %loop

loop:
  ; Induction variable, starts at 0 and steps by 1.
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  ; Read a[iv] and b[iv].
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv
  %1 = load i32, i32* %arrayidx2, align 4
  %add = add nsw i32 %1, %0
  ; Write the sum to a[iv + 4], i.e. 4 elements ahead of the load from %a.
  %2 = add nuw nsw i64 %iv, 4
  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
  store i32 %add, i32* %arrayidx5, align 4
  ; Advance and leave after 1024 iterations.
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !21

exit:
  ret void
}

; Loop hint attached to the back-edge above: width 4, scalable enabled.
!21 = !{!21, !22, !23}
!22 = !{!"llvm.loop.vectorize.width", i32 4}
!23 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
first commit 2022-04-25 10:02:23 +02:00			`; REQUIRES: asserts`
			`; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -loop-vectorize -S < %s 2>&1 \| FileCheck %s`
			`; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S < %s 2>&1 \| FileCheck --check-prefix=CHECK-DBG %s`
			`; RUN: opt -mtriple=aarch64-none-linux-gnu -loop-vectorize -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S < %s 2>&1 \| FileCheck --check-prefix=CHECK-NO-SVE %s`
			`; RUN: opt -mtriple=aarch64-none-linux-gnu -loop-vectorize -force-target-supports-scalable-vectors=true -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S < %s 2>&1 \| FileCheck --check-prefix=CHECK-NO-MAX-VSCALE %s`

			`target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"`

			`; These tests validate the behaviour of scalable vectorization factor hints,`
			`; where the following applies:`
			`;`
			`; * If the backend does not support scalable vectors, ignore the hint and let`
			`; the vectorizer pick a VF.`
			`; * If there are no dependencies and assuming the VF is a power of 2 the VF`
			`; should be accepted. This applies to both fixed and scalable VFs.`
			`; * If the dependency is too small to use scalable vectors, change the VF to`
			`; fixed, where existing behavior applies (clamping).`
			`; * If scalable vectorization is feasible given the dependency and the VF is`
			`; valid, accept it. Otherwise, clamp to the max scalable VF.`

			`; test1`
			`;`
			`; Scalable vectorization unfeasible, clamp VF from (4, scalable) -> (4, fixed).`
			`;`
			`; The pragma applied to this loop implies a scalable vector <vscale x 4 x i32>`
			`; be used for vectorization. For fixed vectors the MaxVF=8, otherwise there`
			`; would be a dependence between vector lanes for vectors greater than 256 bits.`
			`;`
			`; void test1(int *a, int *b, int N) {`
			`; #pragma clang loop vectorize(enable) vectorize_width(4, scalable)`
			`; for (int i=0; i<N; ++i) {`
			`; a[i + 8] = a[i] + b[i];`
			`; }`
			`; }`
			`;`
			`; For scalable vectorization 'vscale' has to be considered, for this example`
			`; unless max(vscale)=2 it's unsafe to vectorize. For SVE max(vscale)=16, check`
			`; fixed-width vectorization is used instead.`

			`; CHECK-DBG: LV: Max legal vector width too small, scalable vectorization unfeasible. Using fixed-width vectorization instead.`
			`; CHECK-DBG: remark: <unknown>:0:0: Max legal vector width too small, scalable vectorization unfeasible. Using fixed-width vectorization instead.`
			`; CHECK-DBG: LV: The max safe VF is: 8.`
			`; CHECK-DBG: LV: Selecting VF: 4.`
			`; CHECK-LABEL: @test1`
			`; CHECK: <4 x i32>`
			`define void @test1(i32* %a, i32* %b) {`
			`entry:`
			`br label %loop`

			`loop:`
			`%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]`
			`%arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv`
			`%0 = load i32, i32* %arrayidx, align 4`
			`%arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv`
			`%1 = load i32, i32* %arrayidx2, align 4`
			`%add = add nsw i32 %1, %0`
			`%2 = add nuw nsw i64 %iv, 8`
			`%arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2`
			`store i32 %add, i32* %arrayidx5, align 4`
			`%iv.next = add nuw nsw i64 %iv, 1`
			`%exitcond.not = icmp eq i64 %iv.next, 1024`
			`br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !0`

			`exit:`
			`ret void`
			`}`

			`!0 = !{!0, !1, !2}`
			`!1 = !{!"llvm.loop.vectorize.width", i32 4}`
			`!2 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}`

			`; test2`
			`;`
			`; Scalable vectorization unfeasible, clamp VF from (8, scalable) -> (4, fixed).`
			`;`
			`; void test2(int *a, int *b, int N) {`
			`; #pragma clang loop vectorize(enable) vectorize_width(8, scalable)`
			`; for (int i=0; i<N; ++i) {`
			`; a[i + 4] = a[i] + b[i];`
			`; }`
			`; }`

			`; CHECK-DBG: LV: Max legal vector width too small, scalable vectorization unfeasible. Using fixed-width vectorization instead.`
			`; CHECK-DBG: LV: The max safe VF is: 4.`
			`; CHECK-DBG: LV: User VF=8 is unsafe, clamping to max safe VF=4.`
			`; CHECK-DBG: LV: Selecting VF: 4.`
			`; CHECK-LABEL: @test2`
			`; CHECK: <4 x i32>`
			`define void @test2(i32* %a, i32* %b) {`
			`entry:`
			`br label %loop`

			`loop:`
			`%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]`
			`%arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv`
			`%0 = load i32, i32* %arrayidx, align 4`
			`%arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv`
			`%1 = load i32, i32* %arrayidx2, align 4`
			`%add = add nsw i32 %1, %0`
			`%2 = add nuw nsw i64 %iv, 4`
			`%arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2`
			`store i32 %add, i32* %arrayidx5, align 4`
			`%iv.next = add nuw nsw i64 %iv, 1`
			`%exitcond.not = icmp eq i64 %iv.next, 1024`
			`br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !3`

			`exit:`
			`ret void`
			`}`

			`!3 = !{!3, !4, !5}`
			`!4 = !{!"llvm.loop.vectorize.width", i32 8}`
			`!5 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}`

			`; test3`
			`;`
			`; Scalable vectorization feasible and the VF is valid.`
			`;`
			`; Specifies a vector of <vscale x 2 x i32>, i.e. maximum of 32 x i32 with 2`
			`; words per 128-bits (unpacked).`
			`;`
			`; void test3(int *a, int *b, int N) {`
			`; #pragma clang loop vectorize(enable) vectorize_width(2, scalable)`
			`; for (int i=0; i<N; ++i) {`
			`; a[i + 32] = a[i] + b[i];`
			`; }`
			`; }`
			`;`
			`; Max fixed VF=32, Max scalable VF=2, safe to vectorize.`

			`; CHECK-DBG-LABEL: LV: Checking a loop in "test3"`
			`; CHECK-DBG: LV: The max safe VF is: vscale x 2.`
			`; CHECK-DBG: LV: Using user VF vscale x 2.`
			`; CHECK-LABEL: @test3`
			`; CHECK: <vscale x 2 x i32>`
			`define void @test3(i32* %a, i32* %b) {`
			`entry:`
			`br label %loop`

			`loop:`
			`%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]`
			`%arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv`
			`%0 = load i32, i32* %arrayidx, align 4`
			`%arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv`
			`%1 = load i32, i32* %arrayidx2, align 4`
			`%add = add nsw i32 %1, %0`
			`%2 = add nuw nsw i64 %iv, 32`
			`%arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2`
			`store i32 %add, i32* %arrayidx5, align 4`
			`%iv.next = add nuw nsw i64 %iv, 1`
			`%exitcond.not = icmp eq i64 %iv.next, 1024`
			`br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !6`

			`exit:`
			`ret void`
			`}`

			`!6 = !{!6, !7, !8}`
			`!7 = !{!"llvm.loop.vectorize.width", i32 2}`
			`!8 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}`

			`; test4`
			`;`
			`; Scalable vectorization feasible, but the VF is unsafe. Should clamp.`
			`;`
			`; Specifies a vector of <vscale x 4 x i32>, i.e. maximum of 64 x i32 with 4`
			`; words per 128-bits (packed).`
			`;`
			`; void test4(int *a, int *b, int N) {`
			`; #pragma clang loop vectorize(enable) vectorize_width(4, scalable)`
			`; for (int i=0; i<N; ++i) {`
			`; a[i + 32] = a[i] + b[i];`
			`; }`
			`; }`
			`;`
			`; Max fixed VF=32, Max scalable VF=2, unsafe to vectorize. Should clamp to 2.`

			`; CHECK-DBG-LABEL: LV: Checking a loop in "test4"`
			`; CHECK-DBG: LV: The max safe VF is: vscale x 2.`
			`; CHECK-DBG: LV: User VF=vscale x 4 is unsafe, clamping to max safe VF=vscale x 2.`
			`; CHECK-DBG: remark: <unknown>:0:0: User-specified vectorization factor vscale x 4 is unsafe, clamping to maximum safe vectorization factor vscale x 2`
			`; CHECK-DBG: LV: Using max VF vscale x 2`
			`; CHECK-LABEL: @test4`
			`; CHECK: <vscale x 2 x i32>`
			`define void @test4(i32* %a, i32* %b) {`
			`entry:`
			`br label %loop`

			`loop:`
			`%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]`
			`%arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv`
			`%0 = load i32, i32* %arrayidx, align 4`
			`%arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv`
			`%1 = load i32, i32* %arrayidx2, align 4`
			`%add = add nsw i32 %1, %0`
			`%2 = add nuw nsw i64 %iv, 32`
			`%arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2`
			`store i32 %add, i32* %arrayidx5, align 4`
			`%iv.next = add nuw nsw i64 %iv, 1`
			`%exitcond.not = icmp eq i64 %iv.next, 1024`
			`br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !9`

			`exit:`
			`ret void`
			`}`

			`!9 = !{!9, !10, !11}`
			`!10 = !{!"llvm.loop.vectorize.width", i32 4}`
			`!11 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}`

			`; test5`
			`;`
			`; Scalable vectorization feasible and the VF is valid.`
			`;`
			`; Specifies a vector of <vscale x 4 x i32>, i.e. maximum of 64 x i32 with 4`
			`; words per 128-bits (packed).`
			`;`
			`; void test5(int *a, int *b, int N) {`
			`; #pragma clang loop vectorize(enable) vectorize_width(4, scalable)`
			`; for (int i=0; i<N; ++i) {`
			`; a[i + 128] = a[i] + b[i];`
			`; }`
			`; }`
			`;`
			`; Max fixed VF=128, Max scalable VF=8, safe to vectorize.`

			`; CHECK-DBG-LABEL: LV: Checking a loop in "test5"`
			`; CHECK-DBG: LV: The max safe VF is: vscale x 8.`
			`; CHECK-DBG: LV: Using user VF vscale x 4`
			`; CHECK-LABEL: @test5`
			`; CHECK: <vscale x 4 x i32>`
			`define void @test5(i32* %a, i32* %b) {`
			`entry:`
			`br label %loop`

			`loop:`
			`%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]`
			`%arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv`
			`%0 = load i32, i32* %arrayidx, align 4`
			`%arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv`
			`%1 = load i32, i32* %arrayidx2, align 4`
			`%add = add nsw i32 %1, %0`
			`%2 = add nuw nsw i64 %iv, 128`
			`%arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2`
			`store i32 %add, i32* %arrayidx5, align 4`
			`%iv.next = add nuw nsw i64 %iv, 1`
			`%exitcond.not = icmp eq i64 %iv.next, 1024`
			`br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !12`

			`exit:`
			`ret void`
			`}`

			`!12 = !{!12, !13, !14}`
			`!13 = !{!"llvm.loop.vectorize.width", i32 4}`
			`!14 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}`

			`; test6`
			`;`
			`; Scalable vectorization feasible, but the VF is unsafe. Should clamp.`
			`;`
			`; Specifies a vector of <vscale x 16 x i32>, i.e. maximum of 256 x i32.`
			`;`
			`; void test6(int *a, int *b, int N) {`
			`; #pragma clang loop vectorize(enable) vectorize_width(16, scalable)`
			`; for (int i=0; i<N; ++i) {`
			`; a[i + 128] = a[i] + b[i];`
			`; }`
			`; }`
			`;`
			`; Max fixed VF=128, Max scalable VF=8, unsafe to vectorize. Should clamp to 8.`

			`; CHECK-DBG-LABEL: LV: Checking a loop in "test6"`
			`; CHECK-DBG: LV: The max safe VF is: vscale x 8.`
			`; CHECK-DBG: LV: User VF=vscale x 16 is unsafe, clamping to max safe VF=vscale x 8.`
			`; CHECK-DBG: remark: <unknown>:0:0: User-specified vectorization factor vscale x 16 is unsafe, clamping to maximum safe vectorization factor vscale x 8`
			`; CHECK-DBG: LV: Using max VF vscale x 8`
			`; CHECK-LABEL: @test6`
			`; CHECK: <vscale x 8 x i32>`
			`define void @test6(i32* %a, i32* %b) {`
			`entry:`
			`br label %loop`

			`loop:`
			`%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]`
			`%arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv`
			`%0 = load i32, i32* %arrayidx, align 4`
			`%arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv`
			`%1 = load i32, i32* %arrayidx2, align 4`
			`%add = add nsw i32 %1, %0`
			`%2 = add nuw nsw i64 %iv, 128`
			`%arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2`
			`store i32 %add, i32* %arrayidx5, align 4`
			`%iv.next = add nuw nsw i64 %iv, 1`
			`%exitcond.not = icmp eq i64 %iv.next, 1024`
			`br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !15`

			`exit:`
			`ret void`
			`}`

			`!15 = !{!15, !16, !17}`
			`!16 = !{!"llvm.loop.vectorize.width", i32 16}`
			`!17 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}`

			`; CHECK-NO-SVE-LABEL: LV: Checking a loop in "test_no_sve"`
			`; CHECK-NO-SVE: LV: Ignoring VF=vscale x 4 because target does not support scalable vectors.`
			`; CHECK-NO-SVE: remark: <unknown>:0:0: Ignoring VF=vscale x 4 because target does not support scalable vectors.`
			`; CHECK-NO-SVE: LV: Selecting VF: 4.`
			`; CHECK-NO-SVE: <4 x i32>`
			`; CHECK-NO-SVE-NOT: <vscale x 4 x i32>`
			`define void @test_no_sve(i32* %a, i32* %b) {`
			`entry:`
			`br label %loop`

			`loop:`
			`%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]`
			`%arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv`
			`%0 = load i32, i32* %arrayidx, align 4`
			`%arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv`
			`%1 = load i32, i32* %arrayidx2, align 4`
			`%add = add nsw i32 %1, %0`
			`store i32 %add, i32* %arrayidx, align 4`
			`%iv.next = add nuw nsw i64 %iv, 1`
			`%exitcond.not = icmp eq i64 %iv.next, 1024`
			`br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !18`

			`exit:`
			`ret void`
			`}`

			`!18 = !{!18, !19, !20}`
			`!19 = !{!"llvm.loop.vectorize.width", i32 4}`
			`!20 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}`

			`; Test the LV falls back to fixed-width vectorization if scalable vectors are`
			`; supported but max vscale is undefined.`
			`;`
			`; CHECK-NO-MAX-VSCALE-LABEL: LV: Checking a loop in "test_no_max_vscale"`
			`; CHECK-NO-MAX-VSCALE: LV: Max legal vector width too small, scalable vectorization unfeasible. Using fixed-width vectorization instead.`
			`; CHECK-NO-MAX-VSCALE: The max safe VF is: 4.`
			`; CHECK-NO-MAX-VSCALE: LV: Selecting VF: 4.`
			`; CHECK-NO-MAX-VSCALE: <4 x i32>`
			`define void @test_no_max_vscale(i32* %a, i32* %b) {`
			`entry:`
			`br label %loop`

			`loop:`
			`%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]`
			`%arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv`
			`%0 = load i32, i32* %arrayidx, align 4`
			`%arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv`
			`%1 = load i32, i32* %arrayidx2, align 4`
			`%add = add nsw i32 %1, %0`
			`%2 = add nuw nsw i64 %iv, 4`
			`%arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2`
			`store i32 %add, i32* %arrayidx5, align 4`
			`%iv.next = add nuw nsw i64 %iv, 1`
			`%exitcond.not = icmp eq i64 %iv.next, 1024`
			`br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !21`

			`exit:`
			`ret void`
			`}`

			`!21 = !{!21, !22, !23}`
			`!22 = !{!"llvm.loop.vectorize.width", i32 4}`
			`!23 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}`