214 lines
8.5 KiB
LLVM
214 lines
8.5 KiB
LLVM
; RUN: llc -mtriple=thumbv8.1m.main -disable-arm-loloops=false -mattr=+lob -stop-after=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s
|
|
; RUN: llc -mtriple=thumbv8.1m.main -disable-arm-loloops=false -mattr=+lob -stop-after=arm-low-overhead-loops --verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK-GLOBAL
|
|
|
|
; Not implemented as a MIR test so that changes to the generic HardwareLoops pass can
|
|
; also be tested. These functions have been taken from
|
|
; Transforms/HardwareLoops/loop-guards.ll in which can be seen the generation
|
|
; of a few test.set intrinsics, but only one (ne_trip_count) gets generated
|
|
; here. Simplifications result in icmps changing and maybe also the CFG. So,
|
|
; TODO: Teach the HardwareLoops pass some better pattern recognition.
|
|
|
|
; CHECK-GLOBAL-NOT: DoLoopStart
|
|
; CHECK-GLOBAL-NOT: WhileLoopStart
|
|
; CHECK-GLOBAL-NOT: LoopEnd
|
|
|
|
; CHECK: ne_and_guard
|
|
; CHECK: body:
|
|
; CHECK: bb.0.entry:
|
|
; CHECK: t2CMPri renamable $lr, 0
|
|
; CHECK: tBcc %bb.4
|
|
; CHECK: bb.2.while.body.preheader:
|
|
; CHECK: $lr = t2DLS killed renamable $lr
|
|
; CHECK: bb.3.while.body:
|
|
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.3
|
|
; Element-wise i32 copy from %b to %a. The loop guard is a single branch in
; the entry block that requires %t1, %t2 and (%N != 0) to all hold; the loop
; then exits once the incremented counter equals %N.
define void @ne_and_guard(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
entry:
  %both.flags = and i1 %t1, %t2
  %n.nonzero = icmp ne i32 %N, 0
  ; Flag test and trip-count test are merged into one guard condition.
  %enter.loop = and i1 %both.flags, %n.nonzero
  br i1 %enter.loop, label %while.body, label %if.end

while.body:                                       ; preds = %while.body, %entry
  %idx = phi i32 [ %idx.next, %while.body ], [ 0, %entry ]
  %dst = phi i32* [ %dst.next, %while.body ], [ %a, %entry ]
  %src = phi i32* [ %src.next, %while.body ], [ %b, %entry ]
  ; Copy one element and advance both pointers by one i32.
  %src.next = getelementptr inbounds i32, i32* %src, i32 1
  %val = load i32, i32* %src, align 4
  %dst.next = getelementptr inbounds i32, i32* %dst, i32 1
  store i32 %val, i32* %dst, align 4
  %idx.next = add nuw i32 %idx, 1
  %done = icmp eq i32 %idx.next, %N
  br i1 %done, label %if.end, label %while.body

if.end:                                           ; preds = %while.body, %entry
  ret void
}
|
|
|
|
; TODO: This could generate WLS
|
|
; CHECK: ne_preheader
|
|
; CHECK: body:
|
|
; CHECK: bb.0.entry:
|
|
; CHECK: t2CMPri renamable $lr, 0
|
|
; CHECK: tBcc %bb.4
|
|
; CHECK: bb.2.while.body.preheader:
|
|
; CHECK: $lr = t2DLS killed renamable $lr
|
|
; CHECK: bb.3.while.body:
|
|
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.3
|
|
; Element-wise i32 copy from %b to %a. The entry block tests %t1 & %t2; the
; (%N != 0) guard lives in a separate block (while.preheader) that branches
; directly into the loop header.
define void @ne_preheader(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
entry:
  %both.flags = and i1 %t1, %t2
  br i1 %both.flags, label %while.preheader, label %if.end

while.preheader:                                  ; preds = %entry
  ; Trip-count guard, expressed with 'ne': the true edge enters the loop.
  %n.nonzero = icmp ne i32 %N, 0
  br i1 %n.nonzero, label %while.body, label %if.end

while.body:                                       ; preds = %while.body, %while.preheader
  %idx = phi i32 [ %idx.next, %while.body ], [ 0, %while.preheader ]
  %dst = phi i32* [ %dst.next, %while.body ], [ %a, %while.preheader ]
  %src = phi i32* [ %src.next, %while.body ], [ %b, %while.preheader ]
  ; Copy one element and advance both pointers by one i32.
  %src.next = getelementptr inbounds i32, i32* %src, i32 1
  %val = load i32, i32* %src, align 4
  %dst.next = getelementptr inbounds i32, i32* %dst, i32 1
  store i32 %val, i32* %dst, align 4
  %idx.next = add nuw i32 %idx, 1
  %done = icmp eq i32 %idx.next, %N
  br i1 %done, label %if.end, label %while.body

if.end:                                           ; preds = %while.body, %while.preheader, %entry
  ret void
}
|
|
|
|
; TODO: This could generate WLS
|
|
; CHECK: eq_preheader
|
|
; CHECK: body:
|
|
; CHECK: bb.0.entry:
|
|
; CHECK: t2CMPri renamable $lr, 0
|
|
; CHECK: tBcc %bb.4
|
|
; CHECK: bb.2.while.body.preheader:
|
|
; CHECK: $lr = t2DLS killed renamable $lr
|
|
; CHECK: bb.3.while.body:
|
|
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.3
|
|
; Element-wise i32 copy from %b to %a. Same shape as a flag test in the entry
; block followed by a trip-count guard in while.preheader, but here the guard
; is written with 'eq' and swapped branch targets: (%N == 0) skips the loop.
define void @eq_preheader(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
entry:
  %both.flags = and i1 %t1, %t2
  br i1 %both.flags, label %while.preheader, label %if.end

while.preheader:                                  ; preds = %entry
  ; Trip-count guard, expressed with 'eq': the false edge enters the loop.
  %n.zero = icmp eq i32 %N, 0
  br i1 %n.zero, label %if.end, label %while.body

while.body:                                       ; preds = %while.body, %while.preheader
  %idx = phi i32 [ %idx.next, %while.body ], [ 0, %while.preheader ]
  %dst = phi i32* [ %dst.next, %while.body ], [ %a, %while.preheader ]
  %src = phi i32* [ %src.next, %while.body ], [ %b, %while.preheader ]
  ; Copy one element and advance both pointers by one i32.
  %src.next = getelementptr inbounds i32, i32* %src, i32 1
  %val = load i32, i32* %src, align 4
  %dst.next = getelementptr inbounds i32, i32* %dst, i32 1
  store i32 %val, i32* %dst, align 4
  %idx.next = add nuw i32 %idx, 1
  %done = icmp eq i32 %idx.next, %N
  br i1 %done, label %if.end, label %while.body

if.end:                                           ; preds = %while.body, %while.preheader, %entry
  ret void
}
|
|
|
|
; TODO: This could generate WLS
|
|
; CHECK: ne_prepreheader
|
|
; CHECK: body:
|
|
; CHECK: bb.0.entry:
|
|
; CHECK: t2CMPri renamable $lr, 0
|
|
; CHECK: tBcc %bb.4
|
|
; CHECK: bb.2.while.body.preheader:
|
|
; CHECK: $lr = t2DLS killed renamable $lr
|
|
; CHECK: bb.3.while.body:
|
|
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.3
|
|
; Element-wise i32 copy from %b to %a. Here the (%N != 0) guard comes first,
; in the entry block, and the %t1 & %t2 test sits between that guard and the
; loop header (in while.preheader).
define void @ne_prepreheader(i1 zeroext %t1, i1 zeroext %t2, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
entry:
  ; Trip-count guard is evaluated before the flag test.
  %n.nonzero = icmp ne i32 %N, 0
  br i1 %n.nonzero, label %while.preheader, label %if.end

while.preheader:                                  ; preds = %entry
  %both.flags = and i1 %t1, %t2
  br i1 %both.flags, label %while.body, label %if.end

while.body:                                       ; preds = %while.body, %while.preheader
  %idx = phi i32 [ %idx.next, %while.body ], [ 0, %while.preheader ]
  %dst = phi i32* [ %dst.next, %while.body ], [ %a, %while.preheader ]
  %src = phi i32* [ %src.next, %while.body ], [ %b, %while.preheader ]
  ; Copy one element and advance both pointers by one i32.
  %src.next = getelementptr inbounds i32, i32* %src, i32 1
  %val = load i32, i32* %src, align 4
  %dst.next = getelementptr inbounds i32, i32* %dst, i32 1
  store i32 %val, i32* %dst, align 4
  %idx.next = add nuw i32 %idx, 1
  %done = icmp eq i32 %idx.next, %N
  br i1 %done, label %if.end, label %while.body

if.end:                                           ; preds = %while.body, %while.preheader, %entry
  ret void
}
|
|
|
|
; CHECK: be_ne
|
|
; CHECK: body:
|
|
; CHECK: bb.0.entry:
|
|
; CHECK: $lr = t2DLS killed renamable $r12
|
|
; CHECK: bb.2.do.body:
|
|
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2
|
|
; Do-while style i32 copy from %b to %a. The guard does not test %N directly:
; it tests a value %be produced by a select (0 when %N != 0, otherwise %N - 1)
; against zero. The latch keeps looping while the incremented counter is
; unsigned-less-than %N.
define void @be_ne(i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
entry:
  %n.nonzero = icmp ne i32 %N, 0
  %n.minus1 = sub i32 %N, 1
  ; The select operands are intentionally kept exactly as in the test input.
  %be = select i1 %n.nonzero, i32 0, i32 %n.minus1
  %be.nonzero = icmp ne i32 %be, 0
  br i1 %be.nonzero, label %do.body, label %if.end

do.body:                                          ; preds = %do.body, %entry
  %src = phi i32* [ %src.next, %do.body ], [ %b, %entry ]
  %dst = phi i32* [ %dst.next, %do.body ], [ %a, %entry ]
  %idx = phi i32 [ %idx.next, %do.body ], [ 0, %entry ]
  ; Copy one element and advance both pointers by one i32.
  %src.next = getelementptr inbounds i32, i32* %src, i32 1
  %val = load i32, i32* %src, align 4
  %dst.next = getelementptr inbounds i32, i32* %dst, i32 1
  store i32 %val, i32* %dst, align 4
  %idx.next = add nuw i32 %idx, 1
  %more = icmp ult i32 %idx.next, %N
  br i1 %more, label %do.body, label %if.end

if.end:                                           ; preds = %do.body, %entry
  ret void
}
|
|
|
|
; TODO: Remove the tMOVr in the preheader!
|
|
; CHECK: ne_trip_count
|
|
; CHECK: body:
|
|
; CHECK: bb.0.entry:
|
|
; CHECK: $lr = t2WLS $r3, %bb.3
|
|
; CHECK: bb.1.do.body.preheader:
|
|
; CHECK: $lr = tMOVr
|
|
; CHECK: bb.2.do.body:
|
|
; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.2
|
|
; Do-while style i32 copy from %b to %a. The entry block branches
; unconditionally to do.body.preheader, which guards the loop with a plain
; (%N != 0) test on the trip count. %t1 is unused.
define void @ne_trip_count(i1 zeroext %t1, i32* nocapture %a, i32* nocapture readonly %b, i32 %N) {
entry:
  br label %do.body.preheader

do.body.preheader:                                ; preds = %entry
  %n.nonzero = icmp ne i32 %N, 0
  br i1 %n.nonzero, label %do.body, label %if.end

do.body:                                          ; preds = %do.body, %do.body.preheader
  %src = phi i32* [ %src.next, %do.body ], [ %b, %do.body.preheader ]
  %dst = phi i32* [ %dst.next, %do.body ], [ %a, %do.body.preheader ]
  %idx = phi i32 [ %idx.next, %do.body ], [ 0, %do.body.preheader ]
  ; Copy one element and advance both pointers by one i32.
  %src.next = getelementptr inbounds i32, i32* %src, i32 1
  %val = load i32, i32* %src, align 4
  %dst.next = getelementptr inbounds i32, i32* %dst, i32 1
  store i32 %val, i32* %dst, align 4
  %idx.next = add nuw i32 %idx, 1
  %more = icmp ult i32 %idx.next, %N
  br i1 %more, label %do.body, label %if.end

if.end:                                           ; preds = %do.body, %do.body.preheader
  ret void
}
|