; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -o - -mtriple=i686-unknown-unknown %s | FileCheck %s --check-prefix=X32
; RUN: llc -o - -mtriple=x86_64-unknown-unknown %s | FileCheck %s --check-prefix=X64
;
; Test patterns that require preserving and restoring flags.

@b = common dso_local global i8 0, align 1
@c = common dso_local global i32 0, align 4
@a = common dso_local global i8 0, align 1
@d = common dso_local global i8 0, align 1
@.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1

declare dso_local void @external(i32)

; A test that re-uses flags in interesting ways due to volatile accesses.
; Specifically, the first increment's flags are reused for the branch despite
; being clobbered by the second increment.
define dso_local i32 @test1() nounwind {
; X32-LABEL: test1:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movb b, %cl
; X32-NEXT:    movl %ecx, %eax
; X32-NEXT:    incb %al
; X32-NEXT:    movb %al, b
; X32-NEXT:    incl c
; X32-NEXT:    sete %dl
; X32-NEXT:    movb a, %ah
; X32-NEXT:    movb %ah, %ch
; X32-NEXT:    incb %ch
; X32-NEXT:    cmpb %cl, %ah
; X32-NEXT:    sete d
; X32-NEXT:    movb %ch, a
; X32-NEXT:    testb %dl, %dl
; X32-NEXT:    jne .LBB0_2
; X32-NEXT:  # %bb.1: # %if.then
; X32-NEXT:    movsbl %al, %eax
; X32-NEXT:    pushl %eax
; X32-NEXT:    calll external
; X32-NEXT:    addl $4, %esp
; X32-NEXT:  .LBB0_2: # %if.end
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    retl
;
; X64-LABEL: test1:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pushq %rax
; X64-NEXT:    movb {{.*}}(%rip), %cl
; X64-NEXT:    leal 1(%rcx), %eax
; X64-NEXT:    movb %al, {{.*}}(%rip)
; X64-NEXT:    incl {{.*}}(%rip)
; X64-NEXT:    sete %dl
; X64-NEXT:    movb {{.*}}(%rip), %sil
; X64-NEXT:    leal 1(%rsi), %edi
; X64-NEXT:    cmpb %cl, %sil
; X64-NEXT:    sete {{.*}}(%rip)
; X64-NEXT:    movb %dil, {{.*}}(%rip)
; X64-NEXT:    testb %dl, %dl
; X64-NEXT:    jne .LBB0_2
; X64-NEXT:  # %bb.1: # %if.then
; X64-NEXT:    movsbl %al, %edi
; X64-NEXT:    callq external
; X64-NEXT:  .LBB0_2: # %if.end
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    popq %rcx
; X64-NEXT:    retq
entry:
  %bval = load i8, i8* @b
  %inc = add i8 %bval, 1
  store volatile i8 %inc, i8* @b
  %cval = load volatile i32, i32* @c
  %inc1 = add nsw i32 %cval, 1
  store volatile i32 %inc1, i32* @c
  %aval = load volatile i8, i8* @a
  %inc2 = add i8 %aval, 1
  store volatile i8 %inc2, i8* @a
  %cmp = icmp eq i8 %aval, %bval
  %conv5 = zext i1 %cmp to i8
  store i8 %conv5, i8* @d
  %tobool = icmp eq i32 %inc1, 0
  br i1 %tobool, label %if.end, label %if.then

if.then:
  %conv6 = sext i8 %inc to i32
  call void @external(i32 %conv6)
  br label %if.end

if.end:
  ret i32 0
}

; Preserve increment flags across a call.
define dso_local i32 @test2(i32* %ptr) nounwind {
; X32-LABEL: test2:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %ebx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    incl (%eax)
; X32-NEXT:    setne %bl
; X32-NEXT:    pushl $42
; X32-NEXT:    calll external
; X32-NEXT:    addl $4, %esp
; X32-NEXT:    testb %bl, %bl
; X32-NEXT:    jne .LBB1_2
; X32-NEXT:  # %bb.1: # %then
; X32-NEXT:    movl $64, %eax
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
; X32-NEXT:  .LBB1_2: # %else
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
;
; X64-LABEL: test2:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pushq %rbx
; X64-NEXT:    incl (%rdi)
; X64-NEXT:    setne %bl
; X64-NEXT:    movl $42, %edi
; X64-NEXT:    callq external
; X64-NEXT:    testb %bl, %bl
; X64-NEXT:    jne .LBB1_2
; X64-NEXT:  # %bb.1: # %then
; X64-NEXT:    movl $64, %eax
; X64-NEXT:    popq %rbx
; X64-NEXT:    retq
; X64-NEXT:  .LBB1_2: # %else
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    popq %rbx
; X64-NEXT:    retq
entry:
  %val = load i32, i32* %ptr
  %inc = add i32 %val, 1
  store i32 %inc, i32* %ptr
  %cmp = icmp eq i32 %inc, 0
  call void @external(i32 42)
  br i1 %cmp, label %then, label %else

then:
  ret i32 64

else:
  ret i32 0
}

declare dso_local void @external_a()
declare dso_local void @external_b()

; This lowers to a conditional tail call instead of a conditional branch. This
; is tricky because we can only do this from a leaf function, and so we have to
; use volatile stores similar to test1 to force the save and restore of
; a condition without calling another function. We then set up subsequent calls
; in tail position.
define dso_local void @test_tail_call(i32* %ptr) nounwind optsize {
; X32-LABEL: test_tail_call:
; X32:       # %bb.0: # %entry
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    incl (%eax)
; X32-NEXT:    setne %al
; X32-NEXT:    incb a
; X32-NEXT:    sete d
; X32-NEXT:    testb %al, %al
; X32-NEXT:    jne external_b # TAILCALL
; X32-NEXT:  # %bb.1: # %then
; X32-NEXT:    jmp external_a # TAILCALL
;
; X64-LABEL: test_tail_call:
; X64:       # %bb.0: # %entry
; X64-NEXT:    incl (%rdi)
; X64-NEXT:    setne %al
; X64-NEXT:    incb {{.*}}(%rip)
; X64-NEXT:    sete {{.*}}(%rip)
; X64-NEXT:    testb %al, %al
; X64-NEXT:    jne external_b # TAILCALL
; X64-NEXT:  # %bb.1: # %then
; X64-NEXT:    jmp external_a # TAILCALL
entry:
  %val = load i32, i32* %ptr
  %inc = add i32 %val, 1
  store i32 %inc, i32* %ptr
  %cmp = icmp eq i32 %inc, 0
  %aval = load volatile i8, i8* @a
  %inc2 = add i8 %aval, 1
  store volatile i8 %inc2, i8* @a
  %cmp2 = icmp eq i8 %inc2, 0
  %conv5 = zext i1 %cmp2 to i8
  store i8 %conv5, i8* @d
  br i1 %cmp, label %then, label %else

then:
  tail call void @external_a()
  ret void

else:
  tail call void @external_b()
  ret void
}

; Test a function that gets special select lowering into CFG with copied EFLAGS
; threaded across the CFG. This requires our EFLAGS copy rewriting to handle
; cross-block rewrites in at least some narrow cases.
define dso_local void @PR37100(i8 %arg1, i16 %arg2, i64 %arg3, i8 %arg4, i8* %ptr1, i32* %ptr2, i32 %x) nounwind {
; X32-LABEL: PR37100:
; X32:       # %bb.0: # %bb
; X32-NEXT:    pushl %ebp
; X32-NEXT:    pushl %ebx
; X32-NEXT:    pushl %edi
; X32-NEXT:    pushl %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X32-NEXT:    movb {{[0-9]+}}(%esp), %ch
; X32-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X32-NEXT:    jmp .LBB3_1
; X32-NEXT:    .p2align 4, 0x90
; X32-NEXT:  .LBB3_5: # %bb1
; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X32-NEXT:    movl %esi, %eax
; X32-NEXT:    cltd
; X32-NEXT:    idivl %edi
; X32-NEXT:  .LBB3_1: # %bb1
; X32-NEXT:    # =>This Inner Loop Header: Depth=1
; X32-NEXT:    movsbl %cl, %eax
; X32-NEXT:    movl %eax, %edx
; X32-NEXT:    sarl $31, %edx
; X32-NEXT:    cmpl %eax, {{[0-9]+}}(%esp)
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    sbbl %edx, %eax
; X32-NEXT:    setl %al
; X32-NEXT:    setl %dl
; X32-NEXT:    movzbl %dl, %edi
; X32-NEXT:    negl %edi
; X32-NEXT:    testb %al, %al
; X32-NEXT:    jne .LBB3_3
; X32-NEXT:  # %bb.2: # %bb1
; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X32-NEXT:    movb %ch, %cl
; X32-NEXT:  .LBB3_3: # %bb1
; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X32-NEXT:    movb %cl, (%ebp)
; X32-NEXT:    movl (%ebx), %edx
; X32-NEXT:    testb %al, %al
; X32-NEXT:    jne .LBB3_5
; X32-NEXT:  # %bb.4: # %bb1
; X32-NEXT:    # in Loop: Header=BB3_1 Depth=1
; X32-NEXT:    movl %edx, %edi
; X32-NEXT:    jmp .LBB3_5
;
; X64-LABEL: PR37100:
; X64:       # %bb.0: # %bb
; X64-NEXT:    movq %rdx, %rsi
; X64-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
; X64-NEXT:    movzbl %cl, %r11d
; X64-NEXT:    .p2align 4, 0x90
; X64-NEXT:  .LBB3_1: # %bb1
; X64-NEXT:    # =>This Inner Loop Header: Depth=1
; X64-NEXT:    movsbq %dil, %rax
; X64-NEXT:    xorl %ecx, %ecx
; X64-NEXT:    cmpq %rax, %rsi
; X64-NEXT:    setl %cl
; X64-NEXT:    negl %ecx
; X64-NEXT:    cmpq %rax, %rsi
; X64-NEXT:    movzbl %al, %edi
; X64-NEXT:    cmovgel %r11d, %edi
; X64-NEXT:    movb %dil, (%r8)
; X64-NEXT:    cmovgel (%r9), %ecx
; X64-NEXT:    movl %r10d, %eax
; X64-NEXT:    cltd
; X64-NEXT:    idivl %ecx
; X64-NEXT:    jmp .LBB3_1
bb:
  br label %bb1

bb1:
  %tmp = phi i8 [ %tmp8, %bb1 ], [ %arg1, %bb ]
  %tmp2 = phi i16 [ %tmp12, %bb1 ], [ %arg2, %bb ]
  %tmp3 = icmp sgt i16 %tmp2, 7
  %tmp4 = select i1 %tmp3, i16 %tmp2, i16 7
  %tmp5 = sext i8 %tmp to i64
  %tmp6 = icmp slt i64 %arg3, %tmp5
  %tmp7 = sext i1 %tmp6 to i32
  %tmp8 = select i1 %tmp6, i8 %tmp, i8 %arg4
  store volatile i8 %tmp8, i8* %ptr1
  %tmp9 = load volatile i32, i32* %ptr2
  %tmp10 = select i1 %tmp6, i32 %tmp7, i32 %tmp9
  %tmp11 = srem i32 %x, %tmp10
  %tmp12 = trunc i32 %tmp11 to i16
  br label %bb1
}

; Use a particular instruction pattern in order to lower to the post-RA pseudo
; used to lower SETB into an SBB pattern in order to make sure that kind of
; usage of a copied EFLAGS continues to work.
define dso_local void @PR37431(i32* %arg1, i8* %arg2, i8* %arg3, i32 %arg4, i64 %arg5) nounwind {
; X32-LABEL: PR37431:
; X32:       # %bb.0: # %entry
; X32-NEXT:    pushl %ebx
; X32-NEXT:    pushl %edi
; X32-NEXT:    pushl %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X32-NEXT:    movl (%edi), %edi
; X32-NEXT:    movl %edi, %ebx
; X32-NEXT:    sarl $31, %ebx
; X32-NEXT:    cmpl %edi, {{[0-9]+}}(%esp)
; X32-NEXT:    sbbl %ebx, %esi
; X32-NEXT:    sbbl %ebx, %ebx
; X32-NEXT:    movb %bl, (%edx)
; X32-NEXT:    cltd
; X32-NEXT:    idivl %ebx
; X32-NEXT:    movb %dl, (%ecx)
; X32-NEXT:    popl %esi
; X32-NEXT:    popl %edi
; X32-NEXT:    popl %ebx
; X32-NEXT:    retl
;
; X64-LABEL: PR37431:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    movq %rdx, %rcx
; X64-NEXT:    movslq (%rdi), %rdx
; X64-NEXT:    cmpq %rdx, %r8
; X64-NEXT:    sbbl %edi, %edi
; X64-NEXT:    movb %dil, (%rsi)
; X64-NEXT:    cltd
; X64-NEXT:    idivl %edi
; X64-NEXT:    movb %dl, (%rcx)
; X64-NEXT:    retq
entry:
  %tmp = load i32, i32* %arg1
  %tmp1 = sext i32 %tmp to i64
  %tmp2 = icmp ugt i64 %tmp1, %arg5
  %tmp3 = zext i1 %tmp2 to i8
  %tmp4 = sub i8 0, %tmp3
  store i8 %tmp4, i8* %arg2
  %tmp5 = sext i8 %tmp4 to i32
  %tmp6 = srem i32 %arg4, %tmp5
  %tmp7 = trunc i32 %tmp6 to i8
  store i8 %tmp7, i8* %arg3
  ret void
}