; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; bswap should be constant folded when it is passed a constant argument

; RUN: llc < %s -mtriple=i686-- -mcpu=i686 | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefix=CHECK64

declare i16 @llvm.bswap.i16(i16)
declare i32 @llvm.bswap.i32(i32)
declare i64 @llvm.bswap.i64(i64)

; i16 bswap lowers to a 16-bit rotate-by-8 (rolw) on both targets.
define i16 @W(i16 %A) {
; CHECK-LABEL: W:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    rolw $8, %ax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: W:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl %edi, %eax
; CHECK64-NEXT:    rolw $8, %ax
; CHECK64-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK64-NEXT:    retq
  %Z = call i16 @llvm.bswap.i16( i16 %A )         ; [#uses=1]
  ret i16 %Z
}

; i32 bswap lowers to a single bswapl instruction.
define dso_local i32 @X(i32 %A) {
; CHECK-LABEL: X:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    bswapl %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: X:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl %edi, %eax
; CHECK64-NEXT:    bswapl %eax
; CHECK64-NEXT:    retq
  %Z = call i32 @llvm.bswap.i32( i32 %A )         ; [#uses=1]
  ret i32 %Z
}

; i64 bswap: on i686 the value is split into two i32 halves which are each
; byte-swapped and returned swapped (eax/edx); on x86-64 it is one bswapq.
define i64 @Y(i64 %A) {
; CHECK-LABEL: Y:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    bswapl %eax
; CHECK-NEXT:    bswapl %edx
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: Y:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movq %rdi, %rax
; CHECK64-NEXT:    bswapq %rax
; CHECK64-NEXT:    retq
  %Z = call i64 @llvm.bswap.i64( i64 %A )         ; [#uses=1]
  ret i64 %Z
}

; This isn't really a bswap test, but the potential problem is
; easier to see with bswap vs. other ops. The transform in
; question starts with a bitwise logic op and tries to hoist
; those ahead of other ops. But that's not generally profitable
; when the other ops have other uses (and it might not be safe
; either due to unconstrained instruction count growth).
; Both bswap results have additional uses (the stores), so the logic op
; must not be hoisted ahead of them; each value is swapped exactly once.
define dso_local i32 @bswap_multiuse(i32 %x, i32 %y, i32* %p1, i32* %p2) nounwind {
; CHECK-LABEL: bswap_multiuse:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT:    bswapl %esi
; CHECK-NEXT:    bswapl %eax
; CHECK-NEXT:    movl %esi, (%edx)
; CHECK-NEXT:    movl %eax, (%ecx)
; CHECK-NEXT:    orl %esi, %eax
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: bswap_multiuse:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl %esi, %eax
; CHECK64-NEXT:    bswapl %edi
; CHECK64-NEXT:    bswapl %eax
; CHECK64-NEXT:    movl %edi, (%rdx)
; CHECK64-NEXT:    movl %eax, (%rcx)
; CHECK64-NEXT:    orl %edi, %eax
; CHECK64-NEXT:    retq
  %xt = call i32 @llvm.bswap.i32(i32 %x)
  %yt = call i32 @llvm.bswap.i32(i32 %y)
  store i32 %xt, i32* %p1
  store i32 %yt, i32* %p2
  %r = or i32 %xt, %yt
  ret i32 %r
}

; rdar://9164521
; Shift/mask pattern that swaps the low two bytes; recognized as
; bswap of the full word followed by a logical shift right.
define dso_local i32 @test1(i32 %a) nounwind readnone {
; CHECK-LABEL: test1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    bswapl %eax
; CHECK-NEXT:    shrl $16, %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: test1:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl %edi, %eax
; CHECK64-NEXT:    bswapl %eax
; CHECK64-NEXT:    shrl $16, %eax
; CHECK64-NEXT:    retq
  %and = lshr i32 %a, 8
  %shr3 = and i32 %and, 255
  %and2 = shl i32 %a, 8
  %shl = and i32 %and2, 65280
  %or = or i32 %shr3, %shl
  ret i32 %or
}

; Same low-halfword swap but sign-extended from i16, so the shift after
; the bswap must be arithmetic (sarl) rather than logical.
define dso_local i32 @test2(i32 %a) nounwind readnone {
; CHECK-LABEL: test2:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    bswapl %eax
; CHECK-NEXT:    sarl $16, %eax
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: test2:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movl %edi, %eax
; CHECK64-NEXT:    bswapl %eax
; CHECK64-NEXT:    sarl $16, %eax
; CHECK64-NEXT:    retq
  %and = lshr i32 %a, 8
  %shr4 = and i32 %and, 255
  %and2 = shl i32 %a, 8
  %or = or i32 %shr4, %and2
  %sext = shl i32 %or, 16
  %conv3 = ashr exact i32 %sext, 16
  ret i32 %conv3
}

@var8 = dso_local global i8 0
@var16 = dso_local global i16 0

; The "shl" below can move bits into the high parts of the value, so the
; operation is not a "bswap, shr" pair.

; rdar://problem/14814049
define i64 @not_bswap() {
; CHECK-LABEL: not_bswap:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movzwl var16, %eax
; CHECK-NEXT:    movl %eax, %ecx
; CHECK-NEXT:    shrl $8, %ecx
; CHECK-NEXT:    shll $8, %eax
; CHECK-NEXT:    orl %ecx, %eax
; CHECK-NEXT:    xorl %edx, %edx
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: not_bswap:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzwl {{.*}}(%rip), %eax
; CHECK64-NEXT:    movq %rax, %rcx
; CHECK64-NEXT:    shrq $8, %rcx
; CHECK64-NEXT:    shlq $8, %rax
; CHECK64-NEXT:    orq %rcx, %rax
; CHECK64-NEXT:    retq
  %init = load i16, i16* @var16
  %big = zext i16 %init to i64
  %hishifted = lshr i64 %big, 8
  %loshifted = shl i64 %big, 8
  %notswapped = or i64 %hishifted, %loshifted
  ret i64 %notswapped
}

; This time, the lshr (and subsequent or) is completely useless. While it's
; technically correct to convert this into a "bswap, shr", it's suboptimal. A
; simple shl works better.
define i64 @not_useful_bswap() {
; CHECK-LABEL: not_useful_bswap:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movzbl var8, %eax
; CHECK-NEXT:    shll $8, %eax
; CHECK-NEXT:    xorl %edx, %edx
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: not_useful_bswap:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzbl {{.*}}(%rip), %eax
; CHECK64-NEXT:    shlq $8, %rax
; CHECK64-NEXT:    retq
  %init = load i8, i8* @var8
  %big = zext i8 %init to i64
  %hishifted = lshr i64 %big, 8
  %loshifted = shl i64 %big, 8
  %notswapped = or i64 %hishifted, %loshifted
  ret i64 %notswapped
}

; Finally, it *is* OK to just mask off the shl if we know that the value is zero
; beyond 16 bits anyway. This is a legitimate bswap.
; The zext from i16 guarantees the high bits are zero, so this shift/mask
; pattern really is a byte swap of the 16-bit value.
define i64 @finally_useful_bswap() {
; CHECK-LABEL: finally_useful_bswap:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movzwl var16, %eax
; CHECK-NEXT:    bswapl %eax
; CHECK-NEXT:    shrl $16, %eax
; CHECK-NEXT:    xorl %edx, %edx
; CHECK-NEXT:    retl
;
; CHECK64-LABEL: finally_useful_bswap:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    movzwl {{.*}}(%rip), %eax
; CHECK64-NEXT:    bswapq %rax
; CHECK64-NEXT:    shrq $48, %rax
; CHECK64-NEXT:    retq
  %init = load i16, i16* @var16
  %big = zext i16 %init to i64
  %hishifted = lshr i64 %big, 8
  %lomasked = and i64 %big, 255
  %loshifted = shl i64 %lomasked, 8
  %swapped = or i64 %hishifted, %loshifted
  ret i64 %swapped
}

; Make sure we don't assert during type legalization promoting a large
; bswap due to the need for a large shift that won't fit in the i8 returned
; from getShiftAmountTy.
define i528 @large_promotion(i528 %A) nounwind {
; CHECK-LABEL: large_promotion:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushl %ebp
; CHECK-NEXT:    pushl %ebx
; CHECK-NEXT:    pushl %edi
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    subl $44, %esp
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    bswapl %eax
; CHECK-NEXT:    bswapl %ecx
; CHECK-NEXT:    shrdl $16, %ecx, %eax
; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    bswapl %edx
; CHECK-NEXT:    shrdl $16, %edx, %ecx
; CHECK-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    bswapl %esi
; CHECK-NEXT:    shrdl $16, %esi, %edx
; CHECK-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    bswapl %edi
; CHECK-NEXT:    shrdl $16, %edi, %esi
; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    bswapl %ebx
; CHECK-NEXT:    shrdl $16, %ebx, %edi
; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    bswapl %ebp
; CHECK-NEXT:    shrdl $16, %ebp, %ebx
; CHECK-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    bswapl %ecx
; CHECK-NEXT:    shrdl $16, %ecx, %ebp
; CHECK-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    bswapl %eax
; CHECK-NEXT:    shrdl $16, %eax, %ecx
; CHECK-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    bswapl %ecx
; CHECK-NEXT:    shrdl $16, %ecx, %eax
; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    bswapl %eax
; CHECK-NEXT:    shrdl $16, %eax, %ecx
; CHECK-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; CHECK-NEXT:    bswapl %ebp
; CHECK-NEXT:    shrdl $16, %ebp, %eax
; CHECK-NEXT:    movl %eax, (%esp) # 4-byte Spill
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; CHECK-NEXT:    bswapl %ebx
; CHECK-NEXT:    shrdl $16, %ebx, %ebp
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
; CHECK-NEXT:    bswapl %esi
; CHECK-NEXT:    shrdl $16, %esi, %ebx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT:    bswapl %edx
; CHECK-NEXT:    shrdl $16, %edx, %esi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT:    bswapl %ecx
; CHECK-NEXT:    shrdl $16, %ecx, %edx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT:    bswapl %edi
; CHECK-NEXT:    shrdl $16, %edi, %ecx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    movl %ecx, 60(%eax)
; CHECK-NEXT:    movl %edx, 56(%eax)
; CHECK-NEXT:    movl %esi, 52(%eax)
; CHECK-NEXT:    movl %ebx, 48(%eax)
; CHECK-NEXT:    movl %ebp, 44(%eax)
; CHECK-NEXT:    movl (%esp), %ecx # 4-byte Reload
; CHECK-NEXT:    movl %ecx, 40(%eax)
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT:    movl %ecx, 36(%eax)
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT:    movl %ecx, 32(%eax)
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT:    movl %ecx, 28(%eax)
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT:    movl %ecx, 24(%eax)
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT:    movl %ecx, 20(%eax)
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT:    movl %ecx, 16(%eax)
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT:    movl %ecx, 12(%eax)
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT:    movl %ecx, 8(%eax)
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT:    movl %ecx, 4(%eax)
; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-NEXT:    movl %ecx, (%eax)
; CHECK-NEXT:    shrl $16, %edi
; CHECK-NEXT:    movw %di, 64(%eax)
; CHECK-NEXT:    addl $44, %esp
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    popl %edi
; CHECK-NEXT:    popl %ebx
; CHECK-NEXT:    popl %ebp
; CHECK-NEXT:    retl $4
;
; CHECK64-LABEL: large_promotion:
; CHECK64:       # %bb.0:
; CHECK64-NEXT:    pushq %rbx
; CHECK64-NEXT:    movq %rdi, %rax
; CHECK64-NEXT:    movq {{[0-9]+}}(%rsp), %rbx
; CHECK64-NEXT:    movq {{[0-9]+}}(%rsp), %r11
; CHECK64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi
; CHECK64-NEXT:    movq {{[0-9]+}}(%rsp), %r10
; CHECK64-NEXT:    bswapq %r10
; CHECK64-NEXT:    bswapq %rdi
; CHECK64-NEXT:    shrdq $48, %rdi, %r10
; CHECK64-NEXT:    bswapq %r11
; CHECK64-NEXT:    shrdq $48, %r11, %rdi
; CHECK64-NEXT:    bswapq %rbx
; CHECK64-NEXT:    shrdq $48, %rbx, %r11
; CHECK64-NEXT:    bswapq %r9
; CHECK64-NEXT:    shrdq $48, %r9, %rbx
; CHECK64-NEXT:    bswapq %r8
; CHECK64-NEXT:    shrdq $48, %r8, %r9
; CHECK64-NEXT:    bswapq %rcx
; CHECK64-NEXT:    shrdq $48, %rcx, %r8
; CHECK64-NEXT:    bswapq %rdx
; CHECK64-NEXT:    shrdq $48, %rdx, %rcx
; CHECK64-NEXT:    bswapq %rsi
; CHECK64-NEXT:    shrdq $48, %rsi, %rdx
; CHECK64-NEXT:    shrq $48, %rsi
; CHECK64-NEXT:    movq %rdx, 56(%rax)
; CHECK64-NEXT:    movq %rcx, 48(%rax)
; CHECK64-NEXT:    movq %r8, 40(%rax)
; CHECK64-NEXT:    movq %r9, 32(%rax)
; CHECK64-NEXT:    movq %rbx, 24(%rax)
; CHECK64-NEXT:    movq %r11, 16(%rax)
; CHECK64-NEXT:    movq %rdi, 8(%rax)
; CHECK64-NEXT:    movq %r10, (%rax)
; CHECK64-NEXT:    movw %si, 64(%rax)
; CHECK64-NEXT:    popq %rbx
; CHECK64-NEXT:    retq
  %Z = call i528 @llvm.bswap.i528(i528 %A)
  ret i528 %Z
}

declare i528 @llvm.bswap.i528(i528)