; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -O0 -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs -stop-after=regallocfast < %s | FileCheck -check-prefixes=GCN %s

; Verify that we consider the xor at the end of the waterfall loop emitted for
; divergent indirect addressing as a terminator.

declare i32 @llvm.amdgcn.workitem.id.x() #1

; There should be no spill code inserted between the xor and the real terminator
;
; The kernel extracts one lane of a constant <16 x i32> vector using an index
; derived from the workitem id, then stores the extracted value to %out.
define amdgpu_kernel void @extract_w_offset_vgpr(i32 addrspace(1)* %out) {
; GCN-LABEL: name: extract_w_offset_vgpr
; GCN: bb.0.entry:
; GCN:   successors: %bb.1(0x80000000)
; GCN:   liveins: $vgpr0, $sgpr0_sgpr1
; GCN:   SI_SPILL_V32_SAVE killed $vgpr0, %stack.3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5)
; GCN:   renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 8 from %ir.out.kernarg.offset.cast, align 4, addrspace 4)
; GCN:   renamable $sgpr6 = COPY renamable $sgpr1
; GCN:   renamable $sgpr0 = COPY renamable $sgpr0, implicit killed $sgpr0_sgpr1
; GCN:   renamable $sgpr4 = S_MOV_B32 61440
; GCN:   renamable $sgpr5 = S_MOV_B32 -1
; GCN:   undef renamable $sgpr0 = COPY killed renamable $sgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
; GCN:   renamable $sgpr1 = COPY killed renamable $sgpr6
; GCN:   renamable $sgpr2 = COPY killed renamable $sgpr5
; GCN:   renamable $sgpr3 = COPY killed renamable $sgpr4
; GCN:   SI_SPILL_S128_SAVE killed $sgpr0_sgpr1_sgpr2_sgpr3, %stack.2, implicit $exec, implicit $sgpr32 :: (store 16 into %stack.2, align 4, addrspace 5)
; GCN:   renamable $sgpr0 = S_MOV_B32 16
; GCN:   renamable $sgpr1 = S_MOV_B32 15
; GCN:   renamable $sgpr2 = S_MOV_B32 14
; GCN:   renamable $sgpr3 = S_MOV_B32 13
; GCN:   renamable $sgpr4 = S_MOV_B32 12
; GCN:   renamable $sgpr5 = S_MOV_B32 11
; GCN:   renamable $sgpr6 = S_MOV_B32 10
; GCN:   renamable $sgpr7 = S_MOV_B32 9
; GCN:   renamable $sgpr8 = S_MOV_B32 8
; GCN:   renamable $sgpr9 = S_MOV_B32 7
; GCN:   renamable $sgpr10 = S_MOV_B32 6
; GCN:   renamable $sgpr11 = S_MOV_B32 5
; GCN:   renamable $sgpr12 = S_MOV_B32 3
; GCN:   renamable $sgpr13 = S_MOV_B32 2
; GCN:   renamable $sgpr14 = S_MOV_B32 1
; GCN:   renamable $sgpr15 = S_MOV_B32 0
; GCN:   renamable $vgpr0 = COPY killed renamable $sgpr15
; GCN:   renamable $vgpr30 = COPY killed renamable $sgpr14
; GCN:   renamable $vgpr29 = COPY killed renamable $sgpr13
; GCN:   renamable $vgpr28 = COPY killed renamable $sgpr12
; GCN:   renamable $vgpr27 = COPY killed renamable $sgpr11
; GCN:   renamable $vgpr26 = COPY killed renamable $sgpr10
; GCN:   renamable $vgpr25 = COPY killed renamable $sgpr9
; GCN:   renamable $vgpr24 = COPY killed renamable $sgpr8
; GCN:   renamable $vgpr23 = COPY killed renamable $sgpr7
; GCN:   renamable $vgpr22 = COPY killed renamable $sgpr6
; GCN:   renamable $vgpr21 = COPY killed renamable $sgpr5
; GCN:   renamable $vgpr20 = COPY killed renamable $sgpr4
; GCN:   renamable $vgpr19 = COPY killed renamable $sgpr3
; GCN:   renamable $vgpr18 = COPY killed renamable $sgpr2
; GCN:   renamable $vgpr17 = COPY killed renamable $sgpr1
; GCN:   renamable $vgpr16 = COPY killed renamable $sgpr0
; GCN:   undef renamable $vgpr0 = COPY killed renamable $vgpr0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
; GCN:   renamable $vgpr1 = COPY killed renamable $vgpr30
; GCN:   renamable $vgpr2 = COPY killed renamable $vgpr29
; GCN:   renamable $vgpr3 = COPY killed renamable $vgpr28
; GCN:   renamable $vgpr4 = COPY killed renamable $vgpr27
; GCN:   renamable $vgpr5 = COPY killed renamable $vgpr26
; GCN:   renamable $vgpr6 = COPY killed renamable $vgpr25
; GCN:   renamable $vgpr7 = COPY killed renamable $vgpr24
; GCN:   renamable $vgpr8 = COPY killed renamable $vgpr23
; GCN:   renamable $vgpr9 = COPY killed renamable $vgpr22
; GCN:   renamable $vgpr10 = COPY killed renamable $vgpr21
; GCN:   renamable $vgpr11 = COPY killed renamable $vgpr20
; GCN:   renamable $vgpr12 = COPY killed renamable $vgpr19
; GCN:   renamable $vgpr13 = COPY killed renamable $vgpr18
; GCN:   renamable $vgpr14 = COPY killed renamable $vgpr17
; GCN:   renamable $vgpr15 = COPY killed renamable $vgpr16
; GCN:   SI_SPILL_V512_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, %stack.1, $sgpr32, 0, implicit $exec :: (store 64 into %stack.1, align 4, addrspace 5)
; GCN:   renamable $sgpr0_sgpr1 = S_MOV_B64 $exec
; GCN:   SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr32 :: (store 8 into %stack.0, align 4, addrspace 5)
; GCN:   renamable $vgpr0 = IMPLICIT_DEF
; GCN:   renamable $sgpr0_sgpr1 = IMPLICIT_DEF
; GCN: bb.1:
; GCN:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
; GCN:   $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.4, implicit $exec, implicit $sgpr32 :: (load 8 from %stack.4, align 4, addrspace 5)
; GCN:   $vgpr17 = SI_SPILL_V32_RESTORE %stack.5, $sgpr32, 0, implicit $exec :: (load 4 from %stack.5, addrspace 5)
; GCN:   $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = SI_SPILL_V512_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load 64 from %stack.1, align 4, addrspace 5)
; GCN:   $vgpr16 = SI_SPILL_V32_RESTORE %stack.3, $sgpr32, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5)
; GCN:   renamable $sgpr2 = V_READFIRSTLANE_B32 $vgpr16, implicit $exec
; GCN:   renamable $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 $sgpr2, $vgpr16, implicit $exec
; GCN:   renamable $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed renamable $sgpr0_sgpr1, implicit-def $exec, implicit-def dead $scc, implicit $exec
; GCN:   renamable $vgpr0 = V_INDIRECT_REG_READ_GPR_IDX_B32_V16 $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, killed $sgpr2, 11, implicit-def $m0, implicit $m0, implicit $exec
; GCN:   SI_SPILL_V32_SAVE $vgpr0, %stack.6, $sgpr32, 0, implicit $exec :: (store 4 into %stack.6, addrspace 5)
; GCN:   SI_SPILL_V32_SAVE killed $vgpr0, %stack.5, $sgpr32, 0, implicit $exec :: (store 4 into %stack.5, addrspace 5)
; GCN:   renamable $sgpr2_sgpr3 = COPY renamable $sgpr0_sgpr1
; GCN:   SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.4, implicit $exec, implicit $sgpr32 :: (store 8 into %stack.4, align 4, addrspace 5)
; GCN:   $exec = S_XOR_B64_term $exec, killed renamable $sgpr0_sgpr1, implicit-def dead $scc
; GCN:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
; GCN: bb.3:
; GCN:   successors: %bb.2(0x80000000)
; GCN:   $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load 8 from %stack.0, align 4, addrspace 5)
; GCN:   $exec = S_MOV_B64 renamable $sgpr0_sgpr1
; GCN: bb.2:
; GCN:   $vgpr0 = SI_SPILL_V32_RESTORE %stack.6, $sgpr32, 0, implicit $exec :: (load 4 from %stack.6, addrspace 5)
; GCN:   $sgpr0_sgpr1_sgpr2_sgpr3 = SI_SPILL_S128_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load 16 from %stack.2, align 4, addrspace 5)
; GCN:   BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.load, addrspace 1)
; GCN:   S_ENDPGM 0
entry:
  %id = call i32 @llvm.amdgcn.workitem.id.x() #1
  %index = add i32 %id, 1
  ; The lane values are 0-3 and 5-16 (4 is skipped); they correspond to the
  ; S_MOV_B32 immediates expected in bb.0 of the checks above.
  %value = extractelement <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>, i32 %index
  store i32 %value, i32 addrspace(1)* %out
  ret void
}