# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefixes=GFX9,GFX9_10 %s # RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefixes=GFX10,GFX9_10 %s --- | define amdgpu_kernel void @max-counter-lgkmcnt() #0 { ret void } define amdgpu_kernel void @max-counter-vmcnt() #0 { ret void } define amdgpu_kernel void @max-counter-expcnt() #0 { ret void } attributes #0 = { "amdgpu-flat-work-group-size"="1,256" } ... # Check that we handle cases where a counter has overflowed. # Overflows lgkmcnt with gfx9 but not with gfx10. --- name: max-counter-lgkmcnt body: | bb.0: liveins: $vgpr99 ; GFX9-LABEL: name: max-counter-lgkmcnt ; GFX9: S_WAITCNT 0 ; GFX9: $vgpr34_vgpr35 = DS_READ2_B32_gfx9 renamable $vgpr99, 34, 35, 0, implicit $exec ; GFX9-NOT: S_WAITCNT 53119 ; GFX9-NEXT: S_WAITCNT 52863 ; GFX9-NEXT: $vgpr0 = V_MAC_F32_e32 0, $vgpr1, $vgpr0, implicit $mode, implicit $exec ; GFX9-NEXT: $vgpr2 = V_MAC_F32_e32 0, $vgpr3, $vgpr2, implicit $mode, implicit $exec ; GFX9-NEXT: $vgpr4 = V_MAC_F32_e32 0, $vgpr5, $vgpr4, implicit $mode, implicit $exec ; GFX9-NEXT: $vgpr6 = V_MAC_F32_e32 0, $vgpr7, $vgpr6, implicit $mode, implicit $exec ; GFX9-NEXT: S_ENDPGM 0 ; GFX10-LABEL: name: max-counter-lgkmcnt ; GFX10: $vgpr34_vgpr35 = DS_READ2_B32_gfx9 renamable $vgpr99, 34, 35, 0, implicit $exec ; GFX10-NEXT: S_WAITCNT 53631 ; GFX10-NEXT: $vgpr0 = V_MAC_F32_e32 0, $vgpr1, $vgpr0, implicit $mode, implicit $exec ; GFX10-NEXT: S_WAITCNT 53375 ; GFX10-NEXT: $vgpr2 = V_MAC_F32_e32 0, $vgpr3, $vgpr2, implicit $mode, implicit $exec ; GFX10-NEXT: S_WAITCNT 53119 ; GFX10-NEXT: $vgpr4 = V_MAC_F32_e32 0, $vgpr5, $vgpr4, implicit $mode, implicit $exec ; GFX10-NEXT: S_WAITCNT 52863 ; GFX10-NEXT: $vgpr6 = V_MAC_F32_e32 0, $vgpr7, $vgpr6, implicit $mode, implicit $exec ; GFX10-NEXT: S_ENDPGM 0 $vgpr0_vgpr1 = DS_READ2_B32_gfx9 renamable $vgpr99, 0, 1, 0, implicit $exec $vgpr2_vgpr3 = DS_READ2_B32_gfx9 renamable $vgpr99, 2, 3, 0, implicit $exec $vgpr4_vgpr5 = DS_READ2_B32_gfx9 renamable $vgpr99, 4, 5, 0, implicit $exec $vgpr6_vgpr7 = DS_READ2_B32_gfx9 renamable $vgpr99, 6, 7, 0, implicit $exec $vgpr8_vgpr9 = DS_READ2_B32_gfx9 renamable $vgpr99, 8, 9, 0, implicit $exec $vgpr10_vgpr11 = DS_READ2_B32_gfx9 renamable $vgpr99, 10, 11, 0, implicit $exec $vgpr12_vgpr13 = DS_READ2_B32_gfx9 renamable $vgpr99, 12, 13, 0, implicit $exec $vgpr14_vgpr15 = DS_READ2_B32_gfx9 renamable $vgpr99, 14, 15, 0, implicit $exec $vgpr16_vgpr17 = DS_READ2_B32_gfx9 renamable $vgpr99, 16, 17, 0, implicit $exec $vgpr18_vgpr19 = DS_READ2_B32_gfx9 renamable $vgpr99, 18, 19, 0, implicit $exec $vgpr20_vgpr21 = DS_READ2_B32_gfx9 renamable $vgpr99, 20, 21, 0, implicit $exec $vgpr22_vgpr23 = DS_READ2_B32_gfx9 renamable $vgpr99, 22, 23, 0, implicit $exec $vgpr24_vgpr25 = DS_READ2_B32_gfx9 renamable $vgpr99, 24, 25, 0, implicit $exec $vgpr26_vgpr27 = DS_READ2_B32_gfx9 renamable $vgpr99, 26, 27, 0, implicit $exec $vgpr28_vgpr29 = DS_READ2_B32_gfx9 renamable $vgpr99, 28, 29, 0, implicit $exec $vgpr30_vgpr31 = DS_READ2_B32_gfx9 renamable $vgpr99, 30, 31, 0, implicit $exec $vgpr32_vgpr33 = DS_READ2_B32_gfx9 renamable $vgpr99, 32, 33, 0, implicit $exec $vgpr34_vgpr35 = DS_READ2_B32_gfx9 renamable $vgpr99, 34, 35, 0, implicit $exec $vgpr0 = V_MAC_F32_e32 0, $vgpr1, $vgpr0, implicit $mode, implicit $exec $vgpr2 = V_MAC_F32_e32 0, $vgpr3, $vgpr2, implicit $mode, implicit $exec $vgpr4 = V_MAC_F32_e32 0, $vgpr5, $vgpr4, implicit $mode, implicit $exec $vgpr6 = V_MAC_F32_e32 0, $vgpr7, $vgpr6, implicit $mode, implicit $exec S_ENDPGM 0 ... # Overflows vmcnt with gfx9 and gfx10. --- name: max-counter-vmcnt body: | bb.0: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4 ; GFX9_10-LABEL: name: max-counter-vmcnt ; GFX9_10: $vgpr66 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 264, 0, 0, 0, 0, 0, implicit $exec ; GFX9-NOT: S_WAITCNT 53119 ; GFX10-NOT: S_WAITCNT 65407 ; GFX9-NEXT: S_WAITCNT 53118 ; GFX10-NEXT: S_WAITCNT 65406 ; GFX9_10-NEXT: $vgpr0 = V_MAC_F32_e32 0, $vgpr1, $vgpr0, implicit $mode, implicit $exec ; GFX9_10-NEXT: $vgpr1 = V_MAC_F32_e32 0, $vgpr2, $vgpr1, implicit $mode, implicit $exec ; GFX9_10-NEXT: $vgpr2 = V_MAC_F32_e32 0, $vgpr3, $vgpr2, implicit $mode, implicit $exec ; GFX9_10-NEXT: $vgpr3 = V_MAC_F32_e32 0, $vgpr4, $vgpr3, implicit $mode, implicit $exec ; GFX9_10-NEXT: S_ENDPGM 0 $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, 0, 0, implicit $exec $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, 0, 0, 0, implicit $exec $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 12, 0, 0, 0, 0, 0, implicit $exec $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 16, 0, 0, 0, 0, 0, implicit $exec $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 20, 0, 0, 0, 0, 0, implicit $exec $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 24, 0, 0, 0, 0, 0, implicit $exec $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 28, 0, 0, 0, 0, 0, implicit $exec $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 32, 0, 0, 0, 0, 0, implicit $exec $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 36, 0, 0, 0, 0, 0, implicit $exec $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 40, 0, 0, 0, 0, 0, implicit $exec $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 44, 0, 0, 0, 0, 0, implicit $exec $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 48, 0, 0, 0, 0, 0, implicit $exec $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 52, 0, 0, 0, 0, 0, implicit $exec $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 56, 0, 0, 0, 0, 0, implicit $exec $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 60, 0, 0, 0, 0, 0, implicit $exec $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 64, 0, 0, 0, 0, 0, implicit $exec $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 68, 0, 0, 0, 0, 0, implicit $exec $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 72, 0, 0, 0, 0, 0, implicit $exec $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 76, 0, 0, 0, 0, 0, implicit $exec $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 80, 0, 0, 0, 0, 0, implicit $exec $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 84, 0, 0, 0, 0, 0, implicit $exec $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 88, 0, 0, 0, 0, 0, implicit $exec $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 92, 0, 0, 0, 0, 0, implicit $exec $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 96, 0, 0, 0, 0, 0, implicit $exec $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 100, 0, 0, 0, 0, 0, implicit $exec $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 104, 0, 0, 0, 0, 0, implicit $exec $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 108, 0, 0, 0, 0, 0, implicit $exec $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 112, 0, 0, 0, 0, 0, implicit $exec $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 116, 0, 0, 0, 0, 0, implicit $exec $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 120, 0, 0, 0, 0, 0, implicit $exec $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 124, 0, 0, 0, 0, 0, implicit $exec $vgpr32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 128, 0, 0, 0, 0, 0, implicit $exec $vgpr33 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 132, 0, 0, 0, 0, 0, implicit $exec $vgpr34 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 136, 0, 0, 0, 0, 0, implicit $exec $vgpr35 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 140, 0, 0, 0, 0, 0, implicit $exec $vgpr36 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 144, 0, 0, 0, 0, 0, implicit $exec $vgpr37 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 148, 0, 0, 0, 0, 0, implicit $exec $vgpr38 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 152, 0, 0, 0, 0, 0, implicit $exec $vgpr39 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 156, 0, 0, 0, 0, 0, implicit $exec $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 160, 0, 0, 0, 0, 0, implicit $exec $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 164, 0, 0, 0, 0, 0, implicit $exec $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 168, 0, 0, 0, 0, 0, implicit $exec $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 172, 0, 0, 0, 0, 0, implicit $exec $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 176, 0, 0, 0, 0, 0, implicit $exec $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 180, 0, 0, 0, 0, 0, implicit $exec $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 184, 0, 0, 0, 0, 0, implicit $exec $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 188, 0, 0, 0, 0, 0, implicit $exec $vgpr48 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 192, 0, 0, 0, 0, 0, implicit $exec $vgpr49 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 196, 0, 0, 0, 0, 0, implicit $exec $vgpr50 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 200, 0, 0, 0, 0, 0, implicit $exec $vgpr51 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 204, 0, 0, 0, 0, 0, implicit $exec $vgpr52 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 208, 0, 0, 0, 0, 0, implicit $exec $vgpr53 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 212, 0, 0, 0, 0, 0, implicit $exec $vgpr54 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 216, 0, 0, 0, 0, 0, implicit $exec $vgpr55 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 220, 0, 0, 0, 0, 0, implicit $exec $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 224, 0, 0, 0, 0, 0, implicit $exec $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 228, 0, 0, 0, 0, 0, implicit $exec $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 232, 0, 0, 0, 0, 0, implicit $exec $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 236, 0, 0, 0, 0, 0, implicit $exec $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 240, 0, 0, 0, 0, 0, implicit $exec $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 244, 0, 0, 0, 0, 0, implicit $exec $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 248, 0, 0, 0, 0, 0, implicit $exec $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 252, 0, 0, 0, 0, 0, implicit $exec $vgpr64 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 256, 0, 0, 0, 0, 0, implicit $exec $vgpr65 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 260, 0, 0, 0, 0, 0, implicit $exec $vgpr66 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 264, 0, 0, 0, 0, 0, implicit $exec $vgpr0 = V_MAC_F32_e32 0, $vgpr1, $vgpr0, implicit $mode, implicit $exec $vgpr1 = V_MAC_F32_e32 0, $vgpr2, $vgpr1, implicit $mode, implicit $exec $vgpr2 = V_MAC_F32_e32 0, $vgpr3, $vgpr2, implicit $mode, implicit $exec $vgpr3 = V_MAC_F32_e32 0, $vgpr4, $vgpr3, implicit $mode, implicit $exec S_ENDPGM 0 ... # Should be no waitcnt if expcnt overflows. --- name: max-counter-expcnt body: | bb.0: liveins: $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, $vgpr0, $vgpr1 ; GFX9_10-LABEL: name: max-counter-expcnt ; GFX9_10: EXP ; GFX9_10-NOT: S_WAITCNT EXP 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec EXP 0, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec $vgpr0 = V_MAC_F32_e32 0, $vgpr1, $vgpr0, implicit $mode, implicit $exec S_ENDPGM 0 ...