# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=none -verify-machineinstrs %s -o - | FileCheck -check-prefixes=FULL,ALL %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=none -simplify-mir -verify-machineinstrs %s -o - | FileCheck -check-prefixes=SIMPLE,ALL %s

# Parse/print round trip of the AMDGPU machineFunctionInfo section. The file
# is run twice with no passes: once with the default printer (FULL prefix)
# and once with -simplify-mir (SIMPLE prefix). Checks under the ALL prefix
# apply to both runs.

---
# kernel0: the input sets explicit sizes, flags, special registers and
# argument registers. The full output is expected to also print the fields
# the input leaves out (dynLDSAlign, spill flags, mode,
# highBitsOf32BitAddress, occupancy); the simplified checks omit most of them.

# ALL-LABEL: name: kernel0
# FULL: machineFunctionInfo:
# FULL-NEXT: explicitKernArgSize: 128
# FULL-NEXT: maxKernArgAlign: 64
# FULL-NEXT: ldsSize: 2048
# FULL-NEXT: dynLDSAlign: 1
# FULL-NEXT: isEntryFunction: true
# FULL-NEXT: noSignedZerosFPMath: false
# FULL-NEXT: memoryBound: true
# FULL-NEXT: waveLimiter: true
# FULL-NEXT: hasSpilledSGPRs: false
# FULL-NEXT: hasSpilledVGPRs: false
# FULL-NEXT: scratchRSrcReg: '$sgpr8_sgpr9_sgpr10_sgpr11'
# FULL-NEXT: frameOffsetReg: '$sgpr12'
# FULL-NEXT: stackPtrOffsetReg: '$sgpr13'
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
# FULL-NEXT: workGroupIDX: { reg: '$sgpr6' }
# FULL-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' }
# FULL-NEXT: workItemIDX: { reg: '$vgpr0' }
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
# FULL-NEXT: fp32-input-denormals: true
# FULL-NEXT: fp32-output-denormals: true
# FULL-NEXT: fp64-fp16-input-denormals: true
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 10
# FULL-NEXT: body:

# SIMPLE: machineFunctionInfo:
# SIMPLE-NEXT: explicitKernArgSize: 128
# SIMPLE-NEXT: maxKernArgAlign: 64
# SIMPLE-NEXT: ldsSize: 2048
# SIMPLE-NEXT: isEntryFunction: true
# SIMPLE-NEXT: memoryBound: true
# SIMPLE-NEXT: waveLimiter: true
# SIMPLE-NEXT: scratchRSrcReg: '$sgpr8_sgpr9_sgpr10_sgpr11'
# SIMPLE-NEXT: frameOffsetReg: '$sgpr12'
# SIMPLE-NEXT: stackPtrOffsetReg: '$sgpr13'
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr6' }
# SIMPLE-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' }
# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr0' }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:
name: kernel0
machineFunctionInfo:
  explicitKernArgSize: 128
  maxKernArgAlign: 64
  ldsSize: 2048
  isEntryFunction: true
  noSignedZerosFPMath: false
  memoryBound: true
  waveLimiter: true
  scratchRSrcReg: '$sgpr8_sgpr9_sgpr10_sgpr11'
  frameOffsetReg: '$sgpr12'
  stackPtrOffsetReg: '$sgpr13'
  argumentInfo:
    privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
    kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
    workGroupIDX: { reg: '$sgpr6' }
    privateSegmentWaveByteOffset: { reg: '$sgpr7' }
    workItemIDX: { reg: '$vgpr0' }
body: |
  bb.0:
    S_ENDPGM 0

...

# no_mfi: the input has no machineFunctionInfo section at all; the checks
# below pin the values that are printed for it in each mode.
# FIXME: The simplified (-simplify-mir) output should be able to omit the
# section entirely.
---
# ALL-LABEL: name: no_mfi
# FULL: machineFunctionInfo:
# FULL-NEXT: explicitKernArgSize: 0
# FULL-NEXT: maxKernArgAlign: 1
# FULL-NEXT: ldsSize: 0
# FULL-NEXT: dynLDSAlign: 1
# FULL-NEXT: isEntryFunction: false
# FULL-NEXT: noSignedZerosFPMath: false
# FULL-NEXT: memoryBound: false
# FULL-NEXT: waveLimiter: false
# FULL-NEXT: hasSpilledSGPRs: false
# FULL-NEXT: hasSpilledVGPRs: false
# FULL-NEXT: scratchRSrcReg: '$private_rsrc_reg'
# FULL-NEXT: frameOffsetReg: '$fp_reg'
# FULL-NEXT: stackPtrOffsetReg: '$sp_reg'
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
# FULL-NEXT: fp32-input-denormals: true
# FULL-NEXT: fp32-output-denormals: true
# FULL-NEXT: fp64-fp16-input-denormals: true
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 10
# FULL-NEXT: body:

# SIMPLE: machineFunctionInfo:
# SIMPLE-NEXT: maxKernArgAlign: 1
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:
name: no_mfi
body: |
  bb.0:
    S_ENDPGM 0

...
---
# empty_mfi: the machineFunctionInfo key is present in the input but has no
# fields under it.

# ALL-LABEL: name: empty_mfi
# FULL: machineFunctionInfo:
# FULL-NEXT: explicitKernArgSize: 0
# FULL-NEXT: maxKernArgAlign: 1
# FULL-NEXT: ldsSize: 0
# FULL-NEXT: dynLDSAlign: 1
# FULL-NEXT: isEntryFunction: false
# FULL-NEXT: noSignedZerosFPMath: false
# FULL-NEXT: memoryBound: false
# FULL-NEXT: waveLimiter: false
# FULL-NEXT: hasSpilledSGPRs: false
# FULL-NEXT: hasSpilledVGPRs: false
# FULL-NEXT: scratchRSrcReg: '$private_rsrc_reg'
# FULL-NEXT: frameOffsetReg: '$fp_reg'
# FULL-NEXT: stackPtrOffsetReg: '$sp_reg'
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
# FULL-NEXT: fp32-input-denormals: true
# FULL-NEXT: fp32-output-denormals: true
# FULL-NEXT: fp64-fp16-input-denormals: true
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 10
# FULL-NEXT: body:

# SIMPLE: machineFunctionInfo:
# SIMPLE-NEXT: maxKernArgAlign: 1
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:
name: empty_mfi
machineFunctionInfo:
body: |
  bb.0:
    S_ENDPGM 0

...
---
# empty_mfi_entry_func: the only field given in the input is
# isEntryFunction; every other checked value is whatever gets printed when
# the field is absent from the input.

# ALL-LABEL: name: empty_mfi_entry_func
# FULL: machineFunctionInfo:
# FULL-NEXT: explicitKernArgSize: 0
# FULL-NEXT: maxKernArgAlign: 1
# FULL-NEXT: ldsSize: 0
# FULL-NEXT: dynLDSAlign: 1
# FULL-NEXT: isEntryFunction: true
# FULL-NEXT: noSignedZerosFPMath: false
# FULL-NEXT: memoryBound: false
# FULL-NEXT: waveLimiter: false
# FULL-NEXT: hasSpilledSGPRs: false
# FULL-NEXT: hasSpilledVGPRs: false
# FULL-NEXT: scratchRSrcReg: '$private_rsrc_reg'
# FULL-NEXT: frameOffsetReg: '$fp_reg'
# FULL-NEXT: stackPtrOffsetReg: '$sp_reg'
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
# FULL-NEXT: fp32-input-denormals: true
# FULL-NEXT: fp32-output-denormals: true
# FULL-NEXT: fp64-fp16-input-denormals: true
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 10
# FULL-NEXT: body:

# SIMPLE: machineFunctionInfo:
# SIMPLE-NEXT: maxKernArgAlign: 1
# SIMPLE-NEXT: isEntryFunction: true
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:
name: empty_mfi_entry_func
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    S_ENDPGM 0

...

---
# default_regs_mfi: the input spells out scratchRSrcReg with the same
# placeholder register that the full output prints when the field is absent.
# The full output must print all three special registers; the simplified
# output must print none of them. The negative checks below previously
# carried a doubled colon on the stackPtrOffsetReg line, which turned the
# pattern into one that could never match the printed "key: value" form and
# so verified nothing.

# ALL-LABEL: name: default_regs_mfi
# FULL: scratchRSrcReg: '$private_rsrc_reg'
# FULL-NEXT: frameOffsetReg: '$fp_reg'
# FULL-NEXT: stackPtrOffsetReg: '$sp_reg'

# SIMPLE-NOT: scratchRSrcReg
# SIMPLE-NOT: frameOffsetReg
# SIMPLE-NOT: stackPtrOffsetReg
name: default_regs_mfi
machineFunctionInfo:
  scratchRSrcReg: '$private_rsrc_reg'
body: |
  bb.0:
    S_ENDPGM 0

...
---
# fake_stack_arginfo: argument descriptors using a stack offset and a
# register with a bit mask. The mask is written in hexadecimal in the input
# and is expected back in decimal (0xff00 == 65280). privateSegmentBuffer is
# expected in the output even though the input does not list it.

# ALL-LABEL: name: fake_stack_arginfo
# FULL: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: flatScratchInit: { offset: 4 }
# FULL-NEXT: workItemIDY: { reg: '$vgpr0', mask: 65280 }

# SIMPLE: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: flatScratchInit: { offset: 4 }
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr0', mask: 65280 }
name: fake_stack_arginfo
machineFunctionInfo:
  argumentInfo:
    flatScratchInit: { offset: 4 }
    workItemIDY: { reg: '$vgpr0' , mask: 0xff00 }
body: |
  bb.0:
    S_ENDPGM 0

...

---
# parse_mode: every mode flag is set to false in the input and must be
# printed as false in both output flavours.

# ALL-LABEL: name: parse_mode
# ALL: mode:
# ALL-NEXT: ieee: false
# ALL-NEXT: dx10-clamp: false
# ALL-NEXT: fp32-input-denormals: false
# ALL-NEXT: fp32-output-denormals: false
# ALL-NEXT: fp64-fp16-input-denormals: false
# ALL-NEXT: fp64-fp16-output-denormals: false
name: parse_mode
machineFunctionInfo:
  mode:
    ieee: false
    dx10-clamp: false
    fp32-input-denormals: false
    fp32-output-denormals: false
    fp64-fp16-input-denormals: false
    fp64-fp16-output-denormals: false
body: |
  bb.0:
    S_ENDPGM 0

...

---
# parse_spilled_regs: both spill flags set to true must be printed back as
# true, adjacent to each other, in both output flavours.

# ALL-LABEL: name: parse_spilled_regs
# ALL: machineFunctionInfo:
# ALL: hasSpilledSGPRs: true
# ALL-NEXT: hasSpilledVGPRs: true
name: parse_spilled_regs
machineFunctionInfo:
  hasSpilledSGPRs: true
  hasSpilledVGPRs: true
body: |
  bb.0:
    S_ENDPGM 0

...

---
# dyn_lds_with_alignment: a dynLDSAlign of 8 must be printed in both
# flavours; the full output additionally prints ldsSize on the line before.

# ALL-LABEL: name: dyn_lds_with_alignment
# FULL: ldsSize: 0
# FULL-NEXT: dynLDSAlign: 8
# SIMPLE: dynLDSAlign: 8
name: dyn_lds_with_alignment
machineFunctionInfo:
  dynLDSAlign: 8
body: |
  bb.0:
    S_ENDPGM 0

...

---
# occupancy_0: an occupancy of 0 in the input is expected to be printed
# as 10.

# ALL-LABEL: name: occupancy_0
# ALL: occupancy: 10
name: occupancy_0
machineFunctionInfo:
  occupancy: 0
body: |
  bb.0:
    S_ENDPGM 0

...

---
# occupancy_3: an occupancy of 3 in the input is expected to be printed
# unchanged.

# ALL-LABEL: name: occupancy_3
# ALL: occupancy: 3
name: occupancy_3
machineFunctionInfo:
  occupancy: 3
body: |
  bb.0:
    S_ENDPGM 0

...