343 lines
8.9 KiB
YAML
343 lines
8.9 KiB
YAML
# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=none -verify-machineinstrs %s -o - | FileCheck -check-prefixes=FULL,ALL %s
|
|
# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=none -simplify-mir -verify-machineinstrs %s -o - | FileCheck -check-prefixes=SIMPLE,ALL %s
|
|
|
|
|
|
---
# kernel0: a machineFunctionInfo section with explicit values for the kernarg,
# LDS, register and argumentInfo fields must be printed back with those values.
# The first check group also pins the fields that are printed even though the
# input below does not set them; the second group pins the -simplify-mir output.
# ALL-LABEL: name: kernel0
# FULL: machineFunctionInfo:
# FULL-NEXT: explicitKernArgSize: 128
# FULL-NEXT: maxKernArgAlign: 64
# FULL-NEXT: ldsSize: 2048
# FULL-NEXT: dynLDSAlign: 1
# FULL-NEXT: isEntryFunction: true
# FULL-NEXT: noSignedZerosFPMath: false
# FULL-NEXT: memoryBound: true
# FULL-NEXT: waveLimiter: true
# FULL-NEXT: hasSpilledSGPRs: false
# FULL-NEXT: hasSpilledVGPRs: false
# FULL-NEXT: scratchRSrcReg: '$sgpr8_sgpr9_sgpr10_sgpr11'
# FULL-NEXT: frameOffsetReg: '$sgpr12'
# FULL-NEXT: stackPtrOffsetReg: '$sgpr13'
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
# FULL-NEXT: workGroupIDX: { reg: '$sgpr6' }
# FULL-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' }
# FULL-NEXT: workItemIDX: { reg: '$vgpr0' }
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
# FULL-NEXT: fp32-input-denormals: true
# FULL-NEXT: fp32-output-denormals: true
# FULL-NEXT: fp64-fp16-input-denormals: true
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 10
# FULL-NEXT: body:

# SIMPLE: machineFunctionInfo:
# SIMPLE-NEXT: explicitKernArgSize: 128
# SIMPLE-NEXT: maxKernArgAlign: 64
# SIMPLE-NEXT: ldsSize: 2048
# SIMPLE-NEXT: isEntryFunction: true
# SIMPLE-NEXT: memoryBound: true
# SIMPLE-NEXT: waveLimiter: true
# SIMPLE-NEXT: scratchRSrcReg: '$sgpr8_sgpr9_sgpr10_sgpr11'
# SIMPLE-NEXT: frameOffsetReg: '$sgpr12'
# SIMPLE-NEXT: stackPtrOffsetReg: '$sgpr13'
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr6' }
# SIMPLE-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' }
# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr0' }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:
name: kernel0
machineFunctionInfo:
  explicitKernArgSize: 128
  maxKernArgAlign: 64
  ldsSize: 2048
  isEntryFunction: true
  noSignedZerosFPMath: false
  memoryBound: true
  waveLimiter: true
  scratchRSrcReg: '$sgpr8_sgpr9_sgpr10_sgpr11'
  frameOffsetReg: '$sgpr12'
  stackPtrOffsetReg: '$sgpr13'
  argumentInfo:
    privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
    kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
    workGroupIDX: { reg: '$sgpr6' }
    privateSegmentWaveByteOffset: { reg: '$sgpr7' }
    workItemIDX: { reg: '$vgpr0' }
body: |
  bb.0:
    S_ENDPGM 0

...
|
|
|
|
# FIXME: Should be able to not print section for simple
|
|
---
# no_mfi: the input below has no machineFunctionInfo key at all. The checks pin
# the section that is printed anyway, in both the regular and the -simplify-mir
# output.
# ALL-LABEL: name: no_mfi
# FULL: machineFunctionInfo:
# FULL-NEXT: explicitKernArgSize: 0
# FULL-NEXT: maxKernArgAlign: 1
# FULL-NEXT: ldsSize: 0
# FULL-NEXT: dynLDSAlign: 1
# FULL-NEXT: isEntryFunction: false
# FULL-NEXT: noSignedZerosFPMath: false
# FULL-NEXT: memoryBound: false
# FULL-NEXT: waveLimiter: false
# FULL-NEXT: hasSpilledSGPRs: false
# FULL-NEXT: hasSpilledVGPRs: false
# FULL-NEXT: scratchRSrcReg: '$private_rsrc_reg'
# FULL-NEXT: frameOffsetReg: '$fp_reg'
# FULL-NEXT: stackPtrOffsetReg: '$sp_reg'
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
# FULL-NEXT: fp32-input-denormals: true
# FULL-NEXT: fp32-output-denormals: true
# FULL-NEXT: fp64-fp16-input-denormals: true
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 10
# FULL-NEXT: body:

# SIMPLE: machineFunctionInfo:
# SIMPLE-NEXT: maxKernArgAlign: 1
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:

name: no_mfi
body: |
  bb.0:
    S_ENDPGM 0

...
|
|
|
|
---
# empty_mfi: the machineFunctionInfo key is present in the input but is
# deliberately left without a value. The expected output is the same as for a
# function that omits the key entirely.
# ALL-LABEL: name: empty_mfi
# FULL: machineFunctionInfo:
# FULL-NEXT: explicitKernArgSize: 0
# FULL-NEXT: maxKernArgAlign: 1
# FULL-NEXT: ldsSize: 0
# FULL-NEXT: dynLDSAlign: 1
# FULL-NEXT: isEntryFunction: false
# FULL-NEXT: noSignedZerosFPMath: false
# FULL-NEXT: memoryBound: false
# FULL-NEXT: waveLimiter: false
# FULL-NEXT: hasSpilledSGPRs: false
# FULL-NEXT: hasSpilledVGPRs: false
# FULL-NEXT: scratchRSrcReg: '$private_rsrc_reg'
# FULL-NEXT: frameOffsetReg: '$fp_reg'
# FULL-NEXT: stackPtrOffsetReg: '$sp_reg'
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
# FULL-NEXT: fp32-input-denormals: true
# FULL-NEXT: fp32-output-denormals: true
# FULL-NEXT: fp64-fp16-input-denormals: true
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 10
# FULL-NEXT: body:

# SIMPLE: machineFunctionInfo:
# SIMPLE-NEXT: maxKernArgAlign: 1
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:

name: empty_mfi
machineFunctionInfo:
body: |
  bb.0:
    S_ENDPGM 0

...
|
|
|
|
---
# empty_mfi_entry_func: the only field set in the input is isEntryFunction.
# The checks pin that it is printed as true in both outputs while every other
# field keeps the value printed for an empty machineFunctionInfo.
# ALL-LABEL: name: empty_mfi_entry_func
# FULL: machineFunctionInfo:
# FULL-NEXT: explicitKernArgSize: 0
# FULL-NEXT: maxKernArgAlign: 1
# FULL-NEXT: ldsSize: 0
# FULL-NEXT: dynLDSAlign: 1
# FULL-NEXT: isEntryFunction: true
# FULL-NEXT: noSignedZerosFPMath: false
# FULL-NEXT: memoryBound: false
# FULL-NEXT: waveLimiter: false
# FULL-NEXT: hasSpilledSGPRs: false
# FULL-NEXT: hasSpilledVGPRs: false
# FULL-NEXT: scratchRSrcReg: '$private_rsrc_reg'
# FULL-NEXT: frameOffsetReg: '$fp_reg'
# FULL-NEXT: stackPtrOffsetReg: '$sp_reg'
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
# FULL-NEXT: fp32-input-denormals: true
# FULL-NEXT: fp32-output-denormals: true
# FULL-NEXT: fp64-fp16-input-denormals: true
# FULL-NEXT: fp64-fp16-output-denormals: true
# FULL-NEXT: highBitsOf32BitAddress: 0
# FULL-NEXT: occupancy: 10
# FULL-NEXT: body:

# SIMPLE: machineFunctionInfo:
# SIMPLE-NEXT: maxKernArgAlign: 1
# SIMPLE-NEXT: isEntryFunction: true
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:

name: empty_mfi_entry_func
machineFunctionInfo:
  isEntryFunction: true
body: |
  bb.0:
    S_ENDPGM 0

...
|
|
|
|
---
# default_regs_mfi: the input spells out scratchRSrcReg with the same value
# that is printed when the field is absent. The regular output must print all
# three stack-related registers; the -simplify-mir output must print none of
# them.
# ALL-LABEL: name: default_regs_mfi

# FULL: scratchRSrcReg: '$private_rsrc_reg'
# FULL-NEXT: frameOffsetReg: '$fp_reg'
# FULL-NEXT: stackPtrOffsetReg: '$sp_reg'

# SIMPLE-NOT: scratchRSrcReg
# SIMPLE-NOT: frameOffsetReg
# SIMPLE-NOT: stackPtrOffsetReg
name: default_regs_mfi
machineFunctionInfo:
  scratchRSrcReg: '$private_rsrc_reg'

body: |
  bb.0:
    S_ENDPGM 0

...
|
|
|
|
---
# fake_stack_arginfo: argumentInfo entries may carry an offset instead of a
# register, and a register entry may carry a mask. The mask is written in hex
# in the input (0xff00) and is expected to be printed in decimal (65280).
# ALL-LABEL: name: fake_stack_arginfo

# FULL: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: flatScratchInit: { offset: 4 }
# FULL-NEXT: workItemIDY: { reg: '$vgpr0', mask: 65280 }

# SIMPLE: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: flatScratchInit: { offset: 4 }
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr0', mask: 65280 }
name: fake_stack_arginfo
machineFunctionInfo:
  argumentInfo:
    flatScratchInit: { offset: 4 }
    workItemIDY: { reg: '$vgpr0' , mask: 0xff00 }

body: |
  bb.0:
    S_ENDPGM 0

...
|
|
|
|
---
# parse_mode: every field of the mode mapping is set to false in the input and
# must be printed back as false in both outputs.
# ALL-LABEL: name: parse_mode
# ALL: mode:
# ALL-NEXT: ieee: false
# ALL-NEXT: dx10-clamp: false
# ALL-NEXT: fp32-input-denormals: false
# ALL-NEXT: fp32-output-denormals: false
# ALL-NEXT: fp64-fp16-input-denormals: false
# ALL-NEXT: fp64-fp16-output-denormals: false

name: parse_mode
machineFunctionInfo:
  mode:
    ieee: false
    dx10-clamp: false
    fp32-input-denormals: false
    fp32-output-denormals: false
    fp64-fp16-input-denormals: false
    fp64-fp16-output-denormals: false

body: |
  bb.0:
    S_ENDPGM 0

...
|
|
|
|
|
|
---
# parse_spilled_regs: both spill flags are set to true in the input and must be
# printed back as true, on consecutive lines, in both outputs.
# ALL-LABEL: name: parse_spilled_regs
# ALL: machineFunctionInfo:
# ALL: hasSpilledSGPRs: true
# ALL-NEXT: hasSpilledVGPRs: true

name: parse_spilled_regs
machineFunctionInfo:
  hasSpilledSGPRs: true
  hasSpilledVGPRs: true

body: |
  bb.0:
    S_ENDPGM 0

...
|
|
|
|
---
# dyn_lds_with_alignment: a dynLDSAlign of 8 in the input must be printed back
# as 8 in both outputs; the regular output prints it directly after ldsSize.
# ALL-LABEL: name: dyn_lds_with_alignment

# FULL: ldsSize: 0
# FULL-NEXT: dynLDSAlign: 8

# SIMPLE: dynLDSAlign: 8
name: dyn_lds_with_alignment
machineFunctionInfo:
  dynLDSAlign: 8

body: |
  bb.0:
    S_ENDPGM 0

...
|
|
|
|
---
# occupancy_0: an occupancy of 0 in the input is expected to be printed as 10,
# the same value the other functions in this file print when the field is
# absent.
# ALL-LABEL: name: occupancy_0
# ALL: occupancy: 10
name: occupancy_0
machineFunctionInfo:
  occupancy: 0

body: |
  bb.0:
    S_ENDPGM 0

...
|
|
|
|
---
# occupancy_3: an occupancy of 3 in the input must be printed back as 3 in both
# outputs.
# ALL-LABEL: name: occupancy_3
# ALL: occupancy: 3
name: occupancy_3
machineFunctionInfo:
  occupancy: 3

body: |
  bb.0:
    S_ENDPGM 0

...
|