# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,SI,SICI,SIVI
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,CI,SICI
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,VI,SIVI

--- |
  ; IR stubs: one empty kernel per MIR function defined later in this file.
  ; @smrd_imm declares %const0 so that memory operands in its MIR body can
  ; refer to it as %ir.const0.

  define amdgpu_kernel void @smrd_imm(i32 addrspace(4)* %const0) {
    ret void
  }

  define amdgpu_kernel void @smrd_wide() {
    ret void
  }

  define amdgpu_kernel void @constant_address_positive() {
    ret void
  }

...
---

name:            smrd_imm
legalized:       true
regBankSelected: true

# Selection of 32-bit scalar loads from %0 plus a constant byte offset, one
# load per offset boundary of interest, followed by three pointer-typed loads.
# The check groups below are listed in the same order as the loads in the body.
# As the first group shows (byte offset 4 is expected as 1 on SICI and as 4 on
# VI), the SICI immediate is checked in dwords and the VI immediate in bytes.

# GCN-LABEL: name: smrd_imm{{$}}
# GCN: body:
# GCN: [[PTR:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1

# Immediate offset (byte offset 4)
# SICI: S_LOAD_DWORD_IMM [[PTR]], 1, 0, 0
# VI: S_LOAD_DWORD_IMM [[PTR]], 4, 0, 0

# Max immediate offset for SI (byte offset 1020)
# SICI: S_LOAD_DWORD_IMM [[PTR]], 255, 0, 0
# VI: S_LOAD_DWORD_IMM [[PTR]], 1020, 0, 0

# Immediate overflow for SI (byte offset 1024)
# SI: [[K1024:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
# SI: S_LOAD_DWORD_SGPR [[PTR]], [[K1024]], 0
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 256, 0
# VI: S_LOAD_DWORD_IMM [[PTR]], 1024, 0

# Max immediate offset for VI (byte offset 1048572)
# SI: [[K1048572:%[0-9]+]]:sreg_32 = S_MOV_B32 1048572
# SI: S_LOAD_DWORD_SGPR [[PTR]], [[K1048572]], 0
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262143
# VI: S_LOAD_DWORD_IMM [[PTR]], 1048572

# Immediate overflow for VI (byte offset 1048576)
# SIVI: [[K1048576:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576
# SIVI: S_LOAD_DWORD_SGPR [[PTR]], [[K1048576]], 0
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262144, 0

# Max immediate for CI (byte offset 17179869180)
# SIVI: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967292
# SIVI: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 3
# SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
# SIVI-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1
# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
# SIVI: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
# SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0, 0
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 4294967295, 0, 0

# Immediate overflow for CI (byte offset 17179869184)
# GCN: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 0
# GCN: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 4
# GCN: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
# GCN-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
# GCN-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
# GCN-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
# GCN-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1
# GCN-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
# GCN: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
# GCN: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
# GCN: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0, 0

# Max 32-bit byte offset (byte offset 4294967292)
# SIVI: [[K4294967292:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967292
# SIVI: S_LOAD_DWORD_SGPR [[PTR]], [[K4294967292]], 0
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741823, 0

# Overflow 32-bit byte offset (byte offset 4294967296)
# SIVI: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 0
# SIVI: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 1
# SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
# SIVI-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1
# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
# SIVI: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
# SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0, 0
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741824, 0, 0

# Pointer loads (results typed p0, p1 and p4, all loaded through the base pointer)
# GCN: [[AS0:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PTR]]
# GCN: $sgpr0_sgpr1 = COPY [[AS0]]
# GCN: [[AS1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PTR]]
# GCN: $sgpr0_sgpr1 = COPY [[AS1]]
# GCN: [[AS4:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM [[PTR]]
# GCN: $sgpr0_sgpr1 = COPY [[AS4]]

body: |
  bb.0:
    liveins: $sgpr0_sgpr1

    ; Base pointer shared by every load in this function.
    %0:sgpr(p4) = COPY $sgpr0_sgpr1

    ; Immediate offset
    %1:sgpr(s64) = G_CONSTANT i64 4
    %2:sgpr(p4) = G_PTR_ADD %0, %1
    %3:sgpr(s32) = G_LOAD %2 :: (load 4 from %ir.const0, addrspace 4)
    $sgpr0 = COPY %3

    ; Max immediate offset for SI
    %4:sgpr(s64) = G_CONSTANT i64 1020
    %5:sgpr(p4) = G_PTR_ADD %0, %4
    %6:sgpr(s32) = G_LOAD %5 :: (load 4 from %ir.const0, addrspace 4)
    $sgpr0 = COPY %6

    ; Immediate overflow for SI
    %7:sgpr(s64) = G_CONSTANT i64 1024
    %8:sgpr(p4) = G_PTR_ADD %0, %7
    %9:sgpr(s32) = G_LOAD %8 :: (load 4 from %ir.const0, addrspace 4)
    $sgpr0 = COPY %9

    ; Max immediate offset for VI
    %10:sgpr(s64) = G_CONSTANT i64 1048572
    %11:sgpr(p4) = G_PTR_ADD %0, %10
    %12:sgpr(s32) = G_LOAD %11 :: (load 4 from %ir.const0, addrspace 4)
    $sgpr0 = COPY %12

    ; Immediate overflow for VI
    %13:sgpr(s64) = G_CONSTANT i64 1048576
    %14:sgpr(p4) = G_PTR_ADD %0, %13
    %15:sgpr(s32) = G_LOAD %14 :: (load 4 from %ir.const0, addrspace 4)
    $sgpr0 = COPY %15

    ; Max immediate for CI
    %16:sgpr(s64) = G_CONSTANT i64 17179869180
    %17:sgpr(p4) = G_PTR_ADD %0, %16
    %18:sgpr(s32) = G_LOAD %17 :: (load 4 from %ir.const0, addrspace 4)
    $sgpr0 = COPY %18

    ; Immediate overflow for CI
    %19:sgpr(s64) = G_CONSTANT i64 17179869184
    %20:sgpr(p4) = G_PTR_ADD %0, %19
    %21:sgpr(s32) = G_LOAD %20 :: (load 4 from %ir.const0, addrspace 4)
    $sgpr0 = COPY %21

    ; Max 32-bit byte offset
    %22:sgpr(s64) = G_CONSTANT i64 4294967292
    %23:sgpr(p4) = G_PTR_ADD %0, %22
    %24:sgpr(s32) = G_LOAD %23 :: (load 4 from %ir.const0, addrspace 4)
    $sgpr0 = COPY %24

    ; Overflow 32-bit byte offset
    %25:sgpr(s64) = G_CONSTANT i64 4294967296
    %26:sgpr(p4) = G_PTR_ADD %0, %25
    %27:sgpr(s32) = G_LOAD %26 :: (load 4 from %ir.const0, addrspace 4)
    $sgpr0 = COPY %27

    ; Pointer loads: 8-byte loads whose results are typed p0, p1 and p4.
    %28:sgpr(p0) = G_LOAD %0 :: (load 8 from %ir.const0, addrspace 4)
    $sgpr0_sgpr1 = COPY %28

    %29:sgpr(p1) = G_LOAD %0 :: (load 8 from %ir.const0, addrspace 4)
    $sgpr0_sgpr1 = COPY %29

    %30:sgpr(p4) = G_LOAD %0 :: (load 8 from %ir.const0, addrspace 4)
    $sgpr0_sgpr1 = COPY %30

...
---

name:            smrd_wide
legalized:       true
regBankSelected: true

# Wide (<8 x s32> and <16 x s32>) scalar loads through a p4 pointer and a
# p1 pointer.

# GCN-LABEL: name: smrd_wide{{$}}

body: |
  bb.0:
    ; Both pointer pairs copied below are declared live-in.
    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
    %0:sgpr(p4) = COPY $sgpr0_sgpr1
    %1:sgpr(p1) = COPY $sgpr2_sgpr3

    ; NOTE(review): no RUN line in this file enables the CHECK prefix, so the
    ; CHECK directives in this function are currently not verified by
    ; FileCheck. Their patterns should be confirmed against real llc output
    ; before they are moved to an enabled prefix.
    ; CHECK: [[CONSTANT_PTR:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
    ; CHECK: [[GLOBAL_PTR:%[0-9]+]]:sgpr(p1) = COPY $sgpr2_sgpr3

    ; 32-byte load, addrspace 4
    ; CHECK: s_load_dwordx8 [[CONSTANT_PTR]]
    %2:sgpr(<8 x s32>) = G_LOAD %0 :: (load 32, addrspace 4)
    $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %2

    ; 64-byte load, addrspace 4
    ; CHECK: s_load_dwordx16 [[CONSTANT_PTR]]
    %3:sgpr(<16 x s32>) = G_LOAD %0 :: (load 64, addrspace 4)
    $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %3

    ; 32-byte load, addrspace 1
    ; CHECK: s_load_dwordx8 [[GLOBAL_PTR]]
    %4:sgpr(<8 x s32>) = G_LOAD %1 :: (load 32, addrspace 1)
    $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %4

    ; 64-byte load, addrspace 1
    ; CHECK: s_load_dwordx16 [[GLOBAL_PTR]]
    %5:sgpr(<16 x s32>) = G_LOAD %1 :: (load 64, addrspace 1)
    $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %5
...

# Test a load of an offset from a constant base address.
# The base (44) is expected to be materialized with S_MOV_B64; the byte offset
# 64 is expected as immediate 64 on VI and as immediate 16 on SICI.
# Virtual registers are matched with FileCheck captures instead of fixed
# numbers so the checks do not depend on exact register numbering.
# GCN-LABEL: name: constant_address_positive{{$}}
# GCN: [[BASE:%[0-9]+]]:sreg_64 = S_MOV_B64 44

# VI: {{%[0-9]+}}:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[BASE]], 64, 0, 0 :: (dereferenceable invariant load 4, addrspace 4)
# SICI: {{%[0-9]+}}:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[BASE]], 16, 0, 0 :: (dereferenceable invariant load 4, addrspace 4)

---

name:            constant_address_positive
legalized:       true
regBankSelected: true

body: |
  bb.0:
    ; No liveins: every value in this block is built from constants.
    %0:sgpr(p4) = G_CONSTANT i64 44
    %1:sgpr(s64) = G_CONSTANT i64 64
    %2:sgpr(p4) = G_PTR_ADD %0, %1
    %3:sgpr(s32) = G_LOAD %2 :: (dereferenceable invariant load 4, align 4, addrspace 4)
    ; Use the loaded value so the load has a user.
    S_ENDPGM 0, implicit %3
...