86 lines
3.4 KiB
Plaintext
86 lines
3.4 KiB
Plaintext
// RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \
|
|
// RUN: -triple x86_64-linux-gnu \
|
|
// RUN: | FileCheck -check-prefix=HOST %s
|
|
// RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \
|
|
// RUN: -triple amdgcn-amd-amdhsa -fcuda-is-device \
|
|
// RUN: | FileCheck -check-prefix=DEV %s
|
|
|
|
#include "Inputs/cuda.h"
|
|
|
|
// Device side kernel name.
|
|
// HOST: @[[KERN_CAPTURE:[0-9]+]] = {{.*}} c"_Z1gIZ12test_capturevEUlvE_EvT_\00"
|
|
// HOST: @[[KERN_RESOLVE:[0-9]+]] = {{.*}} c"_Z1gIZ12test_resolvevEUlvE_EvT_\00"
|
|
|
|
// Check functions emitted for test_capture in host compilation.
|
|
// Check lambda is not emitted in host compilation.
|
|
// HOST-LABEL: define{{.*}} void @_Z12test_capturev
|
|
// HOST: call void @_Z19test_capture_helperIZ12test_capturevEUlvE_EvT_
|
|
// HOST-LABEL: define internal void @_Z19test_capture_helperIZ12test_capturevEUlvE_EvT_
|
|
// HOST: call void @_Z16__device_stub__gIZ12test_capturevEUlvE_EvT_
|
|
// HOST-NOT: define{{.*}}@_ZZ4mainENKUlvE_clEv
|
|
|
|
// Check functions emitted for test_resolve in host compilation.
|
|
// Check host version of template function 'overloaded' is emitted and called
|
|
// by the lambda function.
|
|
// HOST-LABEL: define{{.*}} void @_Z12test_resolvev
|
|
// HOST: call void @_Z19test_resolve_helperIZ12test_resolvevEUlvE_EvT_()
|
|
// HOST-LABEL: define internal void @_Z19test_resolve_helperIZ12test_resolvevEUlvE_EvT_
|
|
// HOST: call void @_Z16__device_stub__gIZ12test_resolvevEUlvE_EvT_
|
|
// HOST: call void @_ZZ12test_resolvevENKUlvE_clEv
|
|
// HOST-LABEL: define internal void @_ZZ12test_resolvevENKUlvE_clEv
|
|
// HOST: call i32 @_Z10overloadedIiET_v
|
|
// HOST-LABEL: define linkonce_odr i32 @_Z10overloadedIiET_v
|
|
// HOST: ret i32 2
|
|
|
|
// Check kernel is registered with correct device side kernel name.
|
|
// HOST: @__hipRegisterFunction({{.*}}@[[KERN_CAPTURE]]
|
|
// HOST: @__hipRegisterFunction({{.*}}@[[KERN_RESOLVE]]
|
|
|
|
// DEV: @a ={{.*}} addrspace(1) externally_initialized global i32 0
|
|
|
|
// Check functions emitted for test_capture in device compilation.
|
|
// Check lambda is emitted in device compilation and accessing device variable.
|
|
// DEV-LABEL: define{{.*}} amdgpu_kernel void @_Z1gIZ12test_capturevEUlvE_EvT_
|
|
// DEV: call void @_ZZ12test_capturevENKUlvE_clEv
|
|
// DEV-LABEL: define internal void @_ZZ12test_capturevENKUlvE_clEv
|
|
// DEV: store i32 1, i32* addrspacecast (i32 addrspace(1)* @a to i32*)
|
|
|
|
// Check functions emitted for test_resolve in device compilation.
|
|
// Check device version of template function 'overloaded' is emitted and called
|
|
// by the lambda function.
|
|
// DEV-LABEL: define{{.*}} amdgpu_kernel void @_Z1gIZ12test_resolvevEUlvE_EvT_
|
|
// DEV: call void @_ZZ12test_resolvevENKUlvE_clEv
|
|
// DEV-LABEL: define internal void @_ZZ12test_resolvevENKUlvE_clEv
|
|
// DEV: call i32 @_Z10overloadedIiET_v
|
|
// DEV-LABEL: define linkonce_odr i32 @_Z10overloadedIiET_v
|
|
// DEV: ret i32 1
|
|
|
|
__device__ int a;
|
|
|
|
template<class T>
|
|
__device__ T overloaded() { return 1; }
|
|
|
|
template<class T>
|
|
__host__ T overloaded() { return 2; }
|
|
|
|
template<class F>
|
|
__global__ void g(F f) { f(); }
|
|
|
|
template<class F>
|
|
void test_capture_helper(F f) { g<<<1,1>>>(f); }
|
|
|
|
template<class F>
|
|
void test_resolve_helper(F f) { g<<<1,1>>>(f); f(); }
|
|
|
|
// Test capture of device variable in lambda function.
|
|
void test_capture(void) {
|
|
test_capture_helper([](){ a = 1;});
|
|
}
|
|
|
|
// Test resolving host/device function in lambda function.
|
|
// Callee should resolve to correct host/device function based on where
|
|
// the lambda function is called, not where it is defined.
|
|
void test_resolve(void) {
|
|
test_resolve_helper([](){ overloaded<int>();});
|
|
}
|