llvm-for-llvmta/tools/clang/test/CodeGenCUDA/link-device-bitcode.cu

// Test for linking with CUDA's libdevice as outlined in
// http://llvm.org/docs/NVPTXUsage.html#linking-with-libdevice
//
// REQUIRES: nvptx-registered-target
//
// Prepare bitcode file to link with
// RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc \
// RUN:    -disable-llvm-passes -o %t.bc %S/Inputs/device-code.ll
// RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc \
// RUN:    -disable-llvm-passes -o %t-2.bc %S/Inputs/device-code-2.ll
//
// Make sure function in device-code gets linked in and internalized.
// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
// RUN:    -mlink-builtin-bitcode %t.bc  -emit-llvm \
// RUN:    -disable-llvm-passes -o - %s \
// RUN:    | FileCheck %s -check-prefix CHECK-IR

// Make sure the legacy flag name (-mlink-cuda-bitcode) still works.
// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
// RUN:    -mlink-cuda-bitcode %t.bc  -emit-llvm \
// RUN:    -disable-llvm-passes -o - %s \
// RUN:    | FileCheck %s -check-prefix CHECK-IR
//
// Make sure we can link two bitcode files.
// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
// RUN:    -mlink-builtin-bitcode %t.bc -mlink-builtin-bitcode %t-2.bc \
// RUN:    -emit-llvm -disable-llvm-passes -o - %s \
// RUN:    | FileCheck %s -check-prefix CHECK-IR -check-prefix CHECK-IR-2
//
// Make sure function in device-code gets linked but is not internalized
// when linked with -mlink-bitcode-file instead of -mlink-builtin-bitcode.
// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
// RUN:    -mlink-bitcode-file %t.bc -emit-llvm \
// RUN:    -disable-llvm-passes -o - %s \
// RUN:    | FileCheck %s -check-prefix CHECK-IR-NLD
//
// Make sure NVVMReflect pass is enabled in NVPTX back-end.
// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \
// RUN:    -mlink-builtin-bitcode %t.bc -S -o /dev/null %s \
// RUN:    -mllvm -debug-pass=Structure 2>&1 \
// RUN:    | FileCheck %s -check-prefix CHECK-REFLECT

#include "Inputs/cuda.h"

// Forward declarations only: none of these functions has a body in this file.
// The checks at the end of the file expect their definitions to show up in
// the emitted IR once the bitcode files given on the command line are linked
// in (device_mul_or_add from %t.bc, __nv_sin and __nv_exp from %t-2.bc).
__device__ float device_mul_or_add(float a, float b);
// Declared extern "C" so the symbol names are not C++-mangled; the checks
// below look for the plain names @__nv_sin and @__nv_exp.
extern "C" __device__ double __nv_sin(double x);
extern "C" __device__ double __nv_exp(double x);

// Device function defined directly in this translation unit (empty body), as
// opposed to the functions that come from the linked bitcode. The test
// expects it to remain a normal definition after linking.
// NOTE(review): none of the lit commands visible in this file selects the
// CHECK-PTX prefix, so the PTX directive below appears to be unused -- confirm.
// NOTE(review): the {{.*}} wildcard in the IR pattern would also match a
// linkage keyword, so the IR directive on its own only shows the definition
// is present -- confirm whether a stricter pattern is wanted.
// CHECK-IR-LABEL: define{{.*}} void @_Z26should_not_be_internalizedPf(
// CHECK-PTX-LABEL: .visible .func _Z26should_not_be_internalizedPf(
__device__ void should_not_be_internalized(float *data) {}

// Make sure the kernel itself has not been internalized.
// CHECK-IR-LABEL: define{{.*}} void @_Z6kernelPfS_
// CHECK-PTX-LABEL: .visible .entry _Z6kernelPfS_(
// Kernel entry point. It calls device_mul_or_add, __nv_sin and __nv_exp, which
// are only declared in this file, plus the locally defined
// should_not_be_internalized(), so this translation unit references all four.
// __attribute__((used)) is applied to the kernel; presumably to keep it from
// being discarded as unreferenced -- confirm.
__global__ __attribute__((used)) void kernel(float *out, float *in) {
  // Store the result of device_mul_or_add on the first two elements of `in`.
  *out = device_mul_or_add(in[0], in[1]);
  // __nv_sin/__nv_exp are declared with double parameters and results, so this
  // compound assignment combines the float at *out with a double operand.
  *out += __nv_exp(__nv_sin(*out));
  // Reference the local device function from the kernel.
  should_not_be_internalized(out);
}

// Make sure device_mul_or_add() is present in IR, is internal and
// calls __nvvm_reflect().
// CHECK-IR-LABEL: define internal float @_Z17device_mul_or_addff(
// CHECK-IR-NLD-LABEL: define{{.*}} float @_Z17device_mul_or_addff(
// CHECK-IR: call i32 @__nvvm_reflect
// CHECK-IR: ret float

// Make sure we've linked in and internalized only needed functions
// from the second bitcode file.
// CHECK-IR-2-LABEL: define internal double @__nv_sin
// CHECK-IR-2-LABEL: define internal double @__nv_exp
// CHECK-IR-2-NOT: double @__unused

// Verify that NVVMReflect pass is among the passes run by NVPTX back-end.
// CHECK-REFLECT: Replace occurrences of __nvvm_reflect() calls with 0/1
added clang 2022-04-25 13:02:35 +02:00			`// Test for linking with CUDA's libdevice as outlined in`
			`// http://llvm.org/docs/NVPTXUsage.html#linking-with-libdevice`
			`//`
			`// REQUIRES: nvptx-registered-target`
			`//`
			`// Prepare bitcode file to link with`
			`// RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc \`
			`// RUN: -disable-llvm-passes -o %t.bc %S/Inputs/device-code.ll`
			`// RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc \`
			`// RUN: -disable-llvm-passes -o %t-2.bc %S/Inputs/device-code-2.ll`
			`//`
			`// Make sure function in device-code gets linked in and internalized.`
			`// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \`
			`// RUN: -mlink-builtin-bitcode %t.bc -emit-llvm \`
			`// RUN: -disable-llvm-passes -o - %s \`
			`// RUN: \| FileCheck %s -check-prefix CHECK-IR`

			`// Make sure legacy flag name works`
			`// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \`
			`// RUN: -mlink-cuda-bitcode %t.bc -emit-llvm \`
			`// RUN: -disable-llvm-passes -o - %s \`
			`// RUN: \| FileCheck %s -check-prefix CHECK-IR`
			`//`
			`// Make sure we can link two bitcode files.`
			`// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \`
			`// RUN: -mlink-builtin-bitcode %t.bc -mlink-builtin-bitcode %t-2.bc \`
			`// RUN: -emit-llvm -disable-llvm-passes -o - %s \`
			`// RUN: \| FileCheck %s -check-prefix CHECK-IR -check-prefix CHECK-IR-2`
			`//`
			`// Make sure function in device-code gets linked but is not internalized`
			`// without -fcuda-uses-libdevice`
			`// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \`
			`// RUN: -mlink-bitcode-file %t.bc -emit-llvm \`
			`// RUN: -disable-llvm-passes -o - %s \`
			`// RUN: \| FileCheck %s -check-prefix CHECK-IR-NLD`
			`//`
			`// Make sure NVVMReflect pass is enabled in NVPTX back-end.`
			`// RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \`
			`// RUN: -mlink-builtin-bitcode %t.bc -S -o /dev/null %s \`
			`// RUN: -mllvm -debug-pass=Structure 2>&1 \`
			`// RUN: \| FileCheck %s -check-prefix CHECK-REFLECT`

			`#include "Inputs/cuda.h"`

			`__device__ float device_mul_or_add(float a, float b);`
			`extern "C" __device__ double __nv_sin(double x);`
			`extern "C" __device__ double __nv_exp(double x);`

			`// CHECK-IR-LABEL: define{{.*}} void @_Z26should_not_be_internalizedPf(`
			`// CHECK-PTX-LABEL: .visible .func _Z26should_not_be_internalizedPf(`
			`__device__ void should_not_be_internalized(float *data) {}`

			`// Make sure kernel call has not been internalized.`
			`// CHECK-IR-LABEL: define{{.*}} void @_Z6kernelPfS_`
			`// CHECK-PTX-LABEL: .visible .entry _Z6kernelPfS_(`
			`__global__ __attribute__((used)) void kernel(float out, float in) {`
			`*out = device_mul_or_add(in[0], in[1]);`
			`out += __nv_exp(__nv_sin(out));`
			`should_not_be_internalized(out);`
			`}`

			`// Make sure device_mul_or_add() is present in IR, is internal and`
			`// calls __nvvm_reflect().`
			`// CHECK-IR-LABEL: define internal float @_Z17device_mul_or_addff(`
			`// CHECK-IR-NLD-LABEL: define{{.*}} float @_Z17device_mul_or_addff(`
			`// CHECK-IR: call i32 @__nvvm_reflect`
			`// CHECK-IR: ret float`

			`// Make sure we've linked in and internalized only needed functions`
			`// from the second bitcode file.`
			`// CHECK-IR-2-LABEL: define internal double @__nv_sin`
			`// CHECK-IR-2-LABEL: define internal double @__nv_exp`
			`// CHECK-IR-2-NOT: double @__unused`

			`// Verify that NVVMReflect pass is among the passes run by NVPTX back-end.`
			`// CHECK-REFLECT: Replace occurrences of __nvvm_reflect() calls with 0/1`