132 lines
4.9 KiB
C++
132 lines
4.9 KiB
C++
//====--- OMPGridValues.h - Language-specific address spaces --*- C++ -*-====//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
///
|
|
/// \file
|
|
/// \brief Provides definitions for Target specific Grid Values
|
|
///
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef LLVM_OPENMP_GRIDVALUES_H
|
|
#define LLVM_OPENMP_GRIDVALUES_H
|
|
|
|
namespace llvm {
|
|
|
|
namespace omp {
|
|
|
|
/// \brief Defines various target-specific GPU grid values that must be
|
|
/// consistent between host RTL (plugin), device RTL, and clang.
|
|
/// We can change grid values for a "fat" binary so that different
|
|
/// passes get the correct values when generating code for a
|
|
/// multi-target binary. Both amdgcn and nvptx values are stored in
|
|
/// this file. In the future, should there be differences between GPUs
|
|
/// of the same architecture, then simply make a different array and
|
|
/// use the new array name.
|
|
///
|
|
/// Example usage in clang:
|
|
/// const unsigned slot_size =
|
|
/// ctx.GetTargetInfo().getGridValue(llvm::omp::GVIDX::GV_Warp_Size);
|
|
///
|
|
/// Example usage in libomptarget/deviceRTLs:
|
|
/// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
|
|
/// #ifdef __AMDGPU__
|
|
/// #define GRIDVAL AMDGPUGpuGridValues
|
|
/// #else
|
|
/// #define GRIDVAL NVPTXGpuGridValues
|
|
/// #endif
|
|
/// ... Then use this reference for GV_Warp_Size in the deviceRTL source.
|
|
/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
|
|
///
|
|
/// Example usage in libomptarget hsa plugin:
|
|
/// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
|
|
/// #define GRIDVAL AMDGPUGpuGridValues
|
|
/// ... Then use this reference to access GV_Warp_Size in the hsa plugin.
|
|
/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
|
|
///
|
|
/// Example usage in libomptarget cuda plugin:
|
|
/// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
|
|
/// #define GRIDVAL NVPTXGpuGridValues
|
|
/// ... Then use this reference to access GV_Warp_Size in the cuda plugin.
|
|
/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
|
|
///
|
|
enum GVIDX {
|
|
/// The maximum number of workers in a kernel.
|
|
/// (THREAD_ABSOLUTE_LIMIT) - (GV_Warp_Size), might be issue for blockDim.z
|
|
GV_Threads,
|
|
/// The size reserved for data in a shared memory slot.
|
|
GV_Slot_Size,
|
|
/// The default value of maximum number of threads in a worker warp.
|
|
GV_Warp_Size,
|
|
/// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size
|
|
/// for NVPTX.
|
|
GV_Warp_Size_32,
|
|
/// The number of bits required to represent the max number of threads in warp
|
|
GV_Warp_Size_Log2,
|
|
/// GV_Warp_Size * GV_Slot_Size,
|
|
GV_Warp_Slot_Size,
|
|
/// the maximum number of teams.
|
|
GV_Max_Teams,
|
|
/// Global Memory Alignment
|
|
GV_Mem_Align,
|
|
/// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
|
|
GV_Warp_Size_Log2_Mask,
|
|
// An alternative to the heavy data sharing infrastructure that uses global
|
|
// memory is one that uses device __shared__ memory. The amount of such space
|
|
// (in bytes) reserved by the OpenMP runtime is noted here.
|
|
GV_SimpleBufferSize,
|
|
// The absolute maximum team size for a working group
|
|
GV_Max_WG_Size,
|
|
// The default maximum team size for a working group
|
|
GV_Default_WG_Size,
|
|
// This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN.
|
|
GV_Max_Warp_Number,
|
|
/// The slot size that should be reserved for a working warp.
|
|
/// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
|
|
GV_Warp_Size_Log2_MaskL
|
|
};
|
|
|
|
/// For AMDGPU GPUs
|
|
static constexpr unsigned AMDGPUGpuGridValues[] = {
|
|
448, // GV_Threads
|
|
256, // GV_Slot_Size
|
|
64, // GV_Warp_Size
|
|
32, // GV_Warp_Size_32
|
|
6, // GV_Warp_Size_Log2
|
|
64 * 256, // GV_Warp_Slot_Size
|
|
128, // GV_Max_Teams
|
|
256, // GV_Mem_Align
|
|
63, // GV_Warp_Size_Log2_Mask
|
|
896, // GV_SimpleBufferSize
|
|
1024, // GV_Max_WG_Size,
|
|
256, // GV_Defaut_WG_Size
|
|
1024 / 64, // GV_Max_WG_Size / GV_WarpSize
|
|
63 // GV_Warp_Size_Log2_MaskL
|
|
};
|
|
|
|
/// For Nvidia GPUs
|
|
static constexpr unsigned NVPTXGpuGridValues[] = {
|
|
992, // GV_Threads
|
|
256, // GV_Slot_Size
|
|
32, // GV_Warp_Size
|
|
32, // GV_Warp_Size_32
|
|
5, // GV_Warp_Size_Log2
|
|
32 * 256, // GV_Warp_Slot_Size
|
|
1024, // GV_Max_Teams
|
|
256, // GV_Mem_Align
|
|
(~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask
|
|
896, // GV_SimpleBufferSize
|
|
1024, // GV_Max_WG_Size
|
|
128, // GV_Defaut_WG_Size
|
|
1024 / 32, // GV_Max_WG_Size / GV_WarpSize
|
|
31 // GV_Warp_Size_Log2_MaskL
|
|
};
|
|
|
|
} // namespace omp
|
|
} // namespace llvm
|
|
|
|
#endif // LLVM_OPENMP_GRIDVALUES_H
|