[clang] 77579b9 - [openmp][nfc] Replace OMPGridValues array with struct
Jon Chesterfield via cfe-commits
cfe-commits at lists.llvm.org
Thu Aug 19 05:25:56 PDT 2021
Author: Jon Chesterfield
Date: 2021-08-19T13:25:42+01:00
New Revision: 77579b99e9ce1638ca696fa7c3872ae8668d997d
URL: https://github.com/llvm/llvm-project/commit/77579b99e9ce1638ca696fa7c3872ae8668d997d
DIFF: https://github.com/llvm/llvm-project/commit/77579b99e9ce1638ca696fa7c3872ae8668d997d.diff
LOG: [openmp][nfc] Replace OMPGridValues array with struct
[nfc] Replaces enum indices into an array with a struct. Named the
fields to match the enum, leaves memory layout and initialization unchanged.
Motivation is to later safely remove dead fields and replace redundant ones
with (compile time) computation. It should also be possible to factor some
common fields into a base and introduce a gfx10 amdgpu instance with less
duplication than the arrays of integers require.
Reviewed By: ronlieb
Differential Revision: https://reviews.llvm.org/D108339
Added:
Modified:
clang/include/clang/Basic/TargetInfo.h
clang/lib/Basic/Targets/AMDGPU.cpp
clang/lib/Basic/Targets/NVPTX.cpp
clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index 21289b0dfd04..ab855948b447 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -210,8 +210,8 @@ class TargetInfo : public virtual TransferrableTargetInfo,
unsigned char RegParmMax, SSERegParmMax;
TargetCXXABI TheCXXABI;
const LangASMap *AddrSpaceMap;
- const unsigned *GridValues =
- nullptr; // Array of target-specific GPU grid values that must be
+ const llvm::omp::GV *GridValues =
+ nullptr; // target-specific GPU grid values that must be
// consistent between host RTL (plugin), device RTL, and clang.
mutable StringRef PlatformName;
@@ -1410,10 +1410,10 @@ class TargetInfo : public virtual TransferrableTargetInfo,
return LangAS::Default;
}
- /// Return a target-specific GPU grid value based on the GVIDX enum \p gv
- unsigned getGridValue(llvm::omp::GVIDX gv) const {
+ /// Return a target-specific GPU grid values
+ const llvm::omp::GV &getGridValue() const {
assert(GridValues != nullptr && "GridValues not initialized");
- return GridValues[gv];
+ return *GridValues;
}
/// Retrieve the name of the platform as it is used in the
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index fac786dbcf9e..cebb19e7ccab 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -335,7 +335,7 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
llvm::AMDGPU::getArchAttrR600(GPUKind)) {
resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
: DataLayoutStringR600);
- GridValues = llvm::omp::AMDGPUGpuGridValues;
+ GridValues = &llvm::omp::AMDGPUGridValues;
setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
!isAMDGCN(Triple));
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index 56f8a179db3c..d1a34e4a81c5 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -65,7 +65,7 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
TLSSupported = false;
VLASupported = false;
AddrSpaceMap = &NVPTXAddrSpaceMap;
- GridValues = llvm::omp::NVPTXGpuGridValues;
+ GridValues = &llvm::omp::NVPTXGridValues;
UseAddrSpaceMapMangling = true;
// Define available target features
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
index 33d4ab838af1..cac5faaa8d0f 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeAMDGCN.cpp
@@ -20,6 +20,7 @@
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/Cuda.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Frontend/OpenMP/OMPGridValues.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
using namespace clang;
@@ -35,7 +36,7 @@ CGOpenMPRuntimeAMDGCN::CGOpenMPRuntimeAMDGCN(CodeGenModule &CGM)
llvm::Value *CGOpenMPRuntimeAMDGCN::getGPUWarpSize(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
// return constant compile-time target-specific warp size
- unsigned WarpSize = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
+ unsigned WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size;
return Bld.getInt32(WarpSize);
}
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 63fecedc6fb7..b13d55994ef6 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -339,7 +339,7 @@ class CheckVarsEscapingDeclContext final
assert(!GlobalizedRD &&
"Record for globalized variables is built already.");
ArrayRef<const ValueDecl *> EscapedDeclsForParallel, EscapedDeclsForTeams;
- unsigned WarpSize = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
+ unsigned WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size;
if (IsInTTDRegion)
EscapedDeclsForTeams = EscapedDecls.getArrayRef();
else
@@ -535,8 +535,7 @@ class CheckVarsEscapingDeclContext final
/// on the NVPTX device, to generate more efficient code.
static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
- unsigned LaneIDBits =
- CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size_Log2);
+ unsigned LaneIDBits = CGF.getTarget().getGridValue().GV_Warp_Size_Log2;
auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
return Bld.CreateAShr(RT.getGPUThreadID(CGF), LaneIDBits, "nvptx_warp_id");
}
@@ -546,8 +545,8 @@ static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) {
/// on the NVPTX device, to generate more efficient code.
static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
- unsigned LaneIDMask = CGF.getContext().getTargetInfo().getGridValue(
- llvm::omp::GV_Warp_Size_Log2_Mask);
+ unsigned LaneIDMask =
+ CGF.getContext().getTargetInfo().getGridValue().GV_Warp_Size_Log2_Mask;
auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
return Bld.CreateAnd(RT.getGPUThreadID(CGF), Bld.getInt32(LaneIDMask),
"nvptx_lane_id");
@@ -1308,7 +1307,7 @@ llvm::Function *CGOpenMPRuntimeGPU::emitTeamsOutlinedFunction(
const RecordDecl *GlobalizedRD = nullptr;
llvm::SmallVector<const ValueDecl *, 4> LastPrivatesReductions;
llvm::SmallDenseMap<const ValueDecl *, const FieldDecl *> MappedDeclsFields;
- unsigned WarpSize = CGM.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
+ unsigned WarpSize = CGM.getTarget().getGridValue().GV_Warp_Size;
// Globalize team reductions variable unconditionally in all modes.
if (getExecutionMode() != CGOpenMPRuntimeGPU::EM_SPMD)
getTeamsReductionVars(CGM.getContext(), D, LastPrivatesReductions);
@@ -2089,7 +2088,7 @@ static llvm::Value *emitInterWarpCopyFunction(CodeGenModule &CGM,
"__openmp_nvptx_data_transfer_temporary_storage";
llvm::GlobalVariable *TransferMedium =
M.getGlobalVariable(TransferMediumName);
- unsigned WarpSize = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
+ unsigned WarpSize = CGF.getTarget().getGridValue().GV_Warp_Size;
if (!TransferMedium) {
auto *Ty = llvm::ArrayType::get(CGM.Int32Ty, WarpSize);
unsigned SharedAddressSpace = C.getTargetAddressSpace(LangAS::cuda_shared);
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
index b5f1b843c46b..5d3b711e6d4b 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h
@@ -17,7 +17,6 @@
#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
#include "clang/AST/StmtOpenMP.h"
-#include "llvm/Frontend/OpenMP/OMPGridValues.h"
namespace clang {
namespace CodeGen {
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h b/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
index 0b6aed1e9e12..1d7735ebf72d 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
@@ -29,68 +29,69 @@ namespace omp {
///
/// Example usage in clang:
/// const unsigned slot_size =
-/// ctx.GetTargetInfo().getGridValue(llvm::omp::GVIDX::GV_Warp_Size);
+/// ctx.GetTargetInfo().getGridValue().GV_Warp_Size;
///
/// Example usage in libomptarget/deviceRTLs:
/// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
/// #ifdef __AMDGPU__
-/// #define GRIDVAL AMDGPUGpuGridValues
+/// #define GRIDVAL AMDGPUGridValues
/// #else
-/// #define GRIDVAL NVPTXGpuGridValues
+/// #define GRIDVAL NVPTXGridValues
/// #endif
/// ... Then use this reference for GV_Warp_Size in the deviceRTL source.
-/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
+/// llvm::omp::GRIDVAL().GV_Warp_Size
///
/// Example usage in libomptarget hsa plugin:
/// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
-/// #define GRIDVAL AMDGPUGpuGridValues
+/// #define GRIDVAL AMDGPUGridValues
/// ... Then use this reference to access GV_Warp_Size in the hsa plugin.
-/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
+/// llvm::omp::GRIDVAL().GV_Warp_Size
///
/// Example usage in libomptarget cuda plugin:
/// #include "llvm/Frontend/OpenMP/OMPGridValues.h"
-/// #define GRIDVAL NVPTXGpuGridValues
+/// #define GRIDVAL NVPTXGridValues
/// ... Then use this reference to access GV_Warp_Size in the cuda plugin.
-/// llvm::omp::GRIDVAL[llvm::omp::GVIDX::GV_Warp_Size]
+/// llvm::omp::GRIDVAL().GV_Warp_Size
///
-enum GVIDX {
+
+struct GV {
/// The maximum number of workers in a kernel.
/// (THREAD_ABSOLUTE_LIMIT) - (GV_Warp_Size), might be issue for blockDim.z
- GV_Threads,
+ const unsigned GV_Threads;
/// The size reserved for data in a shared memory slot.
- GV_Slot_Size,
+ const unsigned GV_Slot_Size;
/// The default value of maximum number of threads in a worker warp.
- GV_Warp_Size,
+ const unsigned GV_Warp_Size;
/// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size
/// for NVPTX.
- GV_Warp_Size_32,
+ const unsigned GV_Warp_Size_32;
/// The number of bits required to represent the max number of threads in warp
- GV_Warp_Size_Log2,
+ const unsigned GV_Warp_Size_Log2;
/// GV_Warp_Size * GV_Slot_Size,
- GV_Warp_Slot_Size,
+ const unsigned GV_Warp_Slot_Size;
/// the maximum number of teams.
- GV_Max_Teams,
+ const unsigned GV_Max_Teams;
/// Global Memory Alignment
- GV_Mem_Align,
+ const unsigned GV_Mem_Align;
/// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
- GV_Warp_Size_Log2_Mask,
+ const unsigned GV_Warp_Size_Log2_Mask;
// An alternative to the heavy data sharing infrastructure that uses global
// memory is one that uses device __shared__ memory. The amount of such space
// (in bytes) reserved by the OpenMP runtime is noted here.
- GV_SimpleBufferSize,
+ const unsigned GV_SimpleBufferSize;
// The absolute maximum team size for a working group
- GV_Max_WG_Size,
+ const unsigned GV_Max_WG_Size;
// The default maximum team size for a working group
- GV_Default_WG_Size,
+ const unsigned GV_Default_WG_Size;
// This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN.
- GV_Max_Warp_Number,
+ const unsigned GV_Max_Warp_Number;
/// The slot size that should be reserved for a working warp.
/// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
- GV_Warp_Size_Log2_MaskL
+ const unsigned GV_Warp_Size_Log2_MaskL;
};
/// For AMDGPU GPUs
-static constexpr unsigned AMDGPUGpuGridValues[] = {
+static constexpr GV AMDGPUGridValues = {
448, // GV_Threads
256, // GV_Slot_Size
64, // GV_Warp_Size
@@ -108,7 +109,7 @@ static constexpr unsigned AMDGPUGpuGridValues[] = {
};
/// For Nvidia GPUs
-static constexpr unsigned NVPTXGpuGridValues[] = {
+static constexpr GV NVPTXGridValues = {
992, // GV_Threads
256, // GV_Slot_Size
32, // GV_Warp_Size
diff --git a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
index 4865ef6ffbba..31adc72e9b92 100644
--- a/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
+++ b/openmp/libomptarget/plugins/amdgpu/src/rtl.cpp
@@ -501,14 +501,11 @@ class RTLDeviceInfoTy {
static const unsigned HardTeamLimit =
(1 << 16) - 1; // 64K needed to fit in uint16
static const int DefaultNumTeams = 128;
- static const int Max_Teams =
- llvm::omp::AMDGPUGpuGridValues[llvm::omp::GVIDX::GV_Max_Teams];
- static const int Warp_Size =
- llvm::omp::AMDGPUGpuGridValues[llvm::omp::GVIDX::GV_Warp_Size];
- static const int Max_WG_Size =
- llvm::omp::AMDGPUGpuGridValues[llvm::omp::GVIDX::GV_Max_WG_Size];
+ static const int Max_Teams = llvm::omp::AMDGPUGridValues.GV_Max_Teams;
+ static const int Warp_Size = llvm::omp::AMDGPUGridValues.GV_Warp_Size;
+ static const int Max_WG_Size = llvm::omp::AMDGPUGridValues.GV_Max_WG_Size;
static const int Default_WG_Size =
- llvm::omp::AMDGPUGpuGridValues[llvm::omp::GVIDX::GV_Default_WG_Size];
+ llvm::omp::AMDGPUGridValues.GV_Default_WG_Size;
using MemcpyFunc = hsa_status_t (*)(hsa_signal_t, void *, const void *,
size_t size, hsa_agent_t);
@@ -1058,9 +1055,8 @@ int32_t __tgt_rtl_init_device(int device_id) {
DeviceInfo.WarpSize[device_id] = wavefront_size;
} else {
DP("Default wavefront size: %d\n",
- llvm::omp::AMDGPUGpuGridValues[llvm::omp::GVIDX::GV_Warp_Size]);
- DeviceInfo.WarpSize[device_id] =
- llvm::omp::AMDGPUGpuGridValues[llvm::omp::GVIDX::GV_Warp_Size];
+ llvm::omp::AMDGPUGridValues.GV_Warp_Size);
+ DeviceInfo.WarpSize[device_id] = llvm::omp::AMDGPUGridValues.GV_Warp_Size;
}
// Adjust teams to the env variables
More information about the cfe-commits
mailing list