[clang] b1efefa - Revert "[openmp][nfc] Refactor GridValues"
Jon Chesterfield via cfe-commits
cfe-commits at lists.llvm.org
Fri Aug 20 10:17:51 PDT 2021
Author: Jon Chesterfield
Date: 2021-08-20T18:17:27+01:00
New Revision: b1efeface70c26f2f2e30636943c02f356ce4faa
URL: https://github.com/llvm/llvm-project/commit/b1efeface70c26f2f2e30636943c02f356ce4faa
DIFF: https://github.com/llvm/llvm-project/commit/b1efeface70c26f2f2e30636943c02f356ce4faa.diff
LOG: Revert "[openmp][nfc] Refactor GridValues"
Failed a nvptx codegen test
This reverts commit 2a47a84b40115b01e03e4d89c1d47ba74beb7bf3.
Added:
Modified:
clang/include/clang/Basic/TargetInfo.h
clang/lib/Basic/Targets/AMDGPU.cpp
clang/lib/Basic/Targets/AMDGPU.h
clang/lib/Basic/Targets/NVPTX.cpp
clang/lib/Basic/Targets/NVPTX.h
clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index fe6f67d40b53..ab855948b447 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -210,6 +210,9 @@ class TargetInfo : public virtual TransferrableTargetInfo,
unsigned char RegParmMax, SSERegParmMax;
TargetCXXABI TheCXXABI;
const LangASMap *AddrSpaceMap;
+ const llvm::omp::GV *GridValues =
+ nullptr; // target-specific GPU grid values that must be
+ // consistent between host RTL (plugin), device RTL, and clang.
mutable StringRef PlatformName;
mutable VersionTuple PlatformMinVersion;
@@ -1407,10 +1410,10 @@ class TargetInfo : public virtual TransferrableTargetInfo,
return LangAS::Default;
}
- // access target-specific GPU grid values that must be consistent between
- // host RTL (plugin), deviceRTL and clang.
- virtual const llvm::omp::GV &getGridValue() const {
- llvm_unreachable("getGridValue not implemented on this target");
+ /// Return a target-specific GPU grid values
+ const llvm::omp::GV &getGridValue() const {
+ assert(GridValues != nullptr && "GridValues not initialized");
+ return *GridValues;
}
/// Retrieve the name of the platform as it is used in the
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index ba7ffa34c73e..cebb19e7ccab 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -17,6 +17,7 @@
#include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Frontend/OpenMP/OMPGridValues.h"
using namespace clang;
using namespace clang::targets;
@@ -334,6 +335,7 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
llvm::AMDGPU::getArchAttrR600(GPUKind)) {
resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
: DataLayoutStringR600);
+ GridValues = &llvm::omp::AMDGPUGridValues;
setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
!isAMDGCN(Triple));
diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h
index e791a83f38ae..77c2c5fd5014 100644
--- a/clang/lib/Basic/Targets/AMDGPU.h
+++ b/clang/lib/Basic/Targets/AMDGPU.h
@@ -370,10 +370,6 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo {
return getLangASFromTargetAS(Constant);
}
- const llvm::omp::GV &getGridValue() const override {
- return llvm::omp::AMDGPUGridValues;
- }
-
/// \returns Target specific vtbl ptr address space.
unsigned getVtblPtrAddressSpace() const override {
return static_cast<unsigned>(Constant);
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index c245753c93f4..d1a34e4a81c5 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -16,6 +16,7 @@
#include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Frontend/OpenMP/OMPGridValues.h"
using namespace clang;
using namespace clang::targets;
@@ -64,6 +65,7 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
TLSSupported = false;
VLASupported = false;
AddrSpaceMap = &NVPTXAddrSpaceMap;
+ GridValues = &llvm::omp::NVPTXGridValues;
UseAddrSpaceMapMangling = true;
// Define available target features
diff --git a/clang/lib/Basic/Targets/NVPTX.h b/clang/lib/Basic/Targets/NVPTX.h
index ef751b8e1a8d..c7db3cdaaf10 100644
--- a/clang/lib/Basic/Targets/NVPTX.h
+++ b/clang/lib/Basic/Targets/NVPTX.h
@@ -147,10 +147,6 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo {
Opts["cl_khr_local_int32_extended_atomics"] = true;
}
- const llvm::omp::GV &getGridValue() const override {
- return llvm::omp::NVPTXGridValues;
- }
-
/// \returns If a target requires an address within a target specific address
/// space \p AddressSpace to be converted in order to be used, then return the
/// corresponding target specific DWARF address space.
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index 0e392c263471..b13d55994ef6 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -22,7 +22,6 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
-#include "llvm/Support/MathExtras.h"
using namespace clang;
using namespace CodeGen;
@@ -107,7 +106,8 @@ class ExecutionRuntimeModesRAII {
/// is the same for all known NVPTX architectures.
enum MachineConfiguration : unsigned {
/// See "llvm/Frontend/OpenMP/OMPGridValues.h" for various related target
- /// specific Grid Values like GV_Warp_Size, GV_Slot_Size
+ /// specific Grid Values like GV_Warp_Size, GV_Warp_Size_Log2,
+ /// and GV_Warp_Size_Log2_Mask.
/// Global memory alignment for performance.
GlobalMemoryAlignment = 128,
@@ -535,8 +535,7 @@ class CheckVarsEscapingDeclContext final
/// on the NVPTX device, to generate more efficient code.
static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
- unsigned LaneIDBits =
- llvm::Log2_32(CGF.getTarget().getGridValue().GV_Warp_Size);
+ unsigned LaneIDBits = CGF.getTarget().getGridValue().GV_Warp_Size_Log2;
auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
return Bld.CreateAShr(RT.getGPUThreadID(CGF), LaneIDBits, "nvptx_warp_id");
}
@@ -546,9 +545,8 @@ static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) {
/// on the NVPTX device, to generate more efficient code.
static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
- unsigned LaneIDBits =
- llvm::Log2_32(CGF.getTarget().getGridValue().GV_Warp_Size);
- unsigned LaneIDMask = ~0 >> (32u - LaneIDBits);
+ unsigned LaneIDMask =
+ CGF.getContext().getTargetInfo().getGridValue().GV_Warp_Size_Log2_Mask;
auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
return Bld.CreateAnd(RT.getGPUThreadID(CGF), Bld.getInt32(LaneIDMask),
"nvptx_lane_id");
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h b/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
index 2130b9bf91cd..1d7735ebf72d 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
@@ -62,13 +62,19 @@ struct GV {
const unsigned GV_Slot_Size;
/// The default value of maximum number of threads in a worker warp.
const unsigned GV_Warp_Size;
-
- constexpr unsigned warpSlotSize() const {
- return GV_Warp_Size * GV_Slot_Size;
- }
-
+ /// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size
+ /// for NVPTX.
+ const unsigned GV_Warp_Size_32;
+ /// The number of bits required to represent the max number of threads in warp
+ const unsigned GV_Warp_Size_Log2;
+ /// GV_Warp_Size * GV_Slot_Size,
+ const unsigned GV_Warp_Slot_Size;
/// the maximum number of teams.
const unsigned GV_Max_Teams;
+ /// Global Memory Alignment
+ const unsigned GV_Mem_Align;
+ /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
+ const unsigned GV_Warp_Size_Log2_Mask;
// An alternative to the heavy data sharing infrastructure that uses global
// memory is one that uses device __shared__ memory. The amount of such space
// (in bytes) reserved by the OpenMP runtime is noted here.
@@ -77,32 +83,47 @@ struct GV {
const unsigned GV_Max_WG_Size;
// The default maximum team size for a working group
const unsigned GV_Default_WG_Size;
-
- constexpr unsigned maxWarpNumber() const {
- return GV_Max_WG_Size / GV_Warp_Size;
- }
+ // This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN.
+ const unsigned GV_Max_Warp_Number;
+ /// The slot size that should be reserved for a working warp.
+ /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
+ const unsigned GV_Warp_Size_Log2_MaskL;
};
/// For AMDGPU GPUs
static constexpr GV AMDGPUGridValues = {
- 448, // GV_Threads
- 256, // GV_Slot_Size
- 64, // GV_Warp_Size
- 128, // GV_Max_Teams
- 896, // GV_SimpleBufferSize
- 1024, // GV_Max_WG_Size,
- 256, // GV_Default_WG_Size
+ 448, // GV_Threads
+ 256, // GV_Slot_Size
+ 64, // GV_Warp_Size
+ 32, // GV_Warp_Size_32
+ 6, // GV_Warp_Size_Log2
+ 64 * 256, // GV_Warp_Slot_Size
+ 128, // GV_Max_Teams
+ 256, // GV_Mem_Align
+ 63, // GV_Warp_Size_Log2_Mask
+ 896, // GV_SimpleBufferSize
+ 1024, // GV_Max_WG_Size,
+ 256, // GV_Defaut_WG_Size
+ 1024 / 64, // GV_Max_WG_Size / GV_WarpSize
+ 63 // GV_Warp_Size_Log2_MaskL
};
/// For Nvidia GPUs
static constexpr GV NVPTXGridValues = {
- 992, // GV_Threads
- 256, // GV_Slot_Size
- 32, // GV_Warp_Size
- 1024, // GV_Max_Teams
- 896, // GV_SimpleBufferSize
- 1024, // GV_Max_WG_Size
- 128, // GV_Default_WG_Size
+ 992, // GV_Threads
+ 256, // GV_Slot_Size
+ 32, // GV_Warp_Size
+ 32, // GV_Warp_Size_32
+ 5, // GV_Warp_Size_Log2
+ 32 * 256, // GV_Warp_Slot_Size
+ 1024, // GV_Max_Teams
+ 256, // GV_Mem_Align
+ (~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask
+ 896, // GV_SimpleBufferSize
+ 1024, // GV_Max_WG_Size
+ 128, // GV_Defaut_WG_Size
+ 1024 / 32, // GV_Max_WG_Size / GV_WarpSize
+ 31 // GV_Warp_Size_Log2_MaskL
};
} // namespace omp
More information about the cfe-commits
mailing list