[PATCH] D83492: [OpenMP] Use common interface to access GPU Grid Values
Saiyedul Islam via Phabricator via cfe-commits
cfe-commits at lists.llvm.org
Thu Jul 9 09:56:04 PDT 2020
saiislam created this revision.
saiislam added reviewers: jdoerfert, ABataev, JonChesterfield.
Herald added subscribers: cfe-commits, sstefan1, guansong, yaxunl, jholewinski.
Herald added a project: clang.
Use the common interface for accessing target-specific GPU grid values in NVPTX
OpenMP codegen, as proposed in https://reviews.llvm.org/D80917
Originally authored by Greg Rodgers (@gregrodgers).
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D83492
Files:
clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
Index: clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
===================================================================
--- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -19,6 +19,7 @@
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/Cuda.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Frontend/OpenMP/OMPGridValues.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
using namespace clang;
@@ -196,11 +197,10 @@
/// code. For all practical purposes this is fine because the configuration
/// is the same for all known NVPTX architectures.
enum MachineConfiguration : unsigned {
- WarpSize = 32,
- /// Number of bits required to represent a lane identifier, which is
- /// computed as log_2(WarpSize).
+ /// Number of bits required to represent a lane identifier
+ /// See "llvm/Frontend/OpenMP/OMPGridValues.h" for various related target
+ /// specific Grid Values like GV_Warp_Size
LaneIDBits = 5,
- LaneIDMask = WarpSize - 1,
/// Global memory alignment for performance.
GlobalMemoryAlignment = 128,
@@ -436,6 +436,7 @@
EscapedDeclsForTeams = EscapedDecls.getArrayRef();
else
EscapedDeclsForParallel = EscapedDecls.getArrayRef();
+ unsigned WarpSize = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
GlobalizedRD = ::buildRecordForGlobalizedVars(
CGF.getContext(), EscapedDeclsForParallel, EscapedDeclsForTeams,
MappedDeclsFields, WarpSize);
@@ -624,6 +625,12 @@
/// Get the GPU warp size.
static llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF) {
+ if (CGF.getTarget().getTriple().isAMDGCN()) {
+ CGBuilderTy &Bld = CGF.Builder;
+ // return constant compile-time target-specific warp size
+ unsigned WarpSize = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
+ return Bld.getInt32(WarpSize);
+ }
return CGF.EmitRuntimeCall(
llvm::Intrinsic::getDeclaration(
&CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
@@ -643,6 +650,8 @@
/// on the NVPTX device, to generate more efficient code.
static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
+ unsigned LaneIDBits =
+ CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size_Log2);
return Bld.CreateAShr(getNVPTXThreadID(CGF), LaneIDBits, "nvptx_warp_id");
}
@@ -651,6 +660,8 @@
/// on the NVPTX device, to generate more efficient code.
static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
+ unsigned LaneIDMask = CGF.getContext().getTargetInfo().getGridValue(
+ llvm::omp::GV_Warp_Size_Log2_Mask);
return Bld.CreateAnd(getNVPTXThreadID(CGF), Bld.getInt32(LaneIDMask),
"nvptx_lane_id");
}
@@ -2073,6 +2084,7 @@
getTeamsReductionVars(CGM.getContext(), D, LastPrivatesReductions);
if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) {
getDistributeLastprivateVars(CGM.getContext(), D, LastPrivatesReductions);
+ unsigned WarpSize = CGM.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
if (!LastPrivatesReductions.empty()) {
GlobalizedRD = ::buildRecordForGlobalizedVars(
CGM.getContext(), llvm::None, LastPrivatesReductions,
@@ -3243,6 +3255,7 @@
"__openmp_nvptx_data_transfer_temporary_storage";
llvm::GlobalVariable *TransferMedium =
M.getGlobalVariable(TransferMediumName);
+ unsigned WarpSize = CGM.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
if (!TransferMedium) {
auto *Ty = llvm::ArrayType::get(CGM.Int32Ty, WarpSize);
unsigned SharedAddressSpace = C.getTargetAddressSpace(LangAS::cuda_shared);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D83492.276767.patch
Type: text/x-patch
Size: 3658 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20200709/0578c6c9/attachment.bin>
More information about the cfe-commits mailing list