[PATCH] D83492: [OpenMP] Use common interface to access GPU Grid Values

Thu Jul 9 09:56:04 PDT 2020

saiislam created this revision.
saiislam added reviewers: jdoerfert, ABataev, JonChesterfield.
Herald added subscribers: cfe-commits, sstefan1, guansong, yaxunl, jholewinski.
Herald added a project: clang.

Use common interface for accessing target specific GPU grid values in NVPTX
OpenMP codegen as proposed in https://reviews.llvm.org/D80917

Originally authored by Greg Rodgers (@gregrodgers).


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D83492

Files:
  clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp


Index: clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
===================================================================

--- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -19,6 +19,7 @@
 #include "clang/AST/StmtVisitor.h"
 #include "clang/Basic/Cuda.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Frontend/OpenMP/OMPGridValues.h"
 #include "llvm/IR/IntrinsicsNVPTX.h"
 
 using namespace clang;
@@ -196,11 +197,10 @@
 /// code.  For all practical purposes this is fine because the configuration
 /// is the same for all known NVPTX architectures.
 enum MachineConfiguration : unsigned {
-  WarpSize = 32,
-  /// Number of bits required to represent a lane identifier, which is
-  /// computed as log_2(WarpSize).
+  /// Number of bits required to represent a lane identifier
+  /// See "llvm/Frontend/OpenMP/OMPGridValues.h" for various related target
+  /// specific Grid Values like GV_Warp_Size
   LaneIDBits = 5,
-  LaneIDMask = WarpSize - 1,
 
   /// Global memory alignment for performance.
   GlobalMemoryAlignment = 128,
@@ -436,6 +436,7 @@
       EscapedDeclsForTeams = EscapedDecls.getArrayRef();
     else
       EscapedDeclsForParallel = EscapedDecls.getArrayRef();
+    unsigned WarpSize = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
     GlobalizedRD = ::buildRecordForGlobalizedVars(
         CGF.getContext(), EscapedDeclsForParallel, EscapedDeclsForTeams,
         MappedDeclsFields, WarpSize);
@@ -624,6 +625,12 @@
 
 /// Get the GPU warp size.
 static llvm::Value *getNVPTXWarpSize(CodeGenFunction &CGF) {
+  if (CGF.getTarget().getTriple().isAMDGCN()) {
+    CGBuilderTy &Bld = CGF.Builder;
+    // return constant compile-time target-specific warp size
+    unsigned WarpSize = CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
+    return Bld.getInt32(WarpSize);
+  }
   return CGF.EmitRuntimeCall(
       llvm::Intrinsic::getDeclaration(
           &CGF.CGM.getModule(), llvm::Intrinsic::nvvm_read_ptx_sreg_warpsize),
@@ -643,6 +650,8 @@
 /// on the NVPTX device, to generate more efficient code.
 static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) {
   CGBuilderTy &Bld = CGF.Builder;
+  unsigned LaneIDBits =
+      CGF.getTarget().getGridValue(llvm::omp::GV_Warp_Size_Log2);
   return Bld.CreateAShr(getNVPTXThreadID(CGF), LaneIDBits, "nvptx_warp_id");
 }
 
@@ -651,6 +660,8 @@
 /// on the NVPTX device, to generate more efficient code.
 static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) {
   CGBuilderTy &Bld = CGF.Builder;
+  unsigned LaneIDMask = CGF.getContext().getTargetInfo().getGridValue(
+      llvm::omp::GV_Warp_Size_Log2_Mask);
   return Bld.CreateAnd(getNVPTXThreadID(CGF), Bld.getInt32(LaneIDMask),
                        "nvptx_lane_id");
 }
@@ -2073,6 +2084,7 @@
     getTeamsReductionVars(CGM.getContext(), D, LastPrivatesReductions);
   if (getExecutionMode() == CGOpenMPRuntimeNVPTX::EM_SPMD) {
     getDistributeLastprivateVars(CGM.getContext(), D, LastPrivatesReductions);
+    unsigned WarpSize = CGM.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
     if (!LastPrivatesReductions.empty()) {
       GlobalizedRD = ::buildRecordForGlobalizedVars(
           CGM.getContext(), llvm::None, LastPrivatesReductions,
@@ -3243,6 +3255,7 @@
       "__openmp_nvptx_data_transfer_temporary_storage";
   llvm::GlobalVariable *TransferMedium =
       M.getGlobalVariable(TransferMediumName);
+  unsigned WarpSize = CGM.getTarget().getGridValue(llvm::omp::GV_Warp_Size);
   if (!TransferMedium) {
     auto *Ty = llvm::ArrayType::get(CGM.Int32Ty, WarpSize);
     unsigned SharedAddressSpace = C.getTargetAddressSpace(LangAS::cuda_shared);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D83492.276767.patch
Type: text/x-patch
Size: 3658 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20200709/0578c6c9/attachment.bin>