[clang] 4022bc2 - [OpenMP][AMDGCN] Support OpenMP offloading for AMDGCN architecture - Part 2

Saiyedul Islam via cfe-commits cfe-commits at lists.llvm.org
Wed Jun 10 11:10:19 PDT 2020


Author: Saiyedul Islam
Date: 2020-06-10T18:09:59Z
New Revision: 4022bc2a6c5e585d99a76b1b2f14c2986b823ce9

URL: https://github.com/llvm/llvm-project/commit/4022bc2a6c5e585d99a76b1b2f14c2986b823ce9
DIFF: https://github.com/llvm/llvm-project/commit/4022bc2a6c5e585d99a76b1b2f14c2986b823ce9.diff

LOG: [OpenMP][AMDGCN] Support OpenMP offloading for AMDGCN architecture - Part 2

Summary:
New file include to support platform dependent grid constants. It will be
used by clang, libomptarget plugins, and deviceRTLs to access constant
values consistently and with fast access in the deviceRTLs.

Originally authored by Greg Rodgers (@gregrodgers).

Reviewers: arsenm, sameerds, jdoerfert, yaxunl, b-sumner, scchan, JonChesterfield

Reviewed By: arsenm

Subscribers: llvm-commits, pdhaliwal, jholewinski, jvesely, wdng, nhaehnle, guansong, kerbowa, sstefan1, cfe-commits, ronlieb, gregrodgers

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D80917

Added: 
    llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h

Modified: 
    clang/include/clang/Basic/TargetInfo.h
    clang/lib/Basic/Targets/AMDGPU.cpp
    clang/lib/Basic/Targets/NVPTX.cpp

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index 4a7257d1b426..b1a4017b4c76 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -15,6 +15,7 @@
 #define LLVM_CLANG_BASIC_TARGETINFO_H
 
 #include "clang/Basic/AddressSpaces.h"
+#include "clang/Basic/CodeGenOptions.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/Specifiers.h"
@@ -29,6 +30,7 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/Frontend/OpenMP/OMPGridValues.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/VersionTuple.h"
 #include <cassert>
@@ -198,6 +200,9 @@ class TargetInfo : public virtual TransferrableTargetInfo,
   unsigned char RegParmMax, SSERegParmMax;
   TargetCXXABI TheCXXABI;
   const LangASMap *AddrSpaceMap;
+  const unsigned *GridValues =
+      nullptr; // Array of target-specific GPU grid values that must be
+               // consistent between host RTL (plugin), device RTL, and clang.
 
   mutable StringRef PlatformName;
   mutable VersionTuple PlatformMinVersion;
@@ -1321,6 +1326,12 @@ class TargetInfo : public virtual TransferrableTargetInfo,
     return LangAS::Default;
   }
 
+  /// Return a target-specific GPU grid value based on the GVIDX enum \param gv
+  unsigned getGridValue(llvm::omp::GVIDX gv) const {
+    assert(GridValues != nullptr && "GridValues not initialized");
+    return GridValues[gv];
+  }
+
   /// Retrieve the name of the platform as it is used in the
   /// availability attribute.
   StringRef getPlatformName() const { return PlatformName; }

diff  --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index b9d7640a10b8..8017b9ee402f 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -17,6 +17,7 @@
 #include "clang/Basic/MacroBuilder.h"
 #include "clang/Basic/TargetBuiltins.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/Frontend/OpenMP/OMPGridValues.h"
 #include "llvm/IR/DataLayout.h"
 
 using namespace clang;
@@ -286,6 +287,7 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
   resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
                                         : DataLayoutStringR600);
   assert(DataLayout->getAllocaAddrSpace() == Private);
+  GridValues = llvm::omp::AMDGPUGpuGridValues;
 
   setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
                      !isAMDGCN(Triple));

diff  --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index 39b07872b142..fda9fad777d4 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -16,6 +16,7 @@
 #include "clang/Basic/MacroBuilder.h"
 #include "clang/Basic/TargetBuiltins.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/Frontend/OpenMP/OMPGridValues.h"
 
 using namespace clang;
 using namespace clang::targets;
@@ -62,6 +63,7 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
   TLSSupported = false;
   VLASupported = false;
   AddrSpaceMap = &NVPTXAddrSpaceMap;
+  GridValues = llvm::omp::NVPTXGpuGridValues;
   UseAddrSpaceMapMangling = true;
 
   // Define available target features

diff  --git a/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h b/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
new file mode 100644
index 000000000000..3ae4a2edbf96
--- /dev/null
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h
@@ -0,0 +1,131 @@
+//====--- OMPGridValues.h - Language-specific address spaces --*- C++ -*-====//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Provides definitions for Target specific Grid Values
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OPENMP_GRIDVALUES_H
+#define LLVM_OPENMP_GRIDVALUES_H
+
+namespace llvm {
+
+namespace omp {
+
+/// \brief Defines various target-specific GPU grid values that must be
+///        consistent between host RTL (plugin), device RTL, and clang.
+///        We can change grid values for a "fat" binary so that 
diff erent
+///        passes get the correct values when generating code for a
+///        multi-target binary. Both amdgcn and nvptx values are stored in
+///        this file. In the future, should there be 
diff erences between GPUs
+///        of the same architecture, then simply make a 
diff erent array and
+///        use the new array name.
+///
+/// Example usage in clang:
+///   const unsigned slot_size = ctx.GetTargetInfo().getGridValue(GV_Warp_Size);
+///
+/// Example usage in libomptarget/deviceRTLs:
+///   #include "OMPGridValues.h"
+///   #ifdef __AMDGPU__
+///     #define GRIDVAL AMDGPUGpuGridValues
+///   #else
+///     #define GRIDVAL NVPTXGpuGridValues
+///   #endif
+///   ... Then use this reference for GV_Warp_Size in the deviceRTL source.
+///   GRIDVAL[GV_Warp_Size]
+///
+/// Example usage in libomptarget hsa plugin:
+///   #include "OMPGridValues.h"
+///   #define GRIDVAL AMDGPUGpuGridValues
+///   ... Then use this reference to access GV_Warp_Size in the hsa plugin.
+///   GRIDVAL[GV_Warp_Size]
+///
+/// Example usage in libomptarget cuda plugin:
+///    #include "OMPGridValues.h"
+///    #define GRIDVAL NVPTXGpuGridValues
+///   ... Then use this reference to access GV_Warp_Size in the cuda plugin.
+///    GRIDVAL[GV_Warp_Size]
+///
+enum GVIDX {
+  /// The maximum number of workers in a kernel.
+  /// (THREAD_ABSOLUTE_LIMIT) - (GV_Warp_Size), might be issue for blockDim.z
+  GV_Threads,
+  /// The size reserved for data in a shared memory slot.
+  GV_Slot_Size,
+  /// The default value of maximum number of threads in a worker warp.
+  GV_Warp_Size,
+  /// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size
+  /// for NVPTX.
+  GV_Warp_Size_32,
+  /// The number of bits required to represent the max number of threads in warp
+  GV_Warp_Size_Log2,
+  /// GV_Warp_Size * GV_Slot_Size,
+  GV_Warp_Slot_Size,
+  /// the maximum number of teams.
+  GV_Max_Teams,
+  /// Global Memory Alignment
+  GV_Mem_Align,
+  /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
+  GV_Warp_Size_Log2_Mask,
+  // An alternative to the heavy data sharing infrastructure that uses global
+  // memory is one that uses device __shared__ memory.  The amount of such space
+  // (in bytes) reserved by the OpenMP runtime is noted here.
+  GV_SimpleBufferSize,
+  // The absolute maximum team size for a working group
+  GV_Max_WG_Size,
+  // The default maximum team size for a working group
+  GV_Default_WG_Size,
+  // This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN.
+  GV_Max_Warp_Number,
+  /// The slot size that should be reserved for a working warp.
+  /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
+  GV_Warp_Size_Log2_MaskL
+};
+
+/// For AMDGPU GPUs
+static constexpr unsigned AMDGPUGpuGridValues[] = {
+    448,       // GV_Threads
+    256,       // GV_Slot_Size
+    64,        // GV_Warp_Size
+    32,        // GV_Warp_Size_32
+    6,         // GV_Warp_Size_Log2
+    64 * 256,  // GV_Warp_Slot_Size
+    128,       // GV_Max_Teams
+    256,       // GV_Mem_Align
+    63,        // GV_Warp_Size_Log2_Mask
+    896,       // GV_SimpleBufferSize
+    1024,      // GV_Max_WG_Size,
+    256,       // GV_Defaut_WG_Size
+    1024 / 64, // GV_Max_WG_Size / GV_WarpSize
+    63         // GV_Warp_Size_Log2_MaskL
+};
+
+/// For Nvidia GPUs
+static constexpr unsigned NVPTXGpuGridValues[] = {
+    992,               // GV_Threads
+    256,               // GV_Slot_Size
+    32,                // GV_Warp_Size
+    32,                // GV_Warp_Size_32
+    5,                 // GV_Warp_Size_Log2
+    32 * 256,          // GV_Warp_Slot_Size
+    1024,              // GV_Max_Teams
+    256,               // GV_Mem_Align
+    (~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask
+    896,               // GV_SimpleBufferSize
+    1024,              // GV_Max_WG_Size
+    128,               // GV_Defaut_WG_Size
+    1024 / 32,         // GV_Max_WG_Size / GV_WarpSize
+    31                 // GV_Warp_Size_Log2_MaskL
+};
+
+} // namespace omp
+} // namespace llvm
+
+#endif // LLVM_OPENMP_GRIDVALUES_H


        


More information about the cfe-commits mailing list