[Openmp-commits] [openmp] 4af84d2 - [libomptarget][nfc] Introduce SHARED, ALIGN macros

via Openmp-commits openmp-commits at lists.llvm.org
Thu Dec 5 13:58:14 PST 2019


Author: Jon Chesterfield
Date: 2019-12-05T21:57:58Z
New Revision: 4af84d2686a30e30729ef48102009ccdecddc943

URL: https://github.com/llvm/llvm-project/commit/4af84d2686a30e30729ef48102009ccdecddc943
DIFF: https://github.com/llvm/llvm-project/commit/4af84d2686a30e30729ef48102009ccdecddc943.diff

LOG: [libomptarget][nfc] Introduce SHARED, ALIGN macros

Summary:
[libomptarget][nfc] Introduce SHARED, ALIGN macros
Move remaining cuda attributes behind such macros

Reviewers: ABataev, jdoerfert, grokos

Reviewed By: ABataev

Subscribers: openmp-commits, jvesely

Tags: #openmp

Differential Revision: https://reviews.llvm.org/D71076

Added: 
    

Modified: 
    openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
    openmp/libomptarget/deviceRTLs/common/omptarget.h
    openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
    openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
    openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
    openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
    openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
index c6e082c2b961..475851ac9af3 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
@@ -22,6 +22,8 @@
 #define DEVICE __attribute__((device))
 #define INLINE inline DEVICE
 #define NOINLINE __attribute__((noinline)) DEVICE
+#define SHARED __attribute__((shared))
+#define ALIGN(N) __attribute__((aligned(N)))
 
 ////////////////////////////////////////////////////////////////////////////////
 // Kernel options

diff  --git a/openmp/libomptarget/deviceRTLs/common/omptarget.h b/openmp/libomptarget/deviceRTLs/common/omptarget.h
index 5e3c02b15342..8ffa31091d4c 100644
--- a/openmp/libomptarget/deviceRTLs/common/omptarget.h
+++ b/openmp/libomptarget/deviceRTLs/common/omptarget.h
@@ -77,7 +77,7 @@ class omptarget_nvptx_SharedArgs {
   uint32_t nArgs;
 };
 
-extern __device__ __shared__ omptarget_nvptx_SharedArgs
+extern DEVICE SHARED omptarget_nvptx_SharedArgs
     omptarget_nvptx_globalArgs;
 
 // Data structure to keep in shared memory that traces the current slot, stack,
@@ -107,7 +107,7 @@ struct __kmpc_data_sharing_master_slot_static {
   void *DataEnd;
   char Data[DS_Slot_Size];
 };
-extern __device__ __shared__ DataSharingStateTy DataSharingState;
+extern DEVICE SHARED DataSharingStateTy DataSharingState;
 
 ////////////////////////////////////////////////////////////////////////////////
 // task ICV and (implicit & explicit) task state
@@ -259,9 +259,9 @@ class omptarget_nvptx_TeamDescr {
       workDescrForActiveParallel; // one, ONLY for the active par
   uint64_t lastprivateIterBuffer;
 
-  __align__(16)
-      __kmpc_data_sharing_worker_slot_static worker_rootS[WARPSIZE];
-  __align__(16) __kmpc_data_sharing_master_slot_static master_rootS[1];
+  ALIGN(16)
+  __kmpc_data_sharing_worker_slot_static worker_rootS[WARPSIZE];
+  ALIGN(16) __kmpc_data_sharing_master_slot_static master_rootS[1];
 };
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -326,7 +326,7 @@ class omptarget_nvptx_ThreadPrivateContext {
 /// Memory manager for statically allocated memory.
 class omptarget_nvptx_SimpleMemoryManager {
 private:
-  __align__(128) struct MemDataTy {
+  ALIGN(128) struct MemDataTy {
     volatile unsigned keys[OMP_STATE_COUNT];
   } MemData[MAX_SM];
 
@@ -345,20 +345,20 @@ class omptarget_nvptx_SimpleMemoryManager {
 // global data tables
 ////////////////////////////////////////////////////////////////////////////////
 
-extern __device__ omptarget_nvptx_SimpleMemoryManager
+extern DEVICE omptarget_nvptx_SimpleMemoryManager
     omptarget_nvptx_simpleMemoryManager;
-extern __device__ __shared__ uint32_t usedMemIdx;
-extern __device__ __shared__ uint32_t usedSlotIdx;
-extern __device__ __shared__ uint8_t
+extern DEVICE SHARED uint32_t usedMemIdx;
+extern DEVICE SHARED uint32_t usedSlotIdx;
+extern DEVICE SHARED uint8_t
     parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE];
-extern __device__ __shared__ uint16_t threadLimit;
-extern __device__ __shared__ uint16_t threadsInTeam;
-extern __device__ __shared__ uint16_t nThreads;
-extern __device__ __shared__
+extern DEVICE SHARED uint16_t threadLimit;
+extern DEVICE SHARED uint16_t threadsInTeam;
+extern DEVICE SHARED uint16_t nThreads;
+extern DEVICE SHARED
     omptarget_nvptx_ThreadPrivateContext *omptarget_nvptx_threadPrivateContext;
 
-extern __device__ __shared__ uint32_t execution_param;
-extern __device__ __shared__ void *ReductionScratchpadPtr;
+extern DEVICE SHARED uint32_t execution_param;
+extern DEVICE SHARED void *ReductionScratchpadPtr;
 
 ////////////////////////////////////////////////////////////////////////////////
 // work function (outlined parallel/simd functions) and arguments.
@@ -366,7 +366,7 @@ extern __device__ __shared__ void *ReductionScratchpadPtr;
 ////////////////////////////////////////////////////////////////////////////////
 
 typedef void *omptarget_nvptx_WorkFn;
-extern volatile __device__ __shared__ omptarget_nvptx_WorkFn
+extern volatile DEVICE SHARED omptarget_nvptx_WorkFn
     omptarget_nvptx_workFn;
 
 ////////////////////////////////////////////////////////////////////////////////

diff  --git a/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu b/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
index f99156cca447..9bf27bfcbf18 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
@@ -17,7 +17,7 @@
 // global data tables
 ////////////////////////////////////////////////////////////////////////////////
 
-extern __device__
+extern DEVICE
     omptarget_nvptx_Queue<omptarget_nvptx_ThreadPrivateContext, OMP_STATE_COUNT>
         omptarget_nvptx_device_State[MAX_SM];
 

diff  --git a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
index 43d6cc12f95e..75068c7814ac 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
@@ -97,7 +97,7 @@ EXTERN void *__kmpc_data_sharing_environment_begin(
 
   DSPRINT0(DSFLAG, "Entering __kmpc_data_sharing_environment_begin\n");
 
-  // If the runtime has been elided, used __shared__ memory for master-worker
+  // If the runtime has been elided, used shared memory for master-worker
   // data sharing.
   if (!IsOMPRuntimeInitialized)
     return (void *)&DataSharingState;
@@ -300,7 +300,7 @@ __kmpc_get_data_sharing_environment_frame(int32_t SourceThreadID,
                                           int16_t IsOMPRuntimeInitialized) {
   DSPRINT0(DSFLAG, "Entering __kmpc_get_data_sharing_environment_frame\n");
 
-  // If the runtime has been elided, use __shared__ memory for master-worker
+  // If the runtime has been elided, use shared memory for master-worker
   // data sharing.  We're reusing the statically allocated data structure
   // that is used for standard data sharing.
   if (!IsOMPRuntimeInitialized)

diff  --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
index 4d3701e8f076..9f8758acce8f 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
@@ -17,27 +17,27 @@
 // global device environment
 ////////////////////////////////////////////////////////////////////////////////
 
-__device__ omptarget_device_environmentTy omptarget_device_environment;
+DEVICE omptarget_device_environmentTy omptarget_device_environment;
 
 ////////////////////////////////////////////////////////////////////////////////
 // global data holding OpenMP state information
 ////////////////////////////////////////////////////////////////////////////////
 
-__device__
+DEVICE
     omptarget_nvptx_Queue<omptarget_nvptx_ThreadPrivateContext, OMP_STATE_COUNT>
         omptarget_nvptx_device_State[MAX_SM];
 
-__device__ omptarget_nvptx_SimpleMemoryManager
+DEVICE omptarget_nvptx_SimpleMemoryManager
     omptarget_nvptx_simpleMemoryManager;
-__device__ __shared__ uint32_t usedMemIdx;
-__device__ __shared__ uint32_t usedSlotIdx;
+DEVICE SHARED uint32_t usedMemIdx;
+DEVICE SHARED uint32_t usedSlotIdx;
 
-__device__ __shared__ uint8_t parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE];
-__device__ __shared__ uint16_t threadLimit;
-__device__ __shared__ uint16_t threadsInTeam;
-__device__ __shared__ uint16_t nThreads;
+DEVICE SHARED uint8_t parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE];
+DEVICE SHARED uint16_t threadLimit;
+DEVICE SHARED uint16_t threadsInTeam;
+DEVICE SHARED uint16_t nThreads;
 // Pointer to this team's OpenMP state object
-__device__ __shared__
+DEVICE SHARED
     omptarget_nvptx_ThreadPrivateContext *omptarget_nvptx_threadPrivateContext;
 
 ////////////////////////////////////////////////////////////////////////////////
@@ -45,24 +45,24 @@ __device__ __shared__
 // communicate with the workers.  Since it is in shared memory, there is one
 // copy of these variables for each kernel, instance, and team.
 ////////////////////////////////////////////////////////////////////////////////
-volatile __device__ __shared__ omptarget_nvptx_WorkFn omptarget_nvptx_workFn;
+volatile DEVICE SHARED omptarget_nvptx_WorkFn omptarget_nvptx_workFn;
 
 ////////////////////////////////////////////////////////////////////////////////
 // OpenMP kernel execution parameters
 ////////////////////////////////////////////////////////////////////////////////
-__device__ __shared__ uint32_t execution_param;
+DEVICE SHARED uint32_t execution_param;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Data sharing state
 ////////////////////////////////////////////////////////////////////////////////
-__device__ __shared__ DataSharingStateTy DataSharingState;
+DEVICE SHARED DataSharingStateTy DataSharingState;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Scratchpad for teams reduction.
 ////////////////////////////////////////////////////////////////////////////////
-__device__ __shared__ void *ReductionScratchpadPtr;
+DEVICE SHARED void *ReductionScratchpadPtr;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Data sharing related variables.
 ////////////////////////////////////////////////////////////////////////////////
-__device__ __shared__ omptarget_nvptx_SharedArgs omptarget_nvptx_globalArgs;
+DEVICE SHARED omptarget_nvptx_SharedArgs omptarget_nvptx_globalArgs;

diff  --git a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
index 1bedb2379ca7..dfa7c4db1a66 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
@@ -233,7 +233,7 @@ static int32_t nvptx_teams_reduce_nowait(int32_t global_tid, int32_t num_vars,
                           : /*Master thread only*/ 1;
   uint32_t TeamId = GetBlockIdInKernel();
   uint32_t NumTeams = GetNumberOfBlocksInKernel();
-  __shared__ volatile bool IsLastTeam;
+  SHARED volatile bool IsLastTeam;
 
   // Team masters of all teams write to the scratchpad.
   if (ThreadId == 0) {
@@ -403,8 +403,8 @@ INLINE static uint32_t roundToWarpsize(uint32_t s) {
   return (s & ~(unsigned)(WARPSIZE - 1));
 }
 
-__device__ static volatile uint32_t IterCnt = 0;
-__device__ static volatile uint32_t Cnt = 0;
+DEVICE static volatile uint32_t IterCnt = 0;
+DEVICE static volatile uint32_t Cnt = 0;
 EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
     kmp_Ident *loc, int32_t global_tid, void *global_buffer,
     int32_t num_of_records, void *reduce_data, kmp_ShuffleReductFctPtr shflFct,
@@ -426,8 +426,8 @@ EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
                          : /*Master thread only*/ 1;
   uint32_t TeamId = GetBlockIdInKernel();
   uint32_t NumTeams = GetNumberOfBlocksInKernel();
-  __shared__ unsigned Bound;
-  __shared__ unsigned ChunkTeamCount;
+  SHARED unsigned Bound;
+  SHARED unsigned ChunkTeamCount;
 
   // Block progress for teams greater than the current upper
   // limit. We always only allow a number of teams less or equal

diff  --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
index e0bacab3c643..5daeb5ca8291 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@@ -18,6 +18,8 @@
 #define DEVICE __device__
 #define INLINE __forceinline__ DEVICE
 #define NOINLINE __noinline__ DEVICE
+#define SHARED __shared__
+#define ALIGN(N) __align__(N)
 
 ////////////////////////////////////////////////////////////////////////////////
 // Kernel options


        


More information about the Openmp-commits mailing list