[Openmp-commits] [openmp] 4af84d2 - [libomptarget][nfc] Introduce SHARED, ALIGN macros
via Openmp-commits
openmp-commits at lists.llvm.org
Thu Dec 5 13:58:14 PST 2019
Author: Jon Chesterfield
Date: 2019-12-05T21:57:58Z
New Revision: 4af84d2686a30e30729ef48102009ccdecddc943
URL: https://github.com/llvm/llvm-project/commit/4af84d2686a30e30729ef48102009ccdecddc943
DIFF: https://github.com/llvm/llvm-project/commit/4af84d2686a30e30729ef48102009ccdecddc943.diff
LOG: [libomptarget][nfc] Introduce SHARED, ALIGN macros
Summary:
[libomptarget][nfc] Introduce SHARED, ALIGN macros
Move remaining cuda attributes behind such macros
Reviewers: ABataev, jdoerfert, grokos
Reviewed By: ABataev
Subscribers: openmp-commits, jvesely
Tags: #openmp
Differential Revision: https://reviews.llvm.org/D71076
Added:
Modified:
openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
openmp/libomptarget/deviceRTLs/common/omptarget.h
openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
Removed:
################################################################################
diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
index c6e082c2b961..475851ac9af3 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
@@ -22,6 +22,8 @@
#define DEVICE __attribute__((device))
#define INLINE inline DEVICE
#define NOINLINE __attribute__((noinline)) DEVICE
+#define SHARED __attribute__((shared))
+#define ALIGN(N) __attribute__((aligned(N)))
////////////////////////////////////////////////////////////////////////////////
// Kernel options
diff --git a/openmp/libomptarget/deviceRTLs/common/omptarget.h b/openmp/libomptarget/deviceRTLs/common/omptarget.h
index 5e3c02b15342..8ffa31091d4c 100644
--- a/openmp/libomptarget/deviceRTLs/common/omptarget.h
+++ b/openmp/libomptarget/deviceRTLs/common/omptarget.h
@@ -77,7 +77,7 @@ class omptarget_nvptx_SharedArgs {
uint32_t nArgs;
};
-extern __device__ __shared__ omptarget_nvptx_SharedArgs
+extern DEVICE SHARED omptarget_nvptx_SharedArgs
omptarget_nvptx_globalArgs;
// Data structure to keep in shared memory that traces the current slot, stack,
@@ -107,7 +107,7 @@ struct __kmpc_data_sharing_master_slot_static {
void *DataEnd;
char Data[DS_Slot_Size];
};
-extern __device__ __shared__ DataSharingStateTy DataSharingState;
+extern DEVICE SHARED DataSharingStateTy DataSharingState;
////////////////////////////////////////////////////////////////////////////////
// task ICV and (implicit & explicit) task state
@@ -259,9 +259,9 @@ class omptarget_nvptx_TeamDescr {
workDescrForActiveParallel; // one, ONLY for the active par
uint64_t lastprivateIterBuffer;
- __align__(16)
- __kmpc_data_sharing_worker_slot_static worker_rootS[WARPSIZE];
- __align__(16) __kmpc_data_sharing_master_slot_static master_rootS[1];
+ ALIGN(16)
+ __kmpc_data_sharing_worker_slot_static worker_rootS[WARPSIZE];
+ ALIGN(16) __kmpc_data_sharing_master_slot_static master_rootS[1];
};
////////////////////////////////////////////////////////////////////////////////
@@ -326,7 +326,7 @@ class omptarget_nvptx_ThreadPrivateContext {
/// Memory manager for statically allocated memory.
class omptarget_nvptx_SimpleMemoryManager {
private:
- __align__(128) struct MemDataTy {
+ ALIGN(128) struct MemDataTy {
volatile unsigned keys[OMP_STATE_COUNT];
} MemData[MAX_SM];
@@ -345,20 +345,20 @@ class omptarget_nvptx_SimpleMemoryManager {
// global data tables
////////////////////////////////////////////////////////////////////////////////
-extern __device__ omptarget_nvptx_SimpleMemoryManager
+extern DEVICE omptarget_nvptx_SimpleMemoryManager
omptarget_nvptx_simpleMemoryManager;
-extern __device__ __shared__ uint32_t usedMemIdx;
-extern __device__ __shared__ uint32_t usedSlotIdx;
-extern __device__ __shared__ uint8_t
+extern DEVICE SHARED uint32_t usedMemIdx;
+extern DEVICE SHARED uint32_t usedSlotIdx;
+extern DEVICE SHARED uint8_t
parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE];
-extern __device__ __shared__ uint16_t threadLimit;
-extern __device__ __shared__ uint16_t threadsInTeam;
-extern __device__ __shared__ uint16_t nThreads;
-extern __device__ __shared__
+extern DEVICE SHARED uint16_t threadLimit;
+extern DEVICE SHARED uint16_t threadsInTeam;
+extern DEVICE SHARED uint16_t nThreads;
+extern DEVICE SHARED
omptarget_nvptx_ThreadPrivateContext *omptarget_nvptx_threadPrivateContext;
-extern __device__ __shared__ uint32_t execution_param;
-extern __device__ __shared__ void *ReductionScratchpadPtr;
+extern DEVICE SHARED uint32_t execution_param;
+extern DEVICE SHARED void *ReductionScratchpadPtr;
////////////////////////////////////////////////////////////////////////////////
// work function (outlined parallel/simd functions) and arguments.
@@ -366,7 +366,7 @@ extern __device__ __shared__ void *ReductionScratchpadPtr;
////////////////////////////////////////////////////////////////////////////////
typedef void *omptarget_nvptx_WorkFn;
-extern volatile __device__ __shared__ omptarget_nvptx_WorkFn
+extern volatile DEVICE SHARED omptarget_nvptx_WorkFn
omptarget_nvptx_workFn;
////////////////////////////////////////////////////////////////////////////////
diff --git a/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu b/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
index f99156cca447..9bf27bfcbf18 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/omptarget.cu
@@ -17,7 +17,7 @@
// global data tables
////////////////////////////////////////////////////////////////////////////////
-extern __device__
+extern DEVICE
omptarget_nvptx_Queue<omptarget_nvptx_ThreadPrivateContext, OMP_STATE_COUNT>
omptarget_nvptx_device_State[MAX_SM];
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
index 43d6cc12f95e..75068c7814ac 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
@@ -97,7 +97,7 @@ EXTERN void *__kmpc_data_sharing_environment_begin(
DSPRINT0(DSFLAG, "Entering __kmpc_data_sharing_environment_begin\n");
- // If the runtime has been elided, used __shared__ memory for master-worker
+ // If the runtime has been elided, used shared memory for master-worker
// data sharing.
if (!IsOMPRuntimeInitialized)
return (void *)&DataSharingState;
@@ -300,7 +300,7 @@ __kmpc_get_data_sharing_environment_frame(int32_t SourceThreadID,
int16_t IsOMPRuntimeInitialized) {
DSPRINT0(DSFLAG, "Entering __kmpc_get_data_sharing_environment_frame\n");
- // If the runtime has been elided, use __shared__ memory for master-worker
+ // If the runtime has been elided, use shared memory for master-worker
// data sharing. We're reusing the statically allocated data structure
// that is used for standard data sharing.
if (!IsOMPRuntimeInitialized)
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
index 4d3701e8f076..9f8758acce8f 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/omp_data.cu
@@ -17,27 +17,27 @@
// global device environment
////////////////////////////////////////////////////////////////////////////////
-__device__ omptarget_device_environmentTy omptarget_device_environment;
+DEVICE omptarget_device_environmentTy omptarget_device_environment;
////////////////////////////////////////////////////////////////////////////////
// global data holding OpenMP state information
////////////////////////////////////////////////////////////////////////////////
-__device__
+DEVICE
omptarget_nvptx_Queue<omptarget_nvptx_ThreadPrivateContext, OMP_STATE_COUNT>
omptarget_nvptx_device_State[MAX_SM];
-__device__ omptarget_nvptx_SimpleMemoryManager
+DEVICE omptarget_nvptx_SimpleMemoryManager
omptarget_nvptx_simpleMemoryManager;
-__device__ __shared__ uint32_t usedMemIdx;
-__device__ __shared__ uint32_t usedSlotIdx;
+DEVICE SHARED uint32_t usedMemIdx;
+DEVICE SHARED uint32_t usedSlotIdx;
-__device__ __shared__ uint8_t parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE];
-__device__ __shared__ uint16_t threadLimit;
-__device__ __shared__ uint16_t threadsInTeam;
-__device__ __shared__ uint16_t nThreads;
+DEVICE SHARED uint8_t parallelLevel[MAX_THREADS_PER_TEAM / WARPSIZE];
+DEVICE SHARED uint16_t threadLimit;
+DEVICE SHARED uint16_t threadsInTeam;
+DEVICE SHARED uint16_t nThreads;
// Pointer to this team's OpenMP state object
-__device__ __shared__
+DEVICE SHARED
omptarget_nvptx_ThreadPrivateContext *omptarget_nvptx_threadPrivateContext;
////////////////////////////////////////////////////////////////////////////////
@@ -45,24 +45,24 @@ __device__ __shared__
// communicate with the workers. Since it is in shared memory, there is one
// copy of these variables for each kernel, instance, and team.
////////////////////////////////////////////////////////////////////////////////
-volatile __device__ __shared__ omptarget_nvptx_WorkFn omptarget_nvptx_workFn;
+volatile DEVICE SHARED omptarget_nvptx_WorkFn omptarget_nvptx_workFn;
////////////////////////////////////////////////////////////////////////////////
// OpenMP kernel execution parameters
////////////////////////////////////////////////////////////////////////////////
-__device__ __shared__ uint32_t execution_param;
+DEVICE SHARED uint32_t execution_param;
////////////////////////////////////////////////////////////////////////////////
// Data sharing state
////////////////////////////////////////////////////////////////////////////////
-__device__ __shared__ DataSharingStateTy DataSharingState;
+DEVICE SHARED DataSharingStateTy DataSharingState;
////////////////////////////////////////////////////////////////////////////////
// Scratchpad for teams reduction.
////////////////////////////////////////////////////////////////////////////////
-__device__ __shared__ void *ReductionScratchpadPtr;
+DEVICE SHARED void *ReductionScratchpadPtr;
////////////////////////////////////////////////////////////////////////////////
// Data sharing related variables.
////////////////////////////////////////////////////////////////////////////////
-__device__ __shared__ omptarget_nvptx_SharedArgs omptarget_nvptx_globalArgs;
+DEVICE SHARED omptarget_nvptx_SharedArgs omptarget_nvptx_globalArgs;
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
index 1bedb2379ca7..dfa7c4db1a66 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
@@ -233,7 +233,7 @@ static int32_t nvptx_teams_reduce_nowait(int32_t global_tid, int32_t num_vars,
: /*Master thread only*/ 1;
uint32_t TeamId = GetBlockIdInKernel();
uint32_t NumTeams = GetNumberOfBlocksInKernel();
- __shared__ volatile bool IsLastTeam;
+ SHARED volatile bool IsLastTeam;
// Team masters of all teams write to the scratchpad.
if (ThreadId == 0) {
@@ -403,8 +403,8 @@ INLINE static uint32_t roundToWarpsize(uint32_t s) {
return (s & ~(unsigned)(WARPSIZE - 1));
}
-__device__ static volatile uint32_t IterCnt = 0;
-__device__ static volatile uint32_t Cnt = 0;
+DEVICE static volatile uint32_t IterCnt = 0;
+DEVICE static volatile uint32_t Cnt = 0;
EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
kmp_Ident *loc, int32_t global_tid, void *global_buffer,
int32_t num_of_records, void *reduce_data, kmp_ShuffleReductFctPtr shflFct,
@@ -426,8 +426,8 @@ EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
: /*Master thread only*/ 1;
uint32_t TeamId = GetBlockIdInKernel();
uint32_t NumTeams = GetNumberOfBlocksInKernel();
- __shared__ unsigned Bound;
- __shared__ unsigned ChunkTeamCount;
+ SHARED unsigned Bound;
+ SHARED unsigned ChunkTeamCount;
// Block progress for teams greater than the current upper
// limit. We always only allow a number of teams less or equal
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
index e0bacab3c643..5daeb5ca8291 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@@ -18,6 +18,8 @@
#define DEVICE __device__
#define INLINE __forceinline__ DEVICE
#define NOINLINE __noinline__ DEVICE
+#define SHARED __shared__
+#define ALIGN(N) __align__(N)
////////////////////////////////////////////////////////////////////////////////
// Kernel options
More information about the Openmp-commits
mailing list