[Openmp-commits] [openmp] 9b06ac9 - [nfc][omptarget] Use builtin var abstraction. Second pass at D69476
via Openmp-commits
openmp-commits at lists.llvm.org
Thu Oct 31 19:22:04 PDT 2019
Author: JonChesterfield
Date: 2019-11-01T02:21:44Z
New Revision: 9b06ac98d0818be3534abe6bc031bf8a40361363
URL: https://github.com/llvm/llvm-project/commit/9b06ac98d0818be3534abe6bc031bf8a40361363
DIFF: https://github.com/llvm/llvm-project/commit/9b06ac98d0818be3534abe6bc031bf8a40361363.diff
LOG: [nfc][omptarget] Use builtin var abstraction. Second pass at D69476
Summary:
[nfc][omptarget] Use builtin var abstraction. Second pass at D69476
Use the wrappers in support.h for cuda builtin variables at all call sites.
Localises use of cuda and removes WARPSIZE==32 assumption in debug.h.
Reviewers: ABataev, jdoerfert, grokos
Reviewed By: jdoerfert
Subscribers: openmp-commits
Tags: #openmp
Differential Revision: https://reviews.llvm.org/D69693
Added:
Modified:
openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
openmp/libomptarget/deviceRTLs/nvptx/src/debug.h
openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu
openmp/libomptarget/deviceRTLs/nvptx/src/support.cu
Removed:
################################################################################
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
index 78b04ec5cffe..f2892acb790a 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
@@ -13,11 +13,6 @@
#include "target_impl.h"
#include <stdio.h>
-// Warp ID in the CUDA block
-INLINE static unsigned getWarpId() { return threadIdx.x / WARPSIZE; }
-// Lane ID in the CUDA warp.
-INLINE static unsigned getLaneId() { return threadIdx.x % WARPSIZE; }
-
// Return true if this is the first active thread in the warp.
INLINE static bool IsWarpMasterActiveThread() {
unsigned long long Mask = __kmpc_impl_activemask();
@@ -67,7 +62,7 @@ __kmpc_initialize_data_sharing_environment(__kmpc_data_sharing_slot *rootS,
DSPRINT0(DSFLAG_INIT,
"Entering __kmpc_initialize_data_sharing_environment\n");
- unsigned WID = getWarpId();
+ unsigned WID = GetWarpId();
DSPRINT(DSFLAG_INIT, "Warp ID: %u\n", WID);
omptarget_nvptx_TeamDescr *teamDescr =
@@ -111,7 +106,7 @@ EXTERN void *__kmpc_data_sharing_environment_begin(
DSPRINT(DSFLAG, "Default Data Size %016llx\n",
(unsigned long long)SharingDefaultDataSize);
- unsigned WID = getWarpId();
+ unsigned WID = GetWarpId();
__kmpc_impl_lanemask_t CurActiveThreads = __kmpc_impl_activemask();
__kmpc_data_sharing_slot *&SlotP = DataSharingState.SlotPtr[WID];
@@ -231,7 +226,7 @@ EXTERN void __kmpc_data_sharing_environment_end(
DSPRINT0(DSFLAG, "Entering __kmpc_data_sharing_environment_end\n");
- unsigned WID = getWarpId();
+ unsigned WID = GetWarpId();
if (IsEntryPoint) {
if (IsWarpMasterActiveThread()) {
@@ -359,7 +354,7 @@ EXTERN void __kmpc_data_sharing_init_stack_spmd() {
// This function initializes the stack pointer with the pointer to the
// statically allocated shared memory slots. The size of a shared memory
// slot is pre-determined to be 256 bytes.
- if (threadIdx.x == 0)
+ if (GetThreadIdInBlock() == 0)
data_sharing_init_stack_common();
__threadfence_block();
@@ -377,7 +372,7 @@ INLINE static void* data_sharing_push_stack_common(size_t PushSize) {
PushSize = (PushSize + (Alignment - 1)) / Alignment * Alignment;
// Frame pointer must be visible to all workers in the same warp.
- const unsigned WID = getWarpId();
+ const unsigned WID = GetWarpId();
void *FrameP = 0;
__kmpc_impl_lanemask_t CurActive = __kmpc_impl_activemask();
@@ -467,7 +462,7 @@ EXTERN void *__kmpc_data_sharing_push_stack(size_t DataSize,
// Compute the start address of the frame of each thread in the warp.
uintptr_t FrameStartAddress =
(uintptr_t) data_sharing_push_stack_common(PushSize);
- FrameStartAddress += (uintptr_t) (getLaneId() * DataSize);
+ FrameStartAddress += (uintptr_t) (GetLaneId() * DataSize);
return (void *)FrameStartAddress;
}
@@ -482,7 +477,7 @@ EXTERN void __kmpc_data_sharing_pop_stack(void *FrameStart) {
__threadfence_block();
if (GetThreadIdInBlock() % WARPSIZE == 0) {
- unsigned WID = getWarpId();
+ unsigned WID = GetWarpId();
// Current slot
__kmpc_data_sharing_slot *&SlotP = DataSharingState.SlotPtr[WID];
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/debug.h b/openmp/libomptarget/deviceRTLs/nvptx/src/debug.h
index 1052392155a7..1f6605790ef8 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/debug.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/debug.h
@@ -128,12 +128,12 @@
#if OMPTARGET_NVPTX_DEBUG || OMPTARGET_NVPTX_TEST || OMPTARGET_NVPTX_WARNING
#include <stdio.h>
-#include "target_impl.h"
+#include "support.h"
template <typename... Arguments>
NOINLINE static void log(const char *fmt, Arguments... parameters) {
- printf(fmt, (int)blockIdx.x, (int)threadIdx.x, (int)(threadIdx.x / WARPSIZE),
- (int)(threadIdx.x & 0x1F), parameters...);
+ printf(fmt, (int)GetBlockIdInKernel(), (int)GetThreadIdInBlock(),
+ (int)GetWarpId(), (int)GetLaneId, parameters...);
}
#endif
@@ -144,9 +144,8 @@ template <typename... Arguments>
NOINLINE static void check(bool cond, const char *fmt,
Arguments... parameters) {
if (!cond)
- printf(fmt, (int)blockIdx.x, (int)threadIdx.x,
- (int)(threadIdx.x / WARPSIZE), (int)(threadIdx.x & 0x1F),
- parameters...);
+ printf(fmt, (int)GetBlockIdInKernel(), (int)GetThreadIdInBlock(),
+ (int)GetWarpId(), (int)GetLaneId, parameters...);
assert(cond);
}
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu
index 20a22f425324..e86b1d1fdbf3 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/libcall.cu
@@ -364,7 +364,7 @@ EXTERN void omp_set_lock(omp_lock_t *lock) {
for (;;) {
now = clock();
clock_t cycles = now > start ? now - start : now + (0xffffffff - start);
- if (cycles >= __OMP_SPIN * blockIdx.x) {
+ if (cycles >= __OMP_SPIN * GetBlockIdInKernel()) {
break;
}
}
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/support.cu b/openmp/libomptarget/deviceRTLs/nvptx/src/support.cu
index 2b89d8d34660..27675975041a 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/support.cu
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/support.cu
@@ -106,9 +106,9 @@ INLINE int GetNumberOfBlocksInKernel() { return gridDim.x; }
INLINE int GetNumberOfThreadsInBlock() { return blockDim.x; }
-INLINE unsigned GetWarpId() { return threadIdx.x / WARPSIZE; }
+INLINE unsigned GetWarpId() { return GetThreadIdInBlock() / WARPSIZE; }
-INLINE unsigned GetLaneId() { return threadIdx.x & (WARPSIZE - 1); }
+INLINE unsigned GetLaneId() { return GetThreadIdInBlock() & (WARPSIZE - 1); }
////////////////////////////////////////////////////////////////////////////////
//
@@ -124,7 +124,9 @@ INLINE unsigned GetLaneId() { return threadIdx.x & (WARPSIZE - 1); }
// If NumThreads is 1024, master id is 992.
//
// Called in Generic Execution Mode only.
-INLINE int GetMasterThreadID() { return (blockDim.x - 1) & ~(WARPSIZE - 1); }
+INLINE int GetMasterThreadID() {
+ return (GetNumberOfThreadsInBlock() - 1) & ~(WARPSIZE - 1);
+}
// The last warp is reserved for the master; other warps are workers.
// Called in Generic Execution Mode only.
More information about the Openmp-commits
mailing list