[Openmp-commits] [openmp] 918a106 - [libomptarget][nfc] Move GetWarp/LaneId functions into per arch code
Jon Chesterfield via Openmp-commits
openmp-commits at lists.llvm.org
Thu Mar 5 09:06:11 PST 2020
Author: Jon Chesterfield
Date: 2020-03-05T17:05:58Z
New Revision: 918a1065be642a3d5f804c95d7971c2d1b96cdf5
URL: https://github.com/llvm/llvm-project/commit/918a1065be642a3d5f804c95d7971c2d1b96cdf5
DIFF: https://github.com/llvm/llvm-project/commit/918a1065be642a3d5f804c95d7971c2d1b96cdf5.diff
LOG: [libomptarget][nfc] Move GetWarp/LaneId functions into per arch code
Summary:
[libomptarget][nfc] Move GetWarp/LaneId functions into per arch code
No code change for nvptx. Amdgcn currently has two implementations of GetLaneId,
this patch keeps the one a colleague considered to be superior for our ISA.
GetWarpId is currently the same function for amdgcn and nvptx, but I think it's
cleaner to keep it grouped with all the others than to keep it in support.cu.
Reviewers: jdoerfert, grokos, ABataev
Reviewed By: jdoerfert
Subscribers: jvesely, openmp-commits
Tags: #openmp
Differential Revision: https://reviews.llvm.org/D75587
Added:
Modified:
openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
openmp/libomptarget/deviceRTLs/common/src/support.cu
openmp/libomptarget/deviceRTLs/common/support.h
openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
Removed:
################################################################################
diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
index b3c892323b74..c2d5e329f0a5 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
@@ -127,6 +127,8 @@ INLINE int GetThreadIdInBlock() { return __builtin_amdgcn_workitem_id_x(); }
INLINE int GetBlockIdInKernel() { return __builtin_amdgcn_workgroup_id_x(); }
DEVICE int GetNumberOfBlocksInKernel();
DEVICE int GetNumberOfThreadsInBlock();
+DEVICE unsigned GetWarpId();
+DEVICE unsigned GetLaneId();
DEVICE bool __kmpc_impl_is_first_active_thread();
diff --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
index a32bcd87a648..9807483d4c42 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
@@ -14,14 +14,10 @@
// Implementations initially derived from hcc
-static DEVICE uint32_t getLaneId(void) {
- return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
-}
-
// Initialized with a 64-bit mask with bits set in positions less than the
// thread's lane number in the warp
DEVICE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_lt() {
- uint32_t lane = getLaneId();
+ uint32_t lane = GetLaneId();
int64_t ballot = __kmpc_impl_activemask();
uint64_t mask = ((uint64_t)1 << lane) - (uint64_t)1;
return mask & ballot;
@@ -30,7 +26,7 @@ DEVICE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_lt() {
// Initialized with a 64-bit mask with bits set in positions greater than the
// thread's lane number in the warp
DEVICE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_gt() {
- uint32_t lane = getLaneId();
+ uint32_t lane = GetLaneId();
if (lane == (WARPSIZE - 1))
return 0;
uint64_t ballot = __kmpc_impl_activemask();
@@ -53,14 +49,14 @@ DEVICE __kmpc_impl_lanemask_t __kmpc_impl_activemask() {
DEVICE int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t, int32_t var,
int32_t srcLane) {
int width = WARPSIZE;
- int self = getLaneId();
+ int self = GetLaneId();
int index = srcLane + (self & ~(width - 1));
return __builtin_amdgcn_ds_bpermute(index << 2, var);
}
DEVICE int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t, int32_t var,
uint32_t laneDelta, int32_t width) {
- int self = getLaneId();
+ int self = GetLaneId();
int index = self + laneDelta;
index = (int)(laneDelta + (self & (width - 1))) >= width ? self : index;
return __builtin_amdgcn_ds_bpermute(index << 2, var);
@@ -70,3 +66,7 @@ EXTERN uint64_t __ockl_get_local_size(uint32_t);
EXTERN uint64_t __ockl_get_num_groups(uint32_t);
DEVICE int GetNumberOfBlocksInKernel() { return __ockl_get_num_groups(0); }
DEVICE int GetNumberOfThreadsInBlock() { return __ockl_get_local_size(0); }
+DEVICE unsigned GetWarpId() { return GetThreadIdInBlock() / WARPSIZE; }
+DEVICE unsigned GetLaneId() {
+ return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
+}
diff --git a/openmp/libomptarget/deviceRTLs/common/src/support.cu b/openmp/libomptarget/deviceRTLs/common/src/support.cu
index d7a0b23667fa..85747511d46c 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/support.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/support.cu
@@ -92,16 +92,6 @@ DEVICE bool checkRuntimeInitialized(kmp_Ident *loc) {
// support: get info from machine
////////////////////////////////////////////////////////////////////////////////
-////////////////////////////////////////////////////////////////////////////////
-//
-// Calls to the NVPTX layer (assuming 1D layout)
-//
-////////////////////////////////////////////////////////////////////////////////
-
-DEVICE unsigned GetWarpId() { return GetThreadIdInBlock() / WARPSIZE; }
-
-DEVICE unsigned GetLaneId() { return GetThreadIdInBlock() & (WARPSIZE - 1); }
-
////////////////////////////////////////////////////////////////////////////////
//
// Calls to the Generic Scheme Implementation Layer (assuming 1D layout)
diff --git a/openmp/libomptarget/deviceRTLs/common/support.h b/openmp/libomptarget/deviceRTLs/common/support.h
index d3472842f980..913c4c3c323f 100644
--- a/openmp/libomptarget/deviceRTLs/common/support.h
+++ b/openmp/libomptarget/deviceRTLs/common/support.h
@@ -50,10 +50,6 @@ DEVICE bool checkRuntimeInitialized(kmp_Ident *loc);
// get info from machine
////////////////////////////////////////////////////////////////////////////////
-// get low level ids of resources
-DEVICE unsigned GetWarpId();
-DEVICE unsigned GetLaneId();
-
// get global ids to locate tread/team info (constant regardless of OMP)
DEVICE int GetLogicalThreadIdInBlock(bool isSPMDExecutionMode);
DEVICE int GetMasterThreadID();
diff --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
index 4741ce8218e1..04d510b11591 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@@ -189,6 +189,8 @@ INLINE int GetThreadIdInBlock() { return threadIdx.x; }
INLINE int GetBlockIdInKernel() { return blockIdx.x; }
INLINE int GetNumberOfBlocksInKernel() { return gridDim.x; }
INLINE int GetNumberOfThreadsInBlock() { return blockDim.x; }
+INLINE unsigned GetWarpId() { return GetThreadIdInBlock() / WARPSIZE; }
+INLINE unsigned GetLaneId() { return GetThreadIdInBlock() & (WARPSIZE - 1); }
// Return true if this is the first active thread in the warp.
INLINE bool __kmpc_impl_is_first_active_thread() {
More information about the Openmp-commits
mailing list