[Openmp-commits] [openmp] 918a106 - [libomptarget][nfc] Move GetWarp/LaneId functions into per arch code

Jon Chesterfield via Openmp-commits openmp-commits at lists.llvm.org
Thu Mar 5 09:06:11 PST 2020


Author: Jon Chesterfield
Date: 2020-03-05T17:05:58Z
New Revision: 918a1065be642a3d5f804c95d7971c2d1b96cdf5

URL: https://github.com/llvm/llvm-project/commit/918a1065be642a3d5f804c95d7971c2d1b96cdf5
DIFF: https://github.com/llvm/llvm-project/commit/918a1065be642a3d5f804c95d7971c2d1b96cdf5.diff

LOG: [libomptarget][nfc] Move GetWarp/LaneId functions into per arch code

Summary:
[libomptarget][nfc] Move GetWarp/LaneId functions into per arch code

No code change for nvptx. Amdgcn currently has two implementations of GetLaneId,
this patch keeps the one a colleague considered to be superior for our ISA.

GetWarpId is currently the same function for amdgcn and nvptx, but I think it's
cleaner to keep it grouped with all the others than to keep it in support.cu.

Reviewers: jdoerfert, grokos, ABataev

Reviewed By: jdoerfert

Subscribers: jvesely, openmp-commits

Tags: #openmp

Differential Revision: https://reviews.llvm.org/D75587

Added: 
    

Modified: 
    openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
    openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
    openmp/libomptarget/deviceRTLs/common/src/support.cu
    openmp/libomptarget/deviceRTLs/common/support.h
    openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h

Removed: 
    


################################################################################
diff  --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
index b3c892323b74..c2d5e329f0a5 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h
@@ -127,6 +127,8 @@ INLINE int GetThreadIdInBlock() { return __builtin_amdgcn_workitem_id_x(); }
 INLINE int GetBlockIdInKernel() { return __builtin_amdgcn_workgroup_id_x(); }
 DEVICE int GetNumberOfBlocksInKernel();
 DEVICE int GetNumberOfThreadsInBlock();
+DEVICE unsigned GetWarpId();
+DEVICE unsigned GetLaneId();
 
 DEVICE bool __kmpc_impl_is_first_active_thread();
 

diff  --git a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
index a32bcd87a648..9807483d4c42 100644
--- a/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
+++ b/openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.hip
@@ -14,14 +14,10 @@
 
 // Implementations initially derived from hcc
 
-static DEVICE uint32_t getLaneId(void) {
-  return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
-}
-
 // Initialized with a 64-bit mask with bits set in positions less than the
 // thread's lane number in the warp
 DEVICE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_lt() {
-  uint32_t lane = getLaneId();
+  uint32_t lane = GetLaneId();
   int64_t ballot = __kmpc_impl_activemask();
   uint64_t mask = ((uint64_t)1 << lane) - (uint64_t)1;
   return mask & ballot;
@@ -30,7 +26,7 @@ DEVICE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_lt() {
 // Initialized with a 64-bit mask with bits set in positions greater than the
 // thread's lane number in the warp
 DEVICE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_gt() {
-  uint32_t lane = getLaneId();
+  uint32_t lane = GetLaneId();
   if (lane == (WARPSIZE - 1))
     return 0;
   uint64_t ballot = __kmpc_impl_activemask();
@@ -53,14 +49,14 @@ DEVICE __kmpc_impl_lanemask_t __kmpc_impl_activemask() {
 DEVICE int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t, int32_t var,
                                      int32_t srcLane) {
   int width = WARPSIZE;
-  int self = getLaneId();
+  int self = GetLaneId();
   int index = srcLane + (self & ~(width - 1));
   return __builtin_amdgcn_ds_bpermute(index << 2, var);
 }
 
 DEVICE int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t, int32_t var,
                                           uint32_t laneDelta, int32_t width) {
-  int self = getLaneId();
+  int self = GetLaneId();
   int index = self + laneDelta;
   index = (int)(laneDelta + (self & (width - 1))) >= width ? self : index;
   return __builtin_amdgcn_ds_bpermute(index << 2, var);
@@ -70,3 +66,7 @@ EXTERN uint64_t __ockl_get_local_size(uint32_t);
 EXTERN uint64_t __ockl_get_num_groups(uint32_t);
 DEVICE int GetNumberOfBlocksInKernel() { return __ockl_get_num_groups(0); }
 DEVICE int GetNumberOfThreadsInBlock() { return __ockl_get_local_size(0); }
+DEVICE unsigned GetWarpId() { return GetThreadIdInBlock() / WARPSIZE; }
+DEVICE unsigned GetLaneId() {
+  return __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));
+}

diff  --git a/openmp/libomptarget/deviceRTLs/common/src/support.cu b/openmp/libomptarget/deviceRTLs/common/src/support.cu
index d7a0b23667fa..85747511d46c 100644
--- a/openmp/libomptarget/deviceRTLs/common/src/support.cu
+++ b/openmp/libomptarget/deviceRTLs/common/src/support.cu
@@ -92,16 +92,6 @@ DEVICE bool checkRuntimeInitialized(kmp_Ident *loc) {
 // support: get info from machine
 ////////////////////////////////////////////////////////////////////////////////
 
-////////////////////////////////////////////////////////////////////////////////
-//
-// Calls to the NVPTX layer  (assuming 1D layout)
-//
-////////////////////////////////////////////////////////////////////////////////
-
-DEVICE unsigned GetWarpId() { return GetThreadIdInBlock() / WARPSIZE; }
-
-DEVICE unsigned GetLaneId() { return GetThreadIdInBlock() & (WARPSIZE - 1); }
-
 ////////////////////////////////////////////////////////////////////////////////
 //
 // Calls to the Generic Scheme Implementation Layer (assuming 1D layout)

diff  --git a/openmp/libomptarget/deviceRTLs/common/support.h b/openmp/libomptarget/deviceRTLs/common/support.h
index d3472842f980..913c4c3c323f 100644
--- a/openmp/libomptarget/deviceRTLs/common/support.h
+++ b/openmp/libomptarget/deviceRTLs/common/support.h
@@ -50,10 +50,6 @@ DEVICE bool checkRuntimeInitialized(kmp_Ident *loc);
 // get info from machine
 ////////////////////////////////////////////////////////////////////////////////
 
-// get low level ids of resources
-DEVICE unsigned GetWarpId();
-DEVICE unsigned GetLaneId();
-
 // get global ids to locate tread/team info (constant regardless of OMP)
 DEVICE int GetLogicalThreadIdInBlock(bool isSPMDExecutionMode);
 DEVICE int GetMasterThreadID();

diff  --git a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
index 4741ce8218e1..04d510b11591 100644
--- a/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
+++ b/openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@@ -189,6 +189,8 @@ INLINE int GetThreadIdInBlock() { return threadIdx.x; }
 INLINE int GetBlockIdInKernel() { return blockIdx.x; }
 INLINE int GetNumberOfBlocksInKernel() { return gridDim.x; }
 INLINE int GetNumberOfThreadsInBlock() { return blockDim.x; }
+INLINE unsigned GetWarpId() { return GetThreadIdInBlock() / WARPSIZE; }
+INLINE unsigned GetLaneId() { return GetThreadIdInBlock() & (WARPSIZE - 1); }
 
 // Return true if this is the first active thread in the warp.
 INLINE bool __kmpc_impl_is_first_active_thread() {


        


More information about the Openmp-commits mailing list