[Openmp-commits] [openmp] r370781 - [libomptarget] Refactor activemask macro to inline function

Jon Chesterfield via Openmp-commits openmp-commits at lists.llvm.org
Tue Sep 3 09:31:30 PDT 2019


Author: jonchesterfield
Date: Tue Sep  3 09:31:30 2019
New Revision: 370781

URL: http://llvm.org/viewvc/llvm-project?rev=370781&view=rev
Log:
[libomptarget] Refactor activemask macro to inline function

Summary:
[libomptarget] Refactor activemask macro to inline function
See also abandoned D66846, split into this diff and others.

Reviewers: jdoerfert, ABataev, grokos, ronlieb, gregrodgers

Reviewed By: jdoerfert, ABataev

Subscribers: openmp-commits

Tags: #openmp

Differential Revision: https://reviews.llvm.org/D66851

Modified:
    openmp/trunk/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
    openmp/trunk/libomptarget/deviceRTLs/nvptx/src/loop.cu
    openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
    openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu
    openmp/trunk/libomptarget/deviceRTLs/nvptx/src/reduction.cu
    openmp/trunk/libomptarget/deviceRTLs/nvptx/src/supporti.h
    openmp/trunk/libomptarget/deviceRTLs/nvptx/src/sync.cu
    openmp/trunk/libomptarget/deviceRTLs/nvptx/src/target_impl.h

Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu?rev=370781&r1=370780&r2=370781&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu Tue Sep  3 09:31:30 2019
@@ -20,7 +20,7 @@ INLINE static unsigned getLaneId() { ret
 
 // Return true if this is the first active thread in the warp.
 INLINE static bool IsWarpMasterActiveThread() {
-  unsigned long long Mask = __ACTIVEMASK();
+  unsigned long long Mask = __kmpc_impl_activemask();
   unsigned long long ShNum = WARPSIZE - (GetThreadIdInBlock() % WARPSIZE);
   unsigned long long Sh = Mask << ShNum;
   // Truncate Sh to the 32 lower bits
@@ -112,7 +112,7 @@ EXTERN void *__kmpc_data_sharing_environ
           (unsigned long long)SharingDefaultDataSize);
 
   unsigned WID = getWarpId();
-  unsigned CurActiveThreads = __ACTIVEMASK();
+  __kmpc_impl_lanemask_t CurActiveThreads = __kmpc_impl_activemask();
 
   __kmpc_data_sharing_slot *&SlotP = DataSharingState.SlotPtr[WID];
   void *&StackP = DataSharingState.StackPtr[WID];
@@ -252,7 +252,7 @@ EXTERN void __kmpc_data_sharing_environm
     return;
   }
 
-  int32_t CurActive = __ACTIVEMASK();
+  __kmpc_impl_lanemask_t CurActive = __kmpc_impl_activemask();
 
   // Only the warp master can restore the stack and frame information, and only
   // if there are no other threads left behind in this environment (i.e. the
@@ -378,7 +378,7 @@ INLINE static void* data_sharing_push_st
   // Frame pointer must be visible to all workers in the same warp.
   const unsigned WID = getWarpId();
   void *FrameP = 0;
-  int32_t CurActive = __ACTIVEMASK();
+  __kmpc_impl_lanemask_t CurActive = __kmpc_impl_activemask();
 
   if (IsWarpMaster) {
     // SlotP will point to either the shared memory slot or an existing

Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/loop.cu
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/loop.cu?rev=370781&r1=370780&r2=370781&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/loop.cu (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/loop.cu Tue Sep  3 09:31:30 2019
@@ -389,7 +389,7 @@ public:
   }
 
   INLINE static uint64_t NextIter() {
-    __kmpc_impl_lanemask_t active = __ACTIVEMASK();
+    __kmpc_impl_lanemask_t active = __kmpc_impl_activemask();
     uint32_t leader = __kmpc_impl_ffs(active) - 1;
     uint32_t change = __kmpc_impl_popc(active);
     __kmpc_impl_lanemask_t lane_mask_lt = __kmpc_impl_lanemask_lt();

Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h?rev=370781&r1=370780&r2=370781&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h Tue Sep  3 09:31:30 2019
@@ -45,17 +45,6 @@
 #define BARRIER_COUNTER 0
 #define ORDERED_COUNTER 1
 
-// Macros for Cuda intrinsics
-// In Cuda 9.0, the *_sync() version takes an extra argument 'mask'.
-// Also, __ballot(1) in Cuda 8.0 is replaced with __activemask().
-#ifndef CUDA_VERSION
-#error CUDA_VERSION macro is undefined, something wrong with cuda.
-#elif CUDA_VERSION >= 9000
-#define __ACTIVEMASK() __activemask()
-#else
-#define __ACTIVEMASK() __ballot(1)
-#endif // CUDA_VERSION
-
 // arguments needed for L0 parallelism only.
 class omptarget_nvptx_SharedArgs {
 public:

Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu?rev=370781&r1=370780&r2=370781&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu Tue Sep  3 09:31:30 2019
@@ -53,7 +53,7 @@ EXTERN bool __kmpc_kernel_convergent_sim
   uint32_t WorkRemaining = ConvergentMask >> (*LaneSource + 1);
   *LaneSource += __kmpc_impl_ffs(WorkRemaining);
   *IsFinal = __kmpc_impl_popc(WorkRemaining) == 1;
-  uint32_t lanemask_lt = __kmpc_impl_lanemask_lt();
+  __kmpc_impl_lanemask_t lanemask_lt = __kmpc_impl_lanemask_lt();
   *LaneId = __kmpc_impl_popc(ConvergentMask & lanemask_lt);
 
   int threadId = GetLogicalThreadIdInBlock(isSPMDMode());
@@ -126,7 +126,7 @@ EXTERN bool __kmpc_kernel_convergent_par
   uint32_t WorkRemaining = ConvergentMask >> (*LaneSource + 1);
   *LaneSource += __kmpc_impl_ffs(WorkRemaining);
   *IsFinal = __kmpc_impl_popc(WorkRemaining) == 1;
-  uint32_t lanemask_lt = __kmpc_impl_lanemask_lt();
+  __kmpc_impl_lanemask_t lanemask_lt = __kmpc_impl_lanemask_lt();
   uint32_t OmpId = __kmpc_impl_popc(ConvergentMask & lanemask_lt);
 
   int threadId = GetLogicalThreadIdInBlock(isSPMDMode());

Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/reduction.cu
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/reduction.cu?rev=370781&r1=370780&r2=370781&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/reduction.cu (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/reduction.cu Tue Sep  3 09:31:30 2019
@@ -61,12 +61,12 @@ INLINE static uint32_t
 gpu_irregular_simd_reduce(void *reduce_data, kmp_ShuffleReductFctPtr shflFct) {
   uint32_t size, remote_id, physical_lane_id;
   physical_lane_id = GetThreadIdInBlock() % WARPSIZE;
-  uint32_t lanemask_lt = __kmpc_impl_lanemask_lt();
-  uint32_t Liveness = __ACTIVEMASK();
+  __kmpc_impl_lanemask_t lanemask_lt = __kmpc_impl_lanemask_lt();
+  __kmpc_impl_lanemask_t Liveness = __kmpc_impl_activemask();
   uint32_t logical_lane_id = __kmpc_impl_popc(Liveness & lanemask_lt) * 2;
-  uint32_t lanemask_gt = __kmpc_impl_lanemask_gt();
+  __kmpc_impl_lanemask_t lanemask_gt = __kmpc_impl_lanemask_gt();
   do {
-    Liveness = __ACTIVEMASK();
+    Liveness = __kmpc_impl_activemask();
     remote_id = __kmpc_impl_ffs(Liveness & lanemask_gt);
     size = __kmpc_impl_popc(Liveness);
     logical_lane_id /= 2;
@@ -81,7 +81,7 @@ int32_t __kmpc_nvptx_simd_reduce_nowait(
                                         size_t reduce_size, void *reduce_data,
                                         kmp_ShuffleReductFctPtr shflFct,
                                         kmp_InterWarpCopyFctPtr cpyFct) {
-  uint32_t Liveness = __ACTIVEMASK();
+  __kmpc_impl_lanemask_t Liveness = __kmpc_impl_activemask();
   if (Liveness == 0xffffffff) {
     gpu_regular_warp_reduce(reduce_data, shflFct);
     return GetThreadIdInBlock() % WARPSIZE ==
@@ -142,7 +142,7 @@ static int32_t nvptx_parallel_reduce_now
   }
   return BlockThreadId == 0;
 #else
-  uint32_t Liveness = __ACTIVEMASK();
+  __kmpc_impl_lanemask_t Liveness = __kmpc_impl_activemask();
   if (Liveness == 0xffffffff) // Full warp
     gpu_regular_warp_reduce(reduce_data, shflFct);
   else if (!(Liveness & (Liveness + 1))) // Partial warp but contiguous lanes
@@ -317,7 +317,7 @@ static int32_t nvptx_teams_reduce_nowait
     ldFct(reduce_data, scratchpad, i, NumTeams, /*Load and reduce*/ 1);
 
   // Reduce across warps to the warp master.
-  uint32_t Liveness = __ACTIVEMASK();
+  __kmpc_impl_lanemask_t Liveness = __kmpc_impl_activemask();
   if (Liveness == 0xffffffff) // Full warp
     gpu_regular_warp_reduce(reduce_data, shflFct);
   else // Partial warp but contiguous lanes

Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/supporti.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/supporti.h?rev=370781&r1=370780&r2=370781&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/supporti.h (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/supporti.h Tue Sep  3 09:31:30 2019
@@ -204,9 +204,9 @@ INLINE int IsTeamMaster(int ompThreadId)
 // Parallel level
 
 INLINE void IncParallelLevel(bool ActiveParallel) {
-  unsigned Active = __ACTIVEMASK();
+  __kmpc_impl_lanemask_t Active = __kmpc_impl_activemask();
   __kmpc_impl_syncwarp(Active);
-  unsigned LaneMaskLt = __kmpc_impl_lanemask_lt();
+  __kmpc_impl_lanemask_t LaneMaskLt = __kmpc_impl_lanemask_lt();
   unsigned Rank = __kmpc_impl_popc(Active & LaneMaskLt);
   if (Rank == 0) {
     parallelLevel[GetWarpId()] +=
@@ -217,9 +217,9 @@ INLINE void IncParallelLevel(bool Active
 }
 
 INLINE void DecParallelLevel(bool ActiveParallel) {
-  unsigned Active = __ACTIVEMASK();
+  __kmpc_impl_lanemask_t Active = __kmpc_impl_activemask();
   __kmpc_impl_syncwarp(Active);
-  unsigned LaneMaskLt = __kmpc_impl_lanemask_lt();
+  __kmpc_impl_lanemask_t LaneMaskLt = __kmpc_impl_lanemask_lt();
   unsigned Rank = __kmpc_impl_popc(Active & LaneMaskLt);
   if (Rank == 0) {
     parallelLevel[GetWarpId()] -=

Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/sync.cu
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/sync.cu?rev=370781&r1=370780&r2=370781&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/sync.cu (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/sync.cu Tue Sep  3 09:31:30 2019
@@ -142,7 +142,7 @@ EXTERN void __kmpc_flush(kmp_Ident *loc)
 
 EXTERN int32_t __kmpc_warp_active_thread_mask() {
   PRINT0(LD_IO, "call __kmpc_warp_active_thread_mask\n");
-  return __ACTIVEMASK();
+  return __kmpc_impl_activemask();
 }
 
 ////////////////////////////////////////////////////////////////////////////////

Modified: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/target_impl.h
URL: http://llvm.org/viewvc/llvm-project/openmp/trunk/libomptarget/deviceRTLs/nvptx/src/target_impl.h?rev=370781&r1=370780&r2=370781&view=diff
==============================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/target_impl.h (original)
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/target_impl.h Tue Sep  3 09:31:30 2019
@@ -48,6 +48,16 @@ INLINE uint32_t __kmpc_impl_popc(uint32_
 #error CUDA_VERSION macro is undefined, something wrong with cuda.
 #endif
 
+// In Cuda 9.0, __ballot(1) from Cuda 8.0 is replaced with __activemask().
+
+INLINE __kmpc_impl_lanemask_t __kmpc_impl_activemask() {
+#if CUDA_VERSION >= 9000
+  return __activemask();
+#else
+  return __ballot(1);
+#endif
+}
+
 // In Cuda 9.0, the *_sync() version takes an extra argument 'mask'.
 
 INLINE int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t Mask, int32_t Var,




More information about the Openmp-commits mailing list