[PATCH] D68369: Use named constant to indicate all lanes, to handle 32 and 64 wide architectures

Jon Chesterfield via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 4 14:40:50 PDT 2019


This revision was automatically updated to reflect the committed changes.
Closed by commit rL373793: Use named constant to indicate all lanes, to handle 32 and 64 wide architectures (authored by JonChesterfield).
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D68369?vs=222958&id=223299#toc

Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D68369/new/

https://reviews.llvm.org/D68369

Files:
  openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu
  openmp/trunk/libomptarget/deviceRTLs/nvptx/src/reduction.cu
  openmp/trunk/libomptarget/deviceRTLs/nvptx/src/target_impl.h


Index: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/reduction.cu
===================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/reduction.cu
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/reduction.cu
@@ -24,14 +24,14 @@
 void __kmpc_nvptx_end_reduce_nowait(int32_t global_tid) {}
 
 EXTERN int32_t __kmpc_shuffle_int32(int32_t val, int16_t delta, int16_t size) {
-  return __kmpc_impl_shfl_down_sync(0xFFFFFFFF, val, delta, size);
+  return __kmpc_impl_shfl_down_sync(__kmpc_impl_all_lanes, val, delta, size);
 }
 
 EXTERN int64_t __kmpc_shuffle_int64(int64_t val, int16_t delta, int16_t size) {
    uint32_t lo, hi;
    __kmpc_impl_unpack(val, lo, hi);
-   hi = __kmpc_impl_shfl_down_sync(0xFFFFFFFF, hi, delta, size);
-   lo = __kmpc_impl_shfl_down_sync(0xFFFFFFFF, lo, delta, size);
+   hi = __kmpc_impl_shfl_down_sync(__kmpc_impl_all_lanes, hi, delta, size);
+   lo = __kmpc_impl_shfl_down_sync(__kmpc_impl_all_lanes, lo, delta, size);
    return __kmpc_impl_pack(lo, hi);
 }
 
@@ -82,7 +82,7 @@
                                         kmp_ShuffleReductFctPtr shflFct,
                                         kmp_InterWarpCopyFctPtr cpyFct) {
   __kmpc_impl_lanemask_t Liveness = __kmpc_impl_activemask();
-  if (Liveness == 0xffffffff) {
+  if (Liveness == __kmpc_impl_all_lanes) {
     gpu_regular_warp_reduce(reduce_data, shflFct);
     return GetThreadIdInBlock() % WARPSIZE ==
            0; // Result on lane 0 of the simd warp.
@@ -143,7 +143,7 @@
   return BlockThreadId == 0;
 #else
   __kmpc_impl_lanemask_t Liveness = __kmpc_impl_activemask();
-  if (Liveness == 0xffffffff) // Full warp
+  if (Liveness == __kmpc_impl_all_lanes) // Full warp
     gpu_regular_warp_reduce(reduce_data, shflFct);
   else if (!(Liveness & (Liveness + 1))) // Partial warp but contiguous lanes
     gpu_irregular_warp_reduce(reduce_data, shflFct,
@@ -318,7 +318,7 @@
 
   // Reduce across warps to the warp master.
   __kmpc_impl_lanemask_t Liveness = __kmpc_impl_activemask();
-  if (Liveness == 0xffffffff) // Full warp
+  if (Liveness == __kmpc_impl_all_lanes) // Full warp
     gpu_regular_warp_reduce(reduce_data, shflFct);
   else // Partial warp but contiguous lanes
     gpu_irregular_warp_reduce(reduce_data, shflFct,
Index: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu
===================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/parallel.cu
@@ -320,7 +320,7 @@
     // can be changed incorrectly because of threads divergence.
     bool IsActiveParallelRegion = threadsInTeam != 1;
     IncParallelLevel(IsActiveParallelRegion,
-                     IsActiveParallelRegion ? 0xFFFFFFFF : 1u);
+                     IsActiveParallelRegion ? __kmpc_impl_all_lanes : 1u);
   }
 
   return isActive;
@@ -347,7 +347,7 @@
   // be changed incorrectly because of threads divergence.
     bool IsActiveParallelRegion = threadsInTeam != 1;
     DecParallelLevel(IsActiveParallelRegion,
-                     IsActiveParallelRegion ? 0xFFFFFFFF : 1u);
+                     IsActiveParallelRegion ? __kmpc_impl_all_lanes : 1u);
 }
 
 ////////////////////////////////////////////////////////////////////////////////
Index: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/target_impl.h
===================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/target_impl.h
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@@ -27,6 +27,8 @@
 }
 
 typedef uint32_t __kmpc_impl_lanemask_t;
+static const __kmpc_impl_lanemask_t __kmpc_impl_all_lanes =
+    UINT32_C(0xffffffff);
 
 INLINE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_lt() {
   __kmpc_impl_lanemask_t res;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D68369.223299.patch
Type: text/x-patch
Size: 3822 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20191004/1390eb13/attachment.bin>


More information about the llvm-commits mailing list