[Openmp-commits] [PATCH] D68369: Use -1 to indicate all lanes, to handle 32 and 64 wide architectures
Jon Chesterfield via Phabricator via Openmp-commits
openmp-commits at lists.llvm.org
Wed Oct 2 18:44:52 PDT 2019
JonChesterfield created this revision.
JonChesterfield added reviewers: ABataev, jdoerfert, grokos.
Herald added a project: OpenMP.
Herald added a subscriber: openmp-commits.
Use -1 to indicate all lanes, so that the same mask value works on both 32-wide and 64-wide architectures.
Alternatively, a named constant could be added to target_impl if preferred.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D68369
Files:
openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu
openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
Index: openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
===================================================================
--- openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
+++ openmp/libomptarget/deviceRTLs/nvptx/src/reduction.cu
@@ -24,14 +24,14 @@
void __kmpc_nvptx_end_reduce_nowait(int32_t global_tid) {}
EXTERN int32_t __kmpc_shuffle_int32(int32_t val, int16_t delta, int16_t size) {
- return __kmpc_impl_shfl_down_sync(0xFFFFFFFF, val, delta, size);
+ return __kmpc_impl_shfl_down_sync(-1, val, delta, size);
}
EXTERN int64_t __kmpc_shuffle_int64(int64_t val, int16_t delta, int16_t size) {
uint32_t lo, hi;
__kmpc_impl_unpack(val, lo, hi);
- hi = __kmpc_impl_shfl_down_sync(0xFFFFFFFF, hi, delta, size);
- lo = __kmpc_impl_shfl_down_sync(0xFFFFFFFF, lo, delta, size);
+ hi = __kmpc_impl_shfl_down_sync(-1, hi, delta, size);
+ lo = __kmpc_impl_shfl_down_sync(-1, lo, delta, size);
return __kmpc_impl_pack(lo, hi);
}
@@ -82,7 +82,7 @@
kmp_ShuffleReductFctPtr shflFct,
kmp_InterWarpCopyFctPtr cpyFct) {
__kmpc_impl_lanemask_t Liveness = __kmpc_impl_activemask();
- if (Liveness == 0xffffffff) {
+ if (Liveness == -1) {
gpu_regular_warp_reduce(reduce_data, shflFct);
return GetThreadIdInBlock() % WARPSIZE ==
0; // Result on lane 0 of the simd warp.
@@ -143,7 +143,7 @@
return BlockThreadId == 0;
#else
__kmpc_impl_lanemask_t Liveness = __kmpc_impl_activemask();
- if (Liveness == 0xffffffff) // Full warp
+ if (Liveness == -1) // Full warp
gpu_regular_warp_reduce(reduce_data, shflFct);
else if (!(Liveness & (Liveness + 1))) // Partial warp but contiguous lanes
gpu_irregular_warp_reduce(reduce_data, shflFct,
@@ -318,7 +318,7 @@
// Reduce across warps to the warp master.
__kmpc_impl_lanemask_t Liveness = __kmpc_impl_activemask();
- if (Liveness == 0xffffffff) // Full warp
+ if (Liveness == -1) // Full warp
gpu_regular_warp_reduce(reduce_data, shflFct);
else // Partial warp but contiguous lanes
gpu_irregular_warp_reduce(reduce_data, shflFct,
Index: openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu
===================================================================
--- openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu
+++ openmp/libomptarget/deviceRTLs/nvptx/src/parallel.cu
@@ -320,7 +320,7 @@
// can be changed incorrectly because of threads divergence.
bool IsActiveParallelRegion = threadsInTeam != 1;
IncParallelLevel(IsActiveParallelRegion,
- IsActiveParallelRegion ? 0xFFFFFFFF : 1u);
+ IsActiveParallelRegion ? -1 : 1u);
}
return isActive;
@@ -347,7 +347,7 @@
// be changed incorrectly because of threads divergence.
bool IsActiveParallelRegion = threadsInTeam != 1;
DecParallelLevel(IsActiveParallelRegion,
- IsActiveParallelRegion ? 0xFFFFFFFF : 1u);
+ IsActiveParallelRegion ? -1 : 1u);
}
////////////////////////////////////////////////////////////////////////////////
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D68369.222950.patch
Type: text/x-patch
Size: 3123 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/openmp-commits/attachments/20191003/77b0d89b/attachment.bin>
More information about the Openmp-commits
mailing list