[Openmp-commits] [PATCH] D66857: [libomptarget] Refactor syncwarp macro to inline function
Jon Chesterfield via Phabricator via Openmp-commits
openmp-commits at lists.llvm.org
Tue Aug 27 18:23:05 PDT 2019
JonChesterfield created this revision.
JonChesterfield added reviewers: jdoerfert, ABataev, grokos, ronlieb, gregrodgers.
Herald added a project: OpenMP.
Herald added a subscriber: openmp-commits.
[libomptarget] Refactor syncwarp macro to inline function
See also the abandoned D66846 <https://reviews.llvm.org/D66846>, which was split into this diff and others.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D66857
Files:
openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h
openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
Index: openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
===================================================================
--- openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
+++ openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@@ -38,6 +38,16 @@
INLINE int __kmpc_impl_popc(uint32_t x) { return __popc(x); }
-INLINE void __kmpc_impl_syncwarp(int32_t Mask) { __SYNCWARP(Mask); }
+#ifndef CUDA_VERSION
+#error CUDA_VERSION macro is undefined, something wrong with cuda.
+#endif
+
+INLINE void __kmpc_impl_syncwarp(__kmpc_impl_lanemask_t Mask) {
+#if CUDA_VERSION >= 9000
+ __syncwarp(Mask);
+#else
+ // In Cuda < 9.0 no need to sync threads in warps.
+#endif
+}
#endif
Index: openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h
===================================================================
--- openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h
+++ openmp/libomptarget/deviceRTLs/nvptx/src/supporti.h
@@ -14,6 +14,8 @@
// Execution Parameters
////////////////////////////////////////////////////////////////////////////////
+#include "target_impl.h"
+
INLINE void setExecutionParameters(ExecutionMode EMode, RuntimeMode RMode) {
execution_param = EMode;
execution_param |= RMode;
@@ -203,7 +205,7 @@
INLINE void IncParallelLevel(bool ActiveParallel) {
unsigned Active = __ACTIVEMASK();
- __SYNCWARP(Active);
+ __kmpc_impl_syncwarp(Active);
unsigned LaneMaskLt;
asm("mov.u32 %0, %%lanemask_lt;" : "=r"(LaneMaskLt));
unsigned Rank = __popc(Active & LaneMaskLt);
@@ -212,12 +214,12 @@
(1 + (ActiveParallel ? OMP_ACTIVE_PARALLEL_LEVEL : 0));
__threadfence();
}
- __SYNCWARP(Active);
+ __kmpc_impl_syncwarp(Active);
}
INLINE void DecParallelLevel(bool ActiveParallel) {
unsigned Active = __ACTIVEMASK();
- __SYNCWARP(Active);
+ __kmpc_impl_syncwarp(Active);
unsigned LaneMaskLt;
asm("mov.u32 %0, %%lanemask_lt;" : "=r"(LaneMaskLt));
unsigned Rank = __popc(Active & LaneMaskLt);
@@ -226,7 +228,7 @@
(1 + (ActiveParallel ? OMP_ACTIVE_PARALLEL_LEVEL : 0));
__threadfence();
}
- __SYNCWARP(Active);
+ __kmpc_impl_syncwarp(Active);
}
////////////////////////////////////////////////////////////////////////////////
Index: openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
===================================================================
--- openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
+++ openmp/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
@@ -55,14 +55,11 @@
#define __SHFL_DOWN_SYNC(mask, var, delta, width) \
__shfl_down_sync((mask), (var), (delta), (width))
#define __ACTIVEMASK() __activemask()
-#define __SYNCWARP(Mask) __syncwarp(Mask)
#else
#define __SHFL_SYNC(mask, var, srcLane) __shfl((var), (srcLane))
#define __SHFL_DOWN_SYNC(mask, var, delta, width) \
__shfl_down((var), (delta), (width))
#define __ACTIVEMASK() __ballot(1)
-// In Cuda < 9.0 no need to sync threads in warps.
-#define __SYNCWARP(Mask)
#endif // CUDA_VERSION
#define __SYNCTHREADS_N(n) asm volatile("bar.sync %0;" : : "r"(n) : "memory");
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D66857.217555.patch
Type: text/x-patch
Size: 3141 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/openmp-commits/attachments/20190828/13b6aa1d/attachment-0001.bin>
More information about the Openmp-commits
mailing list