[Openmp-commits] [PATCH] D66853: [libomptarget] Refactor shfl_down_sync macro to inline function

Jon Chesterfield via Phabricator via Openmp-commits openmp-commits at lists.llvm.org
Tue Aug 27 18:48:30 PDT 2019


This revision was automatically updated to reflect the committed changes.
Closed by commit rL370146: [libomptarget] Refactor shfl_down_sync macro to inline function (authored by JonChesterfield).
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D66853?vs=217552&id=217563#toc

Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D66853/new/

https://reviews.llvm.org/D66853

Files:
  openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
  openmp/trunk/libomptarget/deviceRTLs/nvptx/src/reduction.cu
  openmp/trunk/libomptarget/deviceRTLs/nvptx/src/target_impl.h


Index: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/target_impl.h
===================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/target_impl.h
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/target_impl.h
@@ -43,6 +43,7 @@
 #endif
 
 // In Cuda 9.0, the *_sync() version takes an extra argument 'mask'.
+
 INLINE int32_t __kmpc_impl_shfl_sync(__kmpc_impl_lanemask_t Mask, int32_t Var,
                                      int32_t SrcLane) {
 #if CUDA_VERSION >= 9000
@@ -50,6 +51,15 @@
 #else
   return __shfl(Var, SrcLane);
 #endif // CUDA_VERSION
+
+INLINE int32_t __kmpc_impl_shfl_down_sync(__kmpc_impl_lanemask_t Mask,
+                                          int32_t Var, uint32_t Delta,
+                                          int32_t Width) {
+#if CUDA_VERSION >= 9000
+  return __shfl_down_sync(Mask, Var, Delta, Width);
+#else
+  return __shfl_down(Var, Delta, Width);
+#endif // CUDA_VERSION
 }
 
 INLINE void __kmpc_impl_syncwarp(int32_t Mask) { __SYNCWARP(Mask); }
Index: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
===================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
@@ -51,13 +51,9 @@
 #ifndef CUDA_VERSION
 #error CUDA_VERSION macro is undefined, something wrong with cuda.
 #elif CUDA_VERSION >= 9000
-#define __SHFL_DOWN_SYNC(mask, var, delta, width)                              \
-  __shfl_down_sync((mask), (var), (delta), (width))
 #define __ACTIVEMASK() __activemask()
 #define __SYNCWARP(Mask) __syncwarp(Mask)
 #else
-#define __SHFL_DOWN_SYNC(mask, var, delta, width)                              \
-  __shfl_down((var), (delta), (width))
 #define __ACTIVEMASK() __ballot(1)
 // In Cuda < 9.0 no need to sync threads in warps.
 #define __SYNCWARP(Mask)
Index: openmp/trunk/libomptarget/deviceRTLs/nvptx/src/reduction.cu
===================================================================
--- openmp/trunk/libomptarget/deviceRTLs/nvptx/src/reduction.cu
+++ openmp/trunk/libomptarget/deviceRTLs/nvptx/src/reduction.cu
@@ -15,6 +15,7 @@
 #include <stdio.h>
 
 #include "omptarget-nvptx.h"
+#include "target_impl.h"
 
 EXTERN
 void __kmpc_nvptx_end_reduce(int32_t global_tid) {}
@@ -23,14 +24,14 @@
 void __kmpc_nvptx_end_reduce_nowait(int32_t global_tid) {}
 
 EXTERN int32_t __kmpc_shuffle_int32(int32_t val, int16_t delta, int16_t size) {
-  return __SHFL_DOWN_SYNC(0xFFFFFFFF, val, delta, size);
+  return __kmpc_impl_shfl_down_sync(0xFFFFFFFF, val, delta, size);
 }
 
 EXTERN int64_t __kmpc_shuffle_int64(int64_t val, int16_t delta, int16_t size) {
    int lo, hi;
    asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "l"(val));
-   hi = __SHFL_DOWN_SYNC(0xFFFFFFFF, hi, delta, size);
-   lo = __SHFL_DOWN_SYNC(0xFFFFFFFF, lo, delta, size);
+   hi = __kmpc_impl_shfl_down_sync(0xFFFFFFFF, hi, delta, size);
+   lo = __kmpc_impl_shfl_down_sync(0xFFFFFFFF, lo, delta, size);
    asm volatile("mov.b64 %0, {%1,%2};" : "=l"(val) : "r"(lo), "r"(hi));
    return val;
 }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D66853.217563.patch
Type: text/x-patch
Size: 3142 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/openmp-commits/attachments/20190828/d4ef1eab/attachment-0001.bin>


More information about the Openmp-commits mailing list