[Openmp-commits] [openmp] cf9eeb6 - Revert "Reapply "[OpenMP][OMPX] Add shfl_down_sync (#93311)""

Shilei Tian via Openmp-commits openmp-commits at lists.llvm.org
Sat May 25 22:04:50 PDT 2024


Author: Shilei Tian
Date: 2024-05-26T01:04:39-04:00
New Revision: cf9eeb67e553137c979dca50bbf912acea8889a5

URL: https://github.com/llvm/llvm-project/commit/cf9eeb67e553137c979dca50bbf912acea8889a5
DIFF: https://github.com/llvm/llvm-project/commit/cf9eeb67e553137c979dca50bbf912acea8889a5.diff

LOG: Revert "Reapply "[OpenMP][OMPX] Add shfl_down_sync (#93311)""

This reverts commit 7b4865582299294455bc816358fd88a9c6e5e0be.

Added: 
    

Modified: 
    offload/DeviceRTL/include/Utils.h
    offload/DeviceRTL/src/Mapping.cpp
    offload/DeviceRTL/src/Utils.cpp
    openmp/runtime/src/include/ompx.h.var

Removed: 
    offload/test/offloading/ompx_bare_shfl_down_sync.cpp


################################################################################
diff  --git a/offload/DeviceRTL/include/Utils.h b/offload/DeviceRTL/include/Utils.h
index 82e2397b5958b..d43b7f5c95de1 100644
--- a/offload/DeviceRTL/include/Utils.h
+++ b/offload/DeviceRTL/include/Utils.h
@@ -25,8 +25,6 @@ int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane);
 
 int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, int32_t Width);
 
-int64_t shuffleDown(uint64_t Mask, int64_t Var, uint32_t Delta, int32_t Width);
-
 uint64_t ballotSync(uint64_t Mask, int32_t Pred);
 
 /// Return \p LowBits and \p HighBits packed into a single 64 bit value.

diff  --git a/offload/DeviceRTL/src/Mapping.cpp b/offload/DeviceRTL/src/Mapping.cpp
index c1ce878746a69..4f39d2a299ee6 100644
--- a/offload/DeviceRTL/src/Mapping.cpp
+++ b/offload/DeviceRTL/src/Mapping.cpp
@@ -364,30 +364,8 @@ _TGT_KERNEL_LANGUAGE(block_id, getBlockIdInKernel)
 _TGT_KERNEL_LANGUAGE(block_dim, getNumberOfThreadsInBlock)
 _TGT_KERNEL_LANGUAGE(grid_dim, getNumberOfBlocksInKernel)
 
-extern "C" {
-uint64_t ompx_ballot_sync(uint64_t mask, int pred) {
+extern "C" uint64_t ompx_ballot_sync(uint64_t mask, int pred) {
   return utils::ballotSync(mask, pred);
 }
 
-int ompx_shfl_down_sync_i(uint64_t mask, int var, unsigned delta, int width) {
-  return utils::shuffleDown(mask, var, delta, width);
-}
-
-float ompx_shfl_down_sync_f(uint64_t mask, float var, unsigned delta,
-                            int width) {
-  return utils::convertViaPun<float>(utils::shuffleDown(
-      mask, utils::convertViaPun<int32_t>(var), delta, width));
-}
-
-long ompx_shfl_down_sync_l(uint64_t mask, long var, unsigned delta, int width) {
-  return utils::shuffleDown(mask, var, delta, width);
-}
-
-double ompx_shfl_down_sync_d(uint64_t mask, double var, unsigned delta,
-                             int width) {
-  return utils::convertViaPun<double>(utils::shuffleDown(
-      mask, utils::convertViaPun<int64_t>(var), delta, width));
-}
-}
-
 #pragma omp end declare target

diff  --git a/offload/DeviceRTL/src/Utils.cpp b/offload/DeviceRTL/src/Utils.cpp
index 53cc803234867..606e3bec0d33c 100644
--- a/offload/DeviceRTL/src/Utils.cpp
+++ b/offload/DeviceRTL/src/Utils.cpp
@@ -113,15 +113,6 @@ int32_t utils::shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta,
   return impl::shuffleDown(Mask, Var, Delta, Width);
 }
 
-int64_t utils::shuffleDown(uint64_t Mask, int64_t Var, uint32_t Delta,
-                           int32_t Width) {
-  uint32_t Lo, Hi;
-  utils::unpack(Var, Lo, Hi);
-  Hi = impl::shuffleDown(Mask, Hi, Delta, Width);
-  Lo = impl::shuffleDown(Mask, Lo, Delta, Width);
-  return utils::pack(Lo, Hi);
-}
-
 uint64_t utils::ballotSync(uint64_t Mask, int32_t Pred) {
   return impl::ballotSync(Mask, Pred);
 }
@@ -134,7 +125,11 @@ int32_t __kmpc_shuffle_int32(int32_t Val, int16_t Delta, int16_t SrcLane) {
 }
 
 int64_t __kmpc_shuffle_int64(int64_t Val, int16_t Delta, int16_t Width) {
-  return utils::shuffleDown(lanes::All, Val, Delta, Width);
+  uint32_t lo, hi;
+  utils::unpack(Val, lo, hi);
+  hi = impl::shuffleDown(lanes::All, hi, Delta, Width);
+  lo = impl::shuffleDown(lanes::All, lo, Delta, Width);
+  return utils::pack(lo, hi);
 }
 }
 

diff  --git a/offload/test/offloading/ompx_bare_shfl_down_sync.cpp b/offload/test/offloading/ompx_bare_shfl_down_sync.cpp
deleted file mode 100644
index 66044dc461c53..0000000000000
--- a/offload/test/offloading/ompx_bare_shfl_down_sync.cpp
+++ /dev/null
@@ -1,67 +0,0 @@
-// RUN: %libomptarget-compilexx-run-and-check-generic
-//
-// UNSUPPORTED: x86_64-pc-linux-gnu
-// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
-// UNSUPPORTED: aarch64-unknown-linux-gnu
-// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
-// UNSUPPORTED: s390x-ibm-linux-gnu
-// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
-
-#ifdef __AMDGCN_WAVEFRONT_SIZE
-#define WARP_SIZE __AMDGCN_WAVEFRONT_SIZE
-#else
-#define WARP_SIZE 32
-#endif
-
-#include <cassert>
-#include <cmath>
-#include <cstdint>
-#include <cstdio>
-#include <limits>
-#include <ompx.h>
-#include <type_traits>
-
-template <typename T, std::enable_if_t<std::is_integral<T>::value, bool> = true>
-bool equal(T LHS, T RHS) {
-  return LHS == RHS;
-}
-
-template <typename T,
-          std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
-bool equal(T LHS, T RHS) {
-  return std::abs(LHS - RHS) < std::numeric_limits<T>::epsilon();
-}
-
-template <typename T> void test() {
-  constexpr const int num_blocks = 1;
-  constexpr const int block_size = 256;
-  constexpr const int N = num_blocks * block_size;
-  T *data = new T[N];
-
-  for (int i = 0; i < N; ++i)
-    data[i] = i;
-
-#pragma omp target teams ompx_bare num_teams(num_blocks)                       \
-    thread_limit(block_size) map(tofrom : data[0 : N])
-  {
-    int tid = ompx_thread_id_x();
-    data[tid] = ompx::shfl_down_sync(~0U, data[tid], 1);
-  }
-
-  for (int i = N - 1; i > 0; i -= WARP_SIZE)
-    for (int j = i; j > i - WARP_SIZE + 1; --j)
-      assert(equal(data[i], data[i - 1]));
-
-  delete[] data;
-}
-
-int main(int argc, char *argv[]) {
-  test<int32_t>();
-  test<int64_t>();
-  test<float>();
-  test<double>();
-  // CHECK: PASS
-  printf("PASS\n");
-
-  return 0;
-}

diff  --git a/openmp/runtime/src/include/ompx.h.var b/openmp/runtime/src/include/ompx.h.var
index 623f0b9c315bd..19851880c3ac3 100644
--- a/openmp/runtime/src/include/ompx.h.var
+++ b/openmp/runtime/src/include/ompx.h.var
@@ -9,12 +9,6 @@
 #ifndef __OMPX_H
 #define __OMPX_H
 
-#ifdef __AMDGCN_WAVEFRONT_SIZE
-#define __WARP_SIZE __AMDGCN_WAVEFRONT_SIZE
-#else
-#define __WARP_SIZE 32
-#endif
-
 typedef unsigned long uint64_t;
 
 #ifdef __cplusplus
@@ -81,11 +75,11 @@ _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_C(grid_dim, 1)
   static inline RETTY ompx_##NAME(ARGS) { BODY; }
 
 _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block, int Ordering,
-                                      _Pragma("omp barrier"))
+                                      _Pragma("omp barrier"));
 _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block_acq_rel, void,
-                                      ompx_sync_block(ompx_acq_rel))
+                                      ompx_sync_block(ompx_acq_rel));
 _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C(void, sync_block_divergent, int Ordering,
-                                      ompx_sync_block(Ordering))
+                                      ompx_sync_block(Ordering));
 #undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_C
 ///}
 
@@ -93,22 +87,6 @@ static inline uint64_t ompx_ballot_sync(uint64_t mask, int pred) {
   __builtin_trap();
 }
 
-/// ompx_shfl_down_sync_{i,f,l,d}
-///{
-#define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(TYPE, TY)                \
-  static inline TYPE ompx_shfl_down_sync_##TY(uint64_t mask, TYPE var,         \
-                                              unsigned delta, int width) {     \
-    __builtin_trap();                                                          \
-  }
-
-_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(int, i)
-_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(float, f)
-_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(long, l)
-_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(double, d)
-
-#undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL
-///}
-
 #pragma omp end declare variant
 
 /// ompx_{sync_block}_{,divergent}
@@ -116,9 +94,9 @@ _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC_HOST_IMPL(double, d)
 #define _TGT_KERNEL_LANGUAGE_DECL_SYNC_C(RETTY, NAME, ARGS)         \
   RETTY ompx_##NAME(ARGS);
 
-_TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block, int Ordering)
-_TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block_acq_rel, void)
-_TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block_divergent, int Ordering)
+_TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block, int Ordering);
+_TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block_acq_rel, void);
+_TGT_KERNEL_LANGUAGE_DECL_SYNC_C(void, sync_block_divergent, int Ordering);
 #undef _TGT_KERNEL_LANGUAGE_DECL_SYNC_C
 ///}
 
@@ -139,20 +117,6 @@ _TGT_KERNEL_LANGUAGE_DECL_GRID_C(grid_dim)
 
 uint64_t ompx_ballot_sync(uint64_t mask, int pred);
 
-/// ompx_shfl_down_sync_{i,f,l,d}
-///{
-#define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(TYPE, TY)                          \
-  TYPE ompx_shfl_down_sync_##TY(uint64_t mask, TYPE var, unsigned delta,       \
-                                int width);
-
-_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(int, i)
-_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(float, f)
-_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(long, l)
-_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(double, d)
-
-#undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC
-///}
-
 #ifdef __cplusplus
 }
 #endif
@@ -198,9 +162,9 @@ _TGT_KERNEL_LANGUAGE_HOST_IMPL_GRID_CXX(grid_dim)
   }
 
 _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(void, sync_block, int Ordering = acc_rel,
-                                        Ordering)
+                                        Ordering);
 _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX(void, sync_block_divergent,
-                                        int Ordering = acc_rel, Ordering)
+                                        int Ordering = acc_rel, Ordering);
 #undef _TGT_KERNEL_LANGUAGE_HOST_IMPL_SYNC_CXX
 ///}
 
@@ -208,22 +172,6 @@ static inline uint64_t ballot_sync(uint64_t mask, int pred) {
   return ompx_ballot_sync(mask, pred);
 }
 
-/// shfl_down_sync
-///{
-#define _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(TYPE, TY)                          \
-  static inline TYPE shfl_down_sync(uint64_t mask, TYPE var, unsigned delta,   \
-                                    int width = __WARP_SIZE) {                 \
-    return ompx_shfl_down_sync_##TY(mask, var, delta, width);                  \
-  }
-
-_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(int, i)
-_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(float, f)
-_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(long, l)
-_TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC(double, d)
-
-#undef _TGT_KERNEL_LANGUAGE_SHFL_DOWN_SYNC
-///}
-
 } // namespace ompx
 #endif
 


        


More information about the Openmp-commits mailing list