[llvm] [openmp] Reapply "[OpenMP][OMPX] Add shfl_down_sync (#93311)" (PR #94139)
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 3 08:15:46 PDT 2024
================
@@ -0,0 +1,66 @@
+// RUN: %libomptarget-compilexx-run-and-check-generic
+//
+// UNSUPPORTED: x86_64-pc-linux-gnu
+// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: aarch64-unknown-linux-gnu
+// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
+
+#include <cassert>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <limits>
+#include <ompx.h>
+#include <type_traits>
+
+template <typename T, std::enable_if_t<std::is_integral<T>::value, bool> = true>
+bool equal(T LHS, T RHS) {
+ return LHS == RHS;
+}
+
+template <typename T,
+ std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
+bool equal(T LHS, T RHS) {
+ return std::abs(LHS - RHS) < std::numeric_limits<T>::epsilon();
+}
+
+template <typename T> void test() {
+ constexpr const int num_blocks = 1;
+ constexpr const int block_size = 256;
+ constexpr const int N = num_blocks * block_size;
+ int *res = new int[N];
+
+#pragma omp target teams ompx_bare num_teams(num_blocks) thread_limit(block_size) \
+ map(from: res[0:N])
+ {
+ int tid = ompx_thread_id_x();
+ T val = ompx::shfl_down_sync(~0U, static_cast<T>(tid), 1);
+#ifdef __AMDGCN_WAVEFRONT_SIZE
+ int warp_size = __AMDGCN_WAVEFRONT_SIZE;
+#else
+ int warp_size = 32;
+#endif
----------------
shiltian wrote:
Well, the assumption is that we always define this macro (`__AMDGCN_WAVEFRONT_SIZE`) for AMDGPUs when compiling device code.
https://github.com/llvm/llvm-project/pull/94139
More information about the llvm-commits mailing list