[Openmp-commits] [llvm] [openmp] Reapply "[OpenMP][OMPX] Add shfl_down_sync (#93311)" (PR #94139)
Shilei Tian via Openmp-commits
openmp-commits at lists.llvm.org
Sat Jun 1 21:15:14 PDT 2024
================
@@ -0,0 +1,71 @@
+// RUN: %libomptarget-compilexx-run-and-check-generic
+//
+// UNSUPPORTED: x86_64-pc-linux-gnu
+// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+// UNSUPPORTED: aarch64-unknown-linux-gnu
+// UNSUPPORTED: aarch64-unknown-linux-gnu-LTO
+// UNSUPPORTED: s390x-ibm-linux-gnu
+// UNSUPPORTED: s390x-ibm-linux-gnu-LTO
+
+#include <cassert>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <limits>
+#include <ompx.h>
+#include <type_traits>
+
+template <typename T, std::enable_if_t<std::is_integral<T>::value, bool> = true> // overload enabled only when T is an integral type
+bool equal(T LHS, T RHS) { // returns true when the two integral values match exactly
+ return LHS == RHS;
+}
+
+template <typename T,
+ std::enable_if_t<std::is_floating_point<T>::value, bool> = true> // overload enabled only when T is a floating-point type
+bool equal(T LHS, T RHS) { // returns true when the two values differ by less than epsilon
+ return std::abs(LHS - RHS) < std::numeric_limits<T>::epsilon(); // absolute (not relative) tolerance of std::numeric_limits<T>::epsilon()
+}
+
+template <typename T> void test() {
+ constexpr const int num_blocks = 1;
+ constexpr const int block_size = 256;
+ constexpr const int N = num_blocks * block_size;
+ T *data = new T[N];
+ int *res = new int[N];
+
+ for (int i = 0; i < N; ++i)
+ data[i] = i;
+
+#pragma omp target teams ompx_bare num_teams(num_blocks) thread_limit(block_size) \
+ map(to: data[0:N]) map(from: res[0:N])
+ {
+ int tid = ompx_thread_id_x();
+ data[tid] = ompx::shfl_down_sync(~0U, data[tid], 1);
+#ifdef __AMDGCN_WAVEFRONT_SIZE
+ int warp_size = __AMDGCN_WAVEFRONT_SIZE;
+#else
+ int warp_size = 32;
+#endif
+ if ((tid & (warp_size - 1)) != warp_size - 1)
+ res[tid] = equal(data[tid], static_cast<T>(tid + 1));
+ else
+ res[tid] = equal(data[tid], static_cast<T>(tid));
----------------
shiltian wrote:
Since we don't have a reliable way to get the warp size on the host, I figure it is best to verify the result on device directly.
https://github.com/llvm/llvm-project/pull/94139
More information about the Openmp-commits
mailing list