[Openmp-commits] [PATCH] D145831: [OpenMP][libomptarget] Add support for critical regions in AMD GPU device offloading
Gheorghe-Teodor Bercea via Phabricator via Openmp-commits
openmp-commits at lists.llvm.org
Fri Mar 10 15:23:27 PST 2023
doru1004 created this revision.
doru1004 added reviewers: ronl, carlo.bertolli, jdoerfert, dhruvachak, gregrodgers.
doru1004 added a project: OpenMP.
Herald added subscribers: sunshaoce, guansong, yaxunl.
Herald added a project: All.
doru1004 requested review of this revision.
Herald added subscribers: openmp-commits, sstefan1.
This patch adds support for `pragma omp critical` regions when offloading to AMD devices.
Repository:
rG LLVM Github Monorepo
https://reviews.llvm.org/D145831
Files:
openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
openmp/libomptarget/test/offloading/target_critical_region.cpp
Index: openmp/libomptarget/test/offloading/target_critical_region.cpp
===================================================================
--- /dev/null
+++ openmp/libomptarget/test/offloading/target_critical_region.cpp
@@ -0,0 +1,36 @@
+// RUN: %libomptarget-compilexx-run-and-check-generic
+
+// UNSUPPORTED: nvptx64-nvidia-cuda
+// UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+// UNSUPPORTED: x86_64-pc-linux-gnu
+// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+
+#include <omp.h>
+#include <stdio.h>
+
+#define N 1000000
+
+int A[N];
+int main() {
+ for (int i = 0; i < N; i++)
+ A[i] = 1;
+
+ int sum[1];
+ sum[0] = 0;
+
+#pragma omp target teams distribute parallel for num_teams(256) \
+ schedule(static, 1) map(to \
+ : A[:N]) map(tofrom \
+ : sum[:1])
+ {
+ for (int i = 0; i < N; i++) {
+#pragma omp critical
+ { sum[0] += A[i]; }
+ }
+ }
+
+ // CHECK: SUM = 1000000
+ printf("SUM = %d\n", sum[0]);
+
+ return 0;
+}
Index: openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
===================================================================
--- openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
+++ openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
@@ -262,12 +262,32 @@
void syncThreads() { __builtin_amdgcn_s_barrier(); }
void syncThreadsAligned() { syncThreads(); }
-// TODO: Don't have wavefront lane locks. Possibly can't have them.
-void unsetLock(omp_lock_t *) { __builtin_trap(); }
-int testLock(omp_lock_t *) { __builtin_trap(); }
-void initLock(omp_lock_t *) { __builtin_trap(); }
-void destroyLock(omp_lock_t *) { __builtin_trap(); }
-void setLock(omp_lock_t *) { __builtin_trap(); }
+constexpr uint32_t UNSET = 0;
+constexpr uint32_t SET = 1;
+
+void unsetLock(omp_lock_t *Lock) {
+ (void)atomicExchange((uint32_t *)Lock, UNSET, atomic::seq_cst);
+}
+
+int testLock(omp_lock_t *Lock) {
+ return atomicAdd((uint32_t *)Lock, 0u, atomic::seq_cst);
+}
+
+void initLock(omp_lock_t *Lock) { unsetLock(Lock); }
+void destroyLock(omp_lock_t *Lock) { unsetLock(Lock); }
+
+void setLock(omp_lock_t *Lock) {
+ uint64_t lowestActiveThread = utils::ffs(mapping::activemask()) - 1;
+ if (mapping::getThreadIdInWarp() == lowestActiveThread) {
+ fenceKernel(atomic::release);
+ while (!atomicCAS((uint32_t *)Lock, UNSET, SET, atomic::relaxed,
+ atomic::relaxed) != UNSET) {
+ __builtin_amdgcn_s_sleep(32);
+ }
+ fenceKernel(atomic::aquire);
+ }
+ // test_lock will now return true for any thread in the warp
+}
#pragma omp end declare variant
///}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D145831.504292.patch
Type: text/x-patch
Size: 2689 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/openmp-commits/attachments/20230310/c19648df/attachment.bin>
More information about the Openmp-commits
mailing list