[Openmp-commits] [openmp] 737291f - Add support for critical regions in device code.
Doru Bercea via Openmp-commits
openmp-commits at lists.llvm.org
Fri Mar 24 11:21:13 PDT 2023
Author: Doru Bercea
Date: 2023-03-24T14:20:26-04:00
New Revision: 737291f1691ace49688d6cf0a725ae4579b64dbe
URL: https://github.com/llvm/llvm-project/commit/737291f1691ace49688d6cf0a725ae4579b64dbe
DIFF: https://github.com/llvm/llvm-project/commit/737291f1691ace49688d6cf0a725ae4579b64dbe.diff
LOG: Add support for critical regions in device code.
Review: https://reviews.llvm.org/D145831
Added:
openmp/libomptarget/test/offloading/target_critical_region.cpp
Modified:
openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
Removed:
################################################################################
diff --git a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
index b85e1b3bb2fa4..90d03dd490b24 100644
--- a/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
@@ -130,6 +130,8 @@ int testLock(omp_lock_t *);
void initLock(omp_lock_t *);
void destroyLock(omp_lock_t *);
void setLock(omp_lock_t *);
+void unsetCriticalLock(omp_lock_t *);
+void setCriticalLock(omp_lock_t *);
/// AMDGCN Implementation
///
@@ -269,6 +271,25 @@ void initLock(omp_lock_t *) { __builtin_trap(); }
void destroyLock(omp_lock_t *) { __builtin_trap(); }
void setLock(omp_lock_t *) { __builtin_trap(); }
+constexpr uint32_t UNSET = 0;
+constexpr uint32_t SET = 1;
+
+void unsetCriticalLock(omp_lock_t *Lock) {
+ (void)atomicExchange((uint32_t *)Lock, UNSET, atomic::acq_rel);
+}
+
+void setCriticalLock(omp_lock_t *Lock) {
+ uint64_t LowestActiveThread = utils::ffs(mapping::activemask()) - 1;
+ if (mapping::getThreadIdInWarp() == LowestActiveThread) {
+ fenceKernel(atomic::release);
+ while (!atomicCAS((uint32_t *)Lock, UNSET, SET, atomic::relaxed,
+ atomic::relaxed)) {
+ __builtin_amdgcn_s_sleep(32);
+ }
+ fenceKernel(atomic::aquire);
+ }
+}
+
#pragma omp end declare variant
///}
@@ -450,6 +471,14 @@ uint32_t atomic::inc(uint32_t *Addr, uint32_t V, atomic::OrderingTy Ordering) {
return impl::atomicInc(Addr, V, Ordering);
}
+void unsetCriticalLock(omp_lock_t *Lock) {
+ impl::unsetLock(Lock);
+}
+
+void setCriticalLock(omp_lock_t *Lock) {
+ impl::setLock(Lock);
+}
+
extern "C" {
void __kmpc_ordered(IdentTy *Loc, int32_t TId) { FunctionTracingRAII(); }
@@ -518,12 +547,12 @@ void __kmpc_syncwarp(uint64_t Mask) {
void __kmpc_critical(IdentTy *Loc, int32_t TId, CriticalNameTy *Name) {
FunctionTracingRAII();
- omp_set_lock(reinterpret_cast<omp_lock_t *>(Name));
+ impl::setCriticalLock(reinterpret_cast<omp_lock_t *>(Name));
}
void __kmpc_end_critical(IdentTy *Loc, int32_t TId, CriticalNameTy *Name) {
FunctionTracingRAII();
- omp_unset_lock(reinterpret_cast<omp_lock_t *>(Name));
+ impl::unsetCriticalLock(reinterpret_cast<omp_lock_t *>(Name));
}
void omp_init_lock(omp_lock_t *Lock) { impl::initLock(Lock); }
diff --git a/openmp/libomptarget/test/offloading/target_critical_region.cpp b/openmp/libomptarget/test/offloading/target_critical_region.cpp
new file mode 100644
index 0000000000000..64e1adbf6bf28
--- /dev/null
+++ b/openmp/libomptarget/test/offloading/target_critical_region.cpp
@@ -0,0 +1,36 @@
+// RUN: %libomptarget-compilexx-run-and-check-generic
+
+// UNSUPPORTED: nvptx64-nvidia-cuda
+// UNSUPPORTED: nvptx64-nvidia-cuda-LTO
+// UNSUPPORTED: x86_64-pc-linux-gnu
+// UNSUPPORTED: x86_64-pc-linux-gnu-LTO
+
+#include <omp.h>
+#include <stdio.h>
+
+#define N 1000000
+
+int A[N];
+int main() {
+ for (int i = 0; i < N; i++)
+ A[i] = 1;
+
+ int sum[1];
+ sum[0] = 0;
+
+#pragma omp target teams distribute parallel for num_teams(256) \
+ schedule(static, 1) map(to \
+ : A[:N]) map(tofrom \
+ : sum[:1])
+ {
+ for (int i = 0; i < N; i++) {
+#pragma omp critical
+ { sum[0] += A[i]; }
+ }
+ }
+
+ // CHECK: SUM = 1000000
+ printf("SUM = %d\n", sum[0]);
+
+ return 0;
+}
More information about the Openmp-commits
mailing list