[llvm] [OpenMP] Replace AMDGPU fences with generic scoped fences (PR #119619)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 11 13:58:16 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-offload
Author: Joseph Huber (jhuber6)
<details>
<summary>Changes</summary>
Summary:
This is simpler and more common. I would have replaced the CUDA uses as well,
so that both targets share the same code, but the CUDA path currently does not
fully codegen these fences and just emits a full system-wide barrier as a fallback.
---
Full diff: https://github.com/llvm/llvm-project/pull/119619.diff
2 Files Affected:
- (modified) offload/DeviceRTL/include/Synchronization.h (+8)
- (modified) offload/DeviceRTL/src/Synchronization.cpp (+5-39)
``````````diff
diff --git a/offload/DeviceRTL/include/Synchronization.h b/offload/DeviceRTL/include/Synchronization.h
index 874974cc861df2..7a73f9ba72877a 100644
--- a/offload/DeviceRTL/include/Synchronization.h
+++ b/offload/DeviceRTL/include/Synchronization.h
@@ -26,6 +26,14 @@ enum OrderingTy {
seq_cst = __ATOMIC_SEQ_CST,
};
+enum ScopeTy {
+ system = __MEMORY_SCOPE_SYSTEM,
+ device_ = __MEMORY_SCOPE_DEVICE,
+ workgroup = __MEMORY_SCOPE_WRKGRP,
+ wavefront = __MEMORY_SCOPE_WVFRNT,
+ single = __MEMORY_SCOPE_SINGLE,
+};
+
enum MemScopeTy {
all, // All threads on all devices
device, // All threads on the device
diff --git a/offload/DeviceRTL/src/Synchronization.cpp b/offload/DeviceRTL/src/Synchronization.cpp
index 9ea8d171cc830e..3aee23a865d3cf 100644
--- a/offload/DeviceRTL/src/Synchronization.cpp
+++ b/offload/DeviceRTL/src/Synchronization.cpp
@@ -232,50 +232,16 @@ void namedBarrier() {
fence::team(atomic::release);
}
-// sema checking of amdgcn_fence is aggressive. Intention is to patch clang
-// so that it is usable within a template environment and so that a runtime
-// value of the memory order is expanded to this switch within clang/llvm.
void fenceTeam(atomic::OrderingTy Ordering) {
- switch (Ordering) {
- default:
- __builtin_unreachable();
- case atomic::aquire:
- return __builtin_amdgcn_fence(atomic::aquire, "workgroup");
- case atomic::release:
- return __builtin_amdgcn_fence(atomic::release, "workgroup");
- case atomic::acq_rel:
- return __builtin_amdgcn_fence(atomic::acq_rel, "workgroup");
- case atomic::seq_cst:
- return __builtin_amdgcn_fence(atomic::seq_cst, "workgroup");
- }
+ return __scoped_atomic_thread_fence(Ordering, atomic::workgroup);
}
+
void fenceKernel(atomic::OrderingTy Ordering) {
- switch (Ordering) {
- default:
- __builtin_unreachable();
- case atomic::aquire:
- return __builtin_amdgcn_fence(atomic::aquire, "agent");
- case atomic::release:
- return __builtin_amdgcn_fence(atomic::release, "agent");
- case atomic::acq_rel:
- return __builtin_amdgcn_fence(atomic::acq_rel, "agent");
- case atomic::seq_cst:
- return __builtin_amdgcn_fence(atomic::seq_cst, "agent");
- }
+ return __scoped_atomic_thread_fence(Ordering, atomic::device_);
}
+
void fenceSystem(atomic::OrderingTy Ordering) {
- switch (Ordering) {
- default:
- __builtin_unreachable();
- case atomic::aquire:
- return __builtin_amdgcn_fence(atomic::aquire, "");
- case atomic::release:
- return __builtin_amdgcn_fence(atomic::release, "");
- case atomic::acq_rel:
- return __builtin_amdgcn_fence(atomic::acq_rel, "");
- case atomic::seq_cst:
- return __builtin_amdgcn_fence(atomic::seq_cst, "");
- }
+ return __scoped_atomic_thread_fence(Ordering, atomic::system);
}
void syncWarp(__kmpc_impl_lanemask_t) {
``````````
</details>
https://github.com/llvm/llvm-project/pull/119619
More information about the llvm-commits mailing list