[llvm] [AMDGPU] Insert waitcnt for non-global fence release in GFX12 (PR #159282)
Fabian Ritter via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 23 01:55:32 PDT 2025
================
@@ -2521,45 +2521,41 @@ bool SIGfx12CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
// writeback as all memory operations by the same thread are
// sequentially consistent, and no other thread can access scratch
// memory.
+ if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE) {
+ if (Pos == Position::AFTER)
+ ++MI;
- // Other address spaces do not have a cache.
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) == SIAtomicAddrSpace::NONE)
- return false;
-
- if (Pos == Position::AFTER)
- ++MI;
-
- // global_wb is only necessary at system scope for GFX12.0,
- // they're also necessary at device scope for GFX12.5.
- //
- // Emitting it for lower scopes is a slow no-op, so we omit it
- // for performance.
- switch (Scope) {
- case SIAtomicScope::SYSTEM:
- BuildMI(MBB, MI, DL, TII->get(AMDGPU::GLOBAL_WB))
- .addImm(AMDGPU::CPol::SCOPE_SYS);
- break;
- case SIAtomicScope::AGENT:
- // TODO DOCS
- if (ST.hasGFX1250Insts()) {
+ // global_wb is only necessary at system scope for GFX12.0,
+ // they're also necessary at device scope for GFX12.5.
+ //
+ // Emitting it for lower scopes is a slow no-op, so we omit it
+ // for performance.
+ switch (Scope) {
+ case SIAtomicScope::SYSTEM:
BuildMI(MBB, MI, DL, TII->get(AMDGPU::GLOBAL_WB))
- .addImm(AMDGPU::CPol::SCOPE_DEV);
+ .addImm(AMDGPU::CPol::SCOPE_SYS);
+ break;
+ case SIAtomicScope::AGENT:
+ // TODO DOCS
+ if (ST.hasGFX1250Insts()) {
+ BuildMI(MBB, MI, DL, TII->get(AMDGPU::GLOBAL_WB))
+ .addImm(AMDGPU::CPol::SCOPE_DEV);
+ }
+ break;
+ case SIAtomicScope::CLUSTER:
+ case SIAtomicScope::WORKGROUP:
+ // No WB necessary, but we still have to wait.
+ case SIAtomicScope::WAVEFRONT:
+ case SIAtomicScope::SINGLETHREAD:
+ // No WB or wait necessary here, but insertWait takes care of that.
+ break;
----------------
ritter-x2a wrote:
Done.
https://github.com/llvm/llvm-project/pull/159282
More information about the llvm-commits mailing list