[llvm] [AMDGPU] Update comments in memory legalizer. NFC (PR #160453)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 24 00:22:14 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Stanislav Mekhanoshin (rampitec)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/160453.diff
1 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp (+14-5)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index c85d2bb9fe9ae..e4da22ee04975 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -106,6 +106,7 @@ class SIMemOpInfo final {
bool IsLastUse = false;
bool IsCooperative = false;
+ // TODO: Should we assume Cooperative=true if no MMO is present?
SIMemOpInfo(
const GCNSubtarget &ST,
AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
@@ -334,6 +335,11 @@ class SICacheControl {
bool IsNonTemporal,
bool IsLastUse = false) const = 0;
+ /// Add final touches to a `mayStore` instruction \p MI, which may be a
+ /// Store or RMW instruction.
+ /// FIXME: This takes a MI because iterators aren't handled properly. When
+ /// this is called, they often point to entirely different insts. Thus we back
+ /// up the inst early and pass it here instead.
virtual bool finalizeStore(MachineInstr &MI, bool Atomic) const {
return false;
};
@@ -2368,7 +2374,10 @@ bool SIGfx12CacheControl::insertWait(MachineBasicBlock::iterator &MI,
// which shares the same L0.
//
// GFX12.5:
- // TODO DOCS
+ // CU$ has two ports. To ensure operations are visible at the workgroup
+ // level, we need to ensure all operations in this port have completed
+ // so the other SIMDs in the WG can see them. There is no ordering
+ // guarantee between the ports.
if (!ST.isCuModeEnabled() || ST.hasGFX1250Insts()) {
if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
LOADCnt |= true;
@@ -2483,8 +2492,7 @@ bool SIGfx12CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
// Otherwise in CU mode all waves of a work-group are on the same CU, and
// so the L0 does not need to be invalidated.
//
- // GFX12.5
- // TODO DOCS
+ // GFX12.5 has a shared WGP$, so no invalidates are required.
if (ST.isCuModeEnabled())
return false;
@@ -2528,7 +2536,8 @@ bool SIGfx12CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
++MI;
// global_wb is only necessary at system scope for GFX12.0,
- // they're also necessary at device scope for GFX12.5.
+ // they're also necessary at device scope for GFX12.5 as stores
+ // cannot report completion earlier than L2.
//
// Emitting it for lower scopes is a slow no-op, so we omit it
// for performance.
@@ -2539,7 +2548,7 @@ bool SIGfx12CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
Changed = true;
break;
case SIAtomicScope::AGENT:
- // TODO DOCS
+ // GFX12.5 may have >1 L2 per device so we must emit a device scope WB.
if (ST.hasGFX1250Insts()) {
BuildMI(MBB, MI, DL, TII->get(AMDGPU::GLOBAL_WB))
.addImm(AMDGPU::CPol::SCOPE_DEV);
``````````
</details>
https://github.com/llvm/llvm-project/pull/160453
More information about the llvm-commits
mailing list