[llvm] [AMDGPU] Update comments in memory legalizer. NFC (PR #160453)

Wed Sep 24 00:22:14 PDT 2025

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Stanislav Mekhanoshin (rampitec)

<details>
<summary>Changes</summary>



---
Full diff: https://github.com/llvm/llvm-project/pull/160453.diff


1 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp (+14-5) 


``````````diff

diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index c85d2bb9fe9ae..e4da22ee04975 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -106,6 +106,7 @@ class SIMemOpInfo final {
   bool IsLastUse = false;
   bool IsCooperative = false;
 
+  // TODO: Should we assume Cooperative=true if no MMO is present?
   SIMemOpInfo(
       const GCNSubtarget &ST,
       AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
@@ -334,6 +335,11 @@ class SICacheControl {
                                               bool IsNonTemporal,
                                               bool IsLastUse = false) const = 0;
 
+  /// Add final touches to a `mayStore` instruction \p MI, which may be a
+  /// Store or RMW instruction.
+  /// FIXME: This takes a MI because iterators aren't handled properly. When
+  /// this is called, they often point to entirely different insts. Thus we back
+  /// up the inst early and pass it here instead.
   virtual bool finalizeStore(MachineInstr &MI, bool Atomic) const {
     return false;
   };
@@ -2368,7 +2374,10 @@ bool SIGfx12CacheControl::insertWait(MachineBasicBlock::iterator &MI,
       //   which shares the same L0.
       //
       // GFX12.5:
-      //   TODO DOCS
+      //   CU$ has two ports. To ensure operations are visible at the workgroup
+      //   level, we need to ensure all operations in this port have completed
+      //   so the other SIMDs in the WG can see them. There is no ordering
+      //   guarantee between the ports.
       if (!ST.isCuModeEnabled() || ST.hasGFX1250Insts()) {
         if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
           LOADCnt |= true;
@@ -2483,8 +2492,7 @@ bool SIGfx12CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
     //  Otherwise in CU mode all waves of a work-group are on the same CU, and
     //  so the L0 does not need to be invalidated.
     //
-    // GFX12.5
-    //   TODO DOCS
+    // GFX12.5 has a shared WGP$, so no invalidates are required.
     if (ST.isCuModeEnabled())
       return false;
 
@@ -2528,7 +2536,8 @@ bool SIGfx12CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
       ++MI;
 
     // global_wb is only necessary at system scope for GFX12.0,
-    // they're also necessary at device scope for GFX12.5.
+    // they're also necessary at device scope for GFX12.5 as stores
+    // cannot report completion earlier than L2.
     //
     // Emitting it for lower scopes is a slow no-op, so we omit it
     // for performance.
@@ -2539,7 +2548,7 @@ bool SIGfx12CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
       Changed = true;
       break;
     case SIAtomicScope::AGENT:
-      // TODO DOCS
+      // GFX12.5 may have >1 L2 per device so we must emit a device scope WB.
       if (ST.hasGFX1250Insts()) {
         BuildMI(MBB, MI, DL, TII->get(AMDGPU::GLOBAL_WB))
             .addImm(AMDGPU::CPol::SCOPE_DEV);

``````````

</details>


https://github.com/llvm/llvm-project/pull/160453