[llvm] [NFC][AMDGPU] print more info when debugging InsertWaitCnts pass (PR #144629)

Tue Jun 17 19:39:22 PDT 2025

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Sameer Sahasrabuddhe (ssahasra)

<details>
<summary>Changes</summary>



---
Full diff: https://github.com/llvm/llvm-project/pull/144629.diff


2 Files Affected:

- (added) llvm/lib/Target/AMDGPU/AMDGPUWaitEventType.def (+32) 
- (modified) llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp (+52-22) 


``````````diff

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUWaitEventType.def b/llvm/lib/Target/AMDGPU/AMDGPUWaitEventType.def
new file mode 100644
index 0000000000000..271db53c2801d
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUWaitEventType.def
@@ -0,0 +1,32 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// An enumeration of all the event types handled by SIInsertWaitcnts.cpp
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+AMDGPU_WAIT_EVENT(VMEM_ACCESS)              // vector-memory read & write
+AMDGPU_WAIT_EVENT(VMEM_READ_ACCESS)         // vector-memory read
+AMDGPU_WAIT_EVENT(VMEM_SAMPLER_READ_ACCESS) // vector-memory SAMPLER read (gfx12+ only)
+AMDGPU_WAIT_EVENT(VMEM_BVH_READ_ACCESS)     // vector-memory BVH read (gfx12+ only)
+AMDGPU_WAIT_EVENT(VMEM_WRITE_ACCESS)        // vector-memory write that is not scratch
+AMDGPU_WAIT_EVENT(SCRATCH_WRITE_ACCESS)     // vector-memory write that may be scratch
+AMDGPU_WAIT_EVENT(LDS_ACCESS)               // lds read & write
+AMDGPU_WAIT_EVENT(GDS_ACCESS)               // gds read & write
+AMDGPU_WAIT_EVENT(SQ_MESSAGE)               // send message
+AMDGPU_WAIT_EVENT(SMEM_ACCESS)              // scalar-memory read & write
+AMDGPU_WAIT_EVENT(EXP_GPR_LOCK)             // export holding on its data src
+AMDGPU_WAIT_EVENT(GDS_GPR_LOCK)             // GDS holding on its data and addr src
+AMDGPU_WAIT_EVENT(EXP_POS_ACCESS)           // write to export position
+AMDGPU_WAIT_EVENT(EXP_PARAM_ACCESS)         // write to export parameter
+AMDGPU_WAIT_EVENT(VMW_GPR_LOCK)             // vector-memory write holding on its data src
+AMDGPU_WAIT_EVENT(EXP_LDS_ACCESS)           // read by ldsdir counting as export
+
+#undef AMDGPU_WAIT_EVENT
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index ca8e3244edd15..03a2dc0302780 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -104,24 +104,17 @@ struct HardwareLimits {
   unsigned KmcntMax;     // gfx12+ only.
 };
 
+#define AMDGPU_WAIT_EVENT(Name) Name,
+
 enum WaitEventType {
-  VMEM_ACCESS,              // vector-memory read & write
-  VMEM_READ_ACCESS,         // vector-memory read
-  VMEM_SAMPLER_READ_ACCESS, // vector-memory SAMPLER read (gfx12+ only)
-  VMEM_BVH_READ_ACCESS,     // vector-memory BVH read (gfx12+ only)
-  VMEM_WRITE_ACCESS,        // vector-memory write that is not scratch
-  SCRATCH_WRITE_ACCESS,     // vector-memory write that may be scratch
-  LDS_ACCESS,               // lds read & write
-  GDS_ACCESS,               // gds read & write
-  SQ_MESSAGE,               // send message
-  SMEM_ACCESS,              // scalar-memory read & write
-  EXP_GPR_LOCK,             // export holding on its data src
-  GDS_GPR_LOCK,             // GDS holding on its data and addr src
-  EXP_POS_ACCESS,           // write to export position
-  EXP_PARAM_ACCESS,         // write to export parameter
-  VMW_GPR_LOCK,             // vector-memory write holding on its data src
-  EXP_LDS_ACCESS,           // read by ldsdir counting as export
-  NUM_WAIT_EVENTS,
+#include "AMDGPUWaitEventType.def"
+  NUM_WAIT_EVENTS
+};
+
+#define AMDGPU_WAIT_EVENT(Name) #Name,
+
+static constexpr StringLiteral WaitEventTypeName[] = {
+#include "AMDGPUWaitEventType.def"
 };
 
 // The mapping is:
@@ -1100,6 +1093,20 @@ void WaitcntBrackets::print(raw_ostream &OS) const {
     }
     OS << '\n';
   }
+
+  OS << "Pending Events: ";
+  if (hasPendingEvent()) {
+    ListSeparator LS;
+    for (unsigned I = 0; I != NUM_WAIT_EVENTS; ++I) {
+      if (hasPendingEvent((WaitEventType)I)) {
+        OS << LS << WaitEventTypeName[I];
+      }
+    }
+  } else {
+    OS << "none";
+  }
+  OS << '\n';
+
   OS << '\n';
 }
 
@@ -1265,10 +1272,15 @@ bool WaitcntGeneratorPreGFX12::applyPreexistingWaitcnt(
   MachineInstr *WaitcntInstr = nullptr;
   MachineInstr *WaitcntVsCntInstr = nullptr;
 
+  LLVM_DEBUG(dbgs() << "PreGFX12::applyPreexistingWaitcnt at: " << *It << "\n");
+
   for (auto &II :
        make_early_inc_range(make_range(OldWaitcntInstr.getIterator(), It))) {
-    if (II.isMetaInstruction())
+    LLVM_DEBUG(dbgs() << "pre-existing iter: " << II << "\n");
+    if (II.isMetaInstruction()) {
+      LLVM_DEBUG(dbgs() << "------ skipped\n");
       continue;
+    }
 
     unsigned Opcode = SIInstrInfo::getNonSoftWaitcntOpcode(II.getOpcode());
     bool TrySimplify = Opcode != II.getOpcode() && !OptNone;
@@ -1413,10 +1425,16 @@ bool WaitcntGeneratorGFX12Plus::applyPreexistingWaitcnt(
   MachineInstr *CombinedStoreDsCntInstr = nullptr;
   MachineInstr *WaitInstrs[NUM_EXTENDED_INST_CNTS] = {};
 
+  LLVM_DEBUG(dbgs() << "GFX12Plus::applyPreexistingWaitcnt at: " << *It
+                    << "\n");
+
   for (auto &II :
        make_early_inc_range(make_range(OldWaitcntInstr.getIterator(), It))) {
-    if (II.isMetaInstruction())
+    LLVM_DEBUG(dbgs() << "pre-existing iter: " << II << "\n");
+    if (II.isMetaInstruction()) {
+      LLVM_DEBUG(dbgs() << "------ skipped\n");
       continue;
+    }
 
     MachineInstr **UpdatableInstr;
 
@@ -2306,7 +2324,9 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
   bool Modified = false;
 
   LLVM_DEBUG({
-    dbgs() << "*** Block" << Block.getNumber() << " ***";
+    dbgs() << "*** Block " << Block.getNumber() << ": ";
+    Block.printName(dbgs());
+    dbgs() << " ***";
     ScoreBrackets.dump();
   });
 
@@ -2437,6 +2457,12 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
   Modified |= generateWaitcnt(Wait, Block.instr_end(), Block, ScoreBrackets,
                               OldWaitcntInstr);
 
+  LLVM_DEBUG({
+    dbgs() << "*** Block end state: " << Block.getNumber() << ": ";
+    Block.printName(dbgs());
+    ScoreBrackets.dump();
+  });
+
   return Modified;
 }
 
@@ -2699,8 +2725,10 @@ bool SIInsertWaitcnts::run(MachineFunction &MF) {
           BlockInfo &SuccBI = SuccBII->second;
           if (!SuccBI.Incoming) {
             SuccBI.Dirty = true;
-            if (SuccBII <= BII)
+            if (SuccBII <= BII) {
+              LLVM_DEBUG(dbgs() << "repeat on backedge\n");
               Repeat = true;
+            }
             if (!MoveBracketsToSucc) {
               MoveBracketsToSucc = &SuccBI;
             } else {
@@ -2708,8 +2736,10 @@ bool SIInsertWaitcnts::run(MachineFunction &MF) {
             }
           } else if (SuccBI.Incoming->merge(*Brackets)) {
             SuccBI.Dirty = true;
-            if (SuccBII <= BII)
+            if (SuccBII <= BII) {
+              LLVM_DEBUG(dbgs() << "repeat on backedge\n");
               Repeat = true;
+            }
           }
         }
         if (MoveBracketsToSucc)

``````````

</details>


https://github.com/llvm/llvm-project/pull/144629