[llvm] 62fe5e4 - [NFC][AMDGPU] print more info when debugging SIInsertWaitcnts pass (#144629)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 19 01:17:40 PDT 2025
Author: Sameer Sahasrabuddhe
Date: 2025-06-19T13:47:37+05:30
New Revision: 62fe5e428acc2c5ef9b144c5737d55b17b55feac
URL: https://github.com/llvm/llvm-project/commit/62fe5e428acc2c5ef9b144c5737d55b17b55feac
DIFF: https://github.com/llvm/llvm-project/commit/62fe5e428acc2c5ef9b144c5737d55b17b55feac.diff
LOG: [NFC][AMDGPU] print more info when debugging SIInsertWaitcnts pass (#144629)
Added:
Modified:
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index ca8e3244edd15..f7b88bf2d5ebc 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -104,25 +104,38 @@ struct HardwareLimits {
unsigned KmcntMax; // gfx12+ only.
};
+#define AMDGPU_DECLARE_WAIT_EVENTS(DECL) \
+ DECL(VMEM_ACCESS) /* vmem read & write */ \
+ DECL(VMEM_READ_ACCESS) /* vmem read */ \
+ DECL(VMEM_SAMPLER_READ_ACCESS) /* vmem SAMPLER read (gfx12+ only) */ \
+ DECL(VMEM_BVH_READ_ACCESS) /* vmem BVH read (gfx12+ only) */ \
+ DECL(VMEM_WRITE_ACCESS) /* vmem write that is not scratch */ \
+ DECL(SCRATCH_WRITE_ACCESS) /* vmem write that may be scratch */ \
+ DECL(LDS_ACCESS) /* lds read & write */ \
+ DECL(GDS_ACCESS) /* gds read & write */ \
+ DECL(SQ_MESSAGE) /* send message */ \
+ DECL(SMEM_ACCESS) /* scalar-memory read & write */ \
+ DECL(EXP_GPR_LOCK) /* export holding on its data src */ \
+ DECL(GDS_GPR_LOCK) /* GDS holding on its data and addr src */ \
+ DECL(EXP_POS_ACCESS) /* write to export position */ \
+ DECL(EXP_PARAM_ACCESS) /* write to export parameter */ \
+ DECL(VMW_GPR_LOCK) /* vmem write holding on its data src */ \
+ DECL(EXP_LDS_ACCESS) /* read by ldsdir counting as export */
+
+// clang-format off
+#define AMDGPU_EVENT_ENUM(Name) Name,
enum WaitEventType {
- VMEM_ACCESS, // vector-memory read & write
- VMEM_READ_ACCESS, // vector-memory read
- VMEM_SAMPLER_READ_ACCESS, // vector-memory SAMPLER read (gfx12+ only)
- VMEM_BVH_READ_ACCESS, // vector-memory BVH read (gfx12+ only)
- VMEM_WRITE_ACCESS, // vector-memory write that is not scratch
- SCRATCH_WRITE_ACCESS, // vector-memory write that may be scratch
- LDS_ACCESS, // lds read & write
- GDS_ACCESS, // gds read & write
- SQ_MESSAGE, // send message
- SMEM_ACCESS, // scalar-memory read & write
- EXP_GPR_LOCK, // export holding on its data src
- GDS_GPR_LOCK, // GDS holding on its data and addr src
- EXP_POS_ACCESS, // write to export position
- EXP_PARAM_ACCESS, // write to export parameter
- VMW_GPR_LOCK, // vector-memory write holding on its data src
- EXP_LDS_ACCESS, // read by ldsdir counting as export
- NUM_WAIT_EVENTS,
+ AMDGPU_DECLARE_WAIT_EVENTS(AMDGPU_EVENT_ENUM)
+ NUM_WAIT_EVENTS
};
+#undef AMDGPU_EVENT_ENUM
+
+#define AMDGPU_EVENT_NAME(Name) #Name,
+static constexpr StringLiteral WaitEventTypeName[] = {
+ AMDGPU_DECLARE_WAIT_EVENTS(AMDGPU_EVENT_NAME)
+};
+#undef AMDGPU_EVENT_NAME
+// clang-format on
// The mapping is:
// 0 .. SQ_MAX_PGM_VGPRS-1 real VGPRs
@@ -1100,6 +1113,20 @@ void WaitcntBrackets::print(raw_ostream &OS) const {
}
OS << '\n';
}
+
+ OS << "Pending Events: ";
+ if (hasPendingEvent()) {
+ ListSeparator LS;
+ for (unsigned I = 0; I != NUM_WAIT_EVENTS; ++I) {
+ if (hasPendingEvent((WaitEventType)I)) {
+ OS << LS << WaitEventTypeName[I];
+ }
+ }
+ } else {
+ OS << "none";
+ }
+ OS << '\n';
+
OS << '\n';
}
@@ -1265,10 +1292,15 @@ bool WaitcntGeneratorPreGFX12::applyPreexistingWaitcnt(
MachineInstr *WaitcntInstr = nullptr;
MachineInstr *WaitcntVsCntInstr = nullptr;
+ LLVM_DEBUG(dbgs() << "PreGFX12::applyPreexistingWaitcnt at: " << *It);
+
for (auto &II :
make_early_inc_range(make_range(OldWaitcntInstr.getIterator(), It))) {
- if (II.isMetaInstruction())
+ LLVM_DEBUG(dbgs() << "pre-existing iter: " << II);
+ if (II.isMetaInstruction()) {
+ LLVM_DEBUG(dbgs() << "skipped meta instruction\n");
continue;
+ }
unsigned Opcode = SIInstrInfo::getNonSoftWaitcntOpcode(II.getOpcode());
bool TrySimplify = Opcode != II.getOpcode() && !OptNone;
@@ -1320,9 +1352,9 @@ bool WaitcntGeneratorPreGFX12::applyPreexistingWaitcnt(
LLVM_DEBUG(It == WaitcntInstr->getParent()->end()
? dbgs()
- << "applyPreexistingWaitcnt\n"
+ << "applied pre-existing waitcnt\n"
<< "New Instr at block end: " << *WaitcntInstr << '\n'
- : dbgs() << "applyPreexistingWaitcnt\n"
+ : dbgs() << "applied pre-existing waitcnt\n"
<< "Old Instr: " << *It
<< "New Instr: " << *WaitcntInstr << '\n');
}
@@ -1336,10 +1368,10 @@ bool WaitcntGeneratorPreGFX12::applyPreexistingWaitcnt(
Wait.StoreCnt = ~0u;
LLVM_DEBUG(It == WaitcntVsCntInstr->getParent()->end()
- ? dbgs() << "applyPreexistingWaitcnt\n"
+ ? dbgs() << "applied pre-existing waitcnt\n"
<< "New Instr at block end: " << *WaitcntVsCntInstr
<< '\n'
- : dbgs() << "applyPreexistingWaitcnt\n"
+ : dbgs() << "applied pre-existing waitcnt\n"
<< "Old Instr: " << *It
<< "New Instr: " << *WaitcntVsCntInstr << '\n');
}
@@ -1413,10 +1445,15 @@ bool WaitcntGeneratorGFX12Plus::applyPreexistingWaitcnt(
MachineInstr *CombinedStoreDsCntInstr = nullptr;
MachineInstr *WaitInstrs[NUM_EXTENDED_INST_CNTS] = {};
+ LLVM_DEBUG(dbgs() << "GFX12Plus::applyPreexistingWaitcnt at: " << *It);
+
for (auto &II :
make_early_inc_range(make_range(OldWaitcntInstr.getIterator(), It))) {
- if (II.isMetaInstruction())
+ LLVM_DEBUG(dbgs() << "pre-existing iter: " << II);
+ if (II.isMetaInstruction()) {
+ LLVM_DEBUG(dbgs() << "skipped meta instruction\n");
continue;
+ }
MachineInstr **UpdatableInstr;
@@ -1486,10 +1523,10 @@ bool WaitcntGeneratorGFX12Plus::applyPreexistingWaitcnt(
Wait.DsCnt = ~0u;
LLVM_DEBUG(It == OldWaitcntInstr.getParent()->end()
- ? dbgs() << "applyPreexistingWaitcnt\n"
+ ? dbgs() << "applied pre-existing waitcnt\n"
<< "New Instr at block end: "
<< *CombinedLoadDsCntInstr << '\n'
- : dbgs() << "applyPreexistingWaitcnt\n"
+ : dbgs() << "applied pre-existing waitcnt\n"
<< "Old Instr: " << *It << "New Instr: "
<< *CombinedLoadDsCntInstr << '\n');
} else {
@@ -1511,10 +1548,10 @@ bool WaitcntGeneratorGFX12Plus::applyPreexistingWaitcnt(
Wait.DsCnt = ~0u;
LLVM_DEBUG(It == OldWaitcntInstr.getParent()->end()
- ? dbgs() << "applyPreexistingWaitcnt\n"
+ ? dbgs() << "applied pre-existing waitcnt\n"
<< "New Instr at block end: "
<< *CombinedStoreDsCntInstr << '\n'
- : dbgs() << "applyPreexistingWaitcnt\n"
+ : dbgs() << "applied pre-existing waitcnt\n"
<< "Old Instr: " << *It << "New Instr: "
<< *CombinedStoreDsCntInstr << '\n');
} else {
@@ -1570,10 +1607,10 @@ bool WaitcntGeneratorGFX12Plus::applyPreexistingWaitcnt(
setNoWait(Wait, CT);
LLVM_DEBUG(It == OldWaitcntInstr.getParent()->end()
- ? dbgs() << "applyPreexistingWaitcnt\n"
+ ? dbgs() << "applied pre-existing waitcnt\n"
<< "New Instr at block end: " << *WaitInstrs[CT]
<< '\n'
- : dbgs() << "applyPreexistingWaitcnt\n"
+ : dbgs() << "applied pre-existing waitcnt\n"
<< "Old Instr: " << *It
<< "New Instr: " << *WaitInstrs[CT] << '\n');
} else {
@@ -2306,7 +2343,8 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
bool Modified = false;
LLVM_DEBUG({
- dbgs() << "*** Block" << Block.getNumber() << " ***";
+ dbgs() << "*** Begin Block: ";
+ Block.printName(dbgs());
ScoreBrackets.dump();
});
@@ -2437,6 +2475,12 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
Modified |= generateWaitcnt(Wait, Block.instr_end(), Block, ScoreBrackets,
OldWaitcntInstr);
+ LLVM_DEBUG({
+ dbgs() << "*** End Block: ";
+ Block.printName(dbgs());
+ ScoreBrackets.dump();
+ });
+
return Modified;
}
@@ -2699,8 +2743,10 @@ bool SIInsertWaitcnts::run(MachineFunction &MF) {
BlockInfo &SuccBI = SuccBII->second;
if (!SuccBI.Incoming) {
SuccBI.Dirty = true;
- if (SuccBII <= BII)
+ if (SuccBII <= BII) {
+ LLVM_DEBUG(dbgs() << "repeat on backedge\n");
Repeat = true;
+ }
if (!MoveBracketsToSucc) {
MoveBracketsToSucc = &SuccBI;
} else {
@@ -2708,8 +2754,10 @@ bool SIInsertWaitcnts::run(MachineFunction &MF) {
}
} else if (SuccBI.Incoming->merge(*Brackets)) {
SuccBI.Dirty = true;
- if (SuccBII <= BII)
+ if (SuccBII <= BII) {
+ LLVM_DEBUG(dbgs() << "repeat on backedge\n");
Repeat = true;
+ }
}
}
if (MoveBracketsToSucc)
More information about the llvm-commits mailing list