[llvm] [NFC][AMDGPU] print more info when debugging InsertWaitCnts pass (PR #144629)
Sameer Sahasrabuddhe via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 18 23:17:53 PDT 2025
https://github.com/ssahasra updated https://github.com/llvm/llvm-project/pull/144629
>From 64fc16f2accb433f6947ec88860e58facc780a98 Mon Sep 17 00:00:00 2001
From: Sameer Sahasrabuddhe <sameer.sahasrabuddhe at amd.com>
Date: Wed, 11 Jun 2025 11:27:26 +0530
Subject: [PATCH 1/4] [NFC][AMDGPU] print more info when debugging InsertWaitCnts pass
---
.../lib/Target/AMDGPU/AMDGPUWaitEventType.def | 32 ++++++++
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 74 +++++++++++++------
2 files changed, 84 insertions(+), 22 deletions(-)
create mode 100644 llvm/lib/Target/AMDGPU/AMDGPUWaitEventType.def
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUWaitEventType.def b/llvm/lib/Target/AMDGPU/AMDGPUWaitEventType.def
new file mode 100644
index 0000000000000..271db53c2801d
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/AMDGPUWaitEventType.def
@@ -0,0 +1,32 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// An enumeration of all the event types handled by SIInsertWaitcnts.cpp
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+AMDGPU_WAIT_EVENT(VMEM_ACCESS) // vector-memory read & write
+AMDGPU_WAIT_EVENT(VMEM_READ_ACCESS) // vector-memory read
+AMDGPU_WAIT_EVENT(VMEM_SAMPLER_READ_ACCESS) // vector-memory SAMPLER read (gfx12+ only)
+AMDGPU_WAIT_EVENT(VMEM_BVH_READ_ACCESS) // vector-memory BVH read (gfx12+ only)
+AMDGPU_WAIT_EVENT(VMEM_WRITE_ACCESS) // vector-memory write that is not scratch
+AMDGPU_WAIT_EVENT(SCRATCH_WRITE_ACCESS) // vector-memory write that may be scratch
+AMDGPU_WAIT_EVENT(LDS_ACCESS) // lds read & write
+AMDGPU_WAIT_EVENT(GDS_ACCESS) // gds read & write
+AMDGPU_WAIT_EVENT(SQ_MESSAGE) // send message
+AMDGPU_WAIT_EVENT(SMEM_ACCESS) // scalar-memory read & write
+AMDGPU_WAIT_EVENT(EXP_GPR_LOCK) // export holding on its data src
+AMDGPU_WAIT_EVENT(GDS_GPR_LOCK) // GDS holding on its data and addr src
+AMDGPU_WAIT_EVENT(EXP_POS_ACCESS) // write to export position
+AMDGPU_WAIT_EVENT(EXP_PARAM_ACCESS) // write to export parameter
+AMDGPU_WAIT_EVENT(VMW_GPR_LOCK) // vector-memory write holding on its data src
+AMDGPU_WAIT_EVENT(EXP_LDS_ACCESS) // read by ldsdir counting as export
+
+#undef AMDGPU_WAIT_EVENT
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index ca8e3244edd15..03a2dc0302780 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -104,24 +104,17 @@ struct HardwareLimits {
unsigned KmcntMax; // gfx12+ only.
};
+#define AMDGPU_WAIT_EVENT(Name) Name,
+
enum WaitEventType {
- VMEM_ACCESS, // vector-memory read & write
- VMEM_READ_ACCESS, // vector-memory read
- VMEM_SAMPLER_READ_ACCESS, // vector-memory SAMPLER read (gfx12+ only)
- VMEM_BVH_READ_ACCESS, // vector-memory BVH read (gfx12+ only)
- VMEM_WRITE_ACCESS, // vector-memory write that is not scratch
- SCRATCH_WRITE_ACCESS, // vector-memory write that may be scratch
- LDS_ACCESS, // lds read & write
- GDS_ACCESS, // gds read & write
- SQ_MESSAGE, // send message
- SMEM_ACCESS, // scalar-memory read & write
- EXP_GPR_LOCK, // export holding on its data src
- GDS_GPR_LOCK, // GDS holding on its data and addr src
- EXP_POS_ACCESS, // write to export position
- EXP_PARAM_ACCESS, // write to export parameter
- VMW_GPR_LOCK, // vector-memory write holding on its data src
- EXP_LDS_ACCESS, // read by ldsdir counting as export
- NUM_WAIT_EVENTS,
+#include "AMDGPUWaitEventType.def"
+ NUM_WAIT_EVENTS
+};
+
+#define AMDGPU_WAIT_EVENT(Name) #Name,
+
+static constexpr StringLiteral WaitEventTypeName[] = {
+#include "AMDGPUWaitEventType.def"
};
// The mapping is:
@@ -1100,6 +1093,20 @@ void WaitcntBrackets::print(raw_ostream &OS) const {
}
OS << '\n';
}
+
+ OS << "Pending Events: ";
+ if (hasPendingEvent()) {
+ ListSeparator LS;
+ for (unsigned I = 0; I != NUM_WAIT_EVENTS; ++I) {
+ if (hasPendingEvent((WaitEventType)I)) {
+ OS << LS << WaitEventTypeName[I];
+ }
+ }
+ } else {
+ OS << "none";
+ }
+ OS << '\n';
+
OS << '\n';
}
@@ -1265,10 +1272,15 @@ bool WaitcntGeneratorPreGFX12::applyPreexistingWaitcnt(
MachineInstr *WaitcntInstr = nullptr;
MachineInstr *WaitcntVsCntInstr = nullptr;
+ LLVM_DEBUG(dbgs() << "PreGFX12::applyPreexistingWaitcnt at: " << *It << "\n");
+
for (auto &II :
make_early_inc_range(make_range(OldWaitcntInstr.getIterator(), It))) {
- if (II.isMetaInstruction())
+ LLVM_DEBUG(dbgs() << "pre-existing iter: " << II << "\n");
+ if (II.isMetaInstruction()) {
+ LLVM_DEBUG(dbgs() << "------ skipped\n");
continue;
+ }
unsigned Opcode = SIInstrInfo::getNonSoftWaitcntOpcode(II.getOpcode());
bool TrySimplify = Opcode != II.getOpcode() && !OptNone;
@@ -1413,10 +1425,16 @@ bool WaitcntGeneratorGFX12Plus::applyPreexistingWaitcnt(
MachineInstr *CombinedStoreDsCntInstr = nullptr;
MachineInstr *WaitInstrs[NUM_EXTENDED_INST_CNTS] = {};
+ LLVM_DEBUG(dbgs() << "GFX12Plus::applyPreexistingWaitcnt at: " << *It
+ << "\n");
+
for (auto &II :
make_early_inc_range(make_range(OldWaitcntInstr.getIterator(), It))) {
- if (II.isMetaInstruction())
+ LLVM_DEBUG(dbgs() << "pre-existing iter: " << II << "\n");
+ if (II.isMetaInstruction()) {
+ LLVM_DEBUG(dbgs() << "------ skipped\n");
continue;
+ }
MachineInstr **UpdatableInstr;
@@ -2306,7 +2324,9 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
bool Modified = false;
LLVM_DEBUG({
- dbgs() << "*** Block" << Block.getNumber() << " ***";
+ dbgs() << "*** Block " << Block.getNumber() << ": ";
+ Block.printName(dbgs());
+ dbgs() << " ***";
ScoreBrackets.dump();
});
@@ -2437,6 +2457,12 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
Modified |= generateWaitcnt(Wait, Block.instr_end(), Block, ScoreBrackets,
OldWaitcntInstr);
+ LLVM_DEBUG({
+ dbgs() << "*** Block end state: " << Block.getNumber() << ": ";
+ Block.printName(dbgs());
+ ScoreBrackets.dump();
+ });
+
return Modified;
}
@@ -2699,8 +2725,10 @@ bool SIInsertWaitcnts::run(MachineFunction &MF) {
BlockInfo &SuccBI = SuccBII->second;
if (!SuccBI.Incoming) {
SuccBI.Dirty = true;
- if (SuccBII <= BII)
+ if (SuccBII <= BII) {
+ LLVM_DEBUG(dbgs() << "repeat on backedge\n");
Repeat = true;
+ }
if (!MoveBracketsToSucc) {
MoveBracketsToSucc = &SuccBI;
} else {
@@ -2708,8 +2736,10 @@ bool SIInsertWaitcnts::run(MachineFunction &MF) {
}
} else if (SuccBI.Incoming->merge(*Brackets)) {
SuccBI.Dirty = true;
- if (SuccBII <= BII)
+ if (SuccBII <= BII) {
+ LLVM_DEBUG(dbgs() << "repeat on backedge\n");
Repeat = true;
+ }
}
}
if (MoveBracketsToSucc)
>From c96c9f73509cd4f36e5607fb6e774495780afeb5 Mon Sep 17 00:00:00 2001
From: Sameer Sahasrabuddhe <sameer.sahasrabuddhe at amd.com>
Date: Wed, 18 Jun 2025 11:58:00 +0530
Subject: [PATCH 2/4] minor cleanups
---
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 03a2dc0302780..cda843ead94cf 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -2324,9 +2324,8 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
bool Modified = false;
LLVM_DEBUG({
- dbgs() << "*** Block " << Block.getNumber() << ": ";
+ dbgs() << "*** Begin Block: ";
Block.printName(dbgs());
- dbgs() << " ***";
ScoreBrackets.dump();
});
@@ -2458,7 +2457,7 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
OldWaitcntInstr);
LLVM_DEBUG({
- dbgs() << "*** Block end state: " << Block.getNumber() << ": ";
+ dbgs() << "*** End Block: ";
Block.printName(dbgs());
ScoreBrackets.dump();
});
>From 826fcae60a91e6e504d7b0c92e30288a7ba8b07e Mon Sep 17 00:00:00 2001
From: Sameer Sahasrabuddhe <sameer.sahasrabuddhe at amd.com>
Date: Thu, 19 Jun 2025 11:24:45 +0530
Subject: [PATCH 3/4] whitespace cleanup and a local macro
---
.../lib/Target/AMDGPU/AMDGPUWaitEventType.def | 32 ----------
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 63 ++++++++++++-------
2 files changed, 40 insertions(+), 55 deletions(-)
delete mode 100644 llvm/lib/Target/AMDGPU/AMDGPUWaitEventType.def
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUWaitEventType.def b/llvm/lib/Target/AMDGPU/AMDGPUWaitEventType.def
deleted file mode 100644
index 271db53c2801d..0000000000000
--- a/llvm/lib/Target/AMDGPU/AMDGPUWaitEventType.def
+++ /dev/null
@@ -1,32 +0,0 @@
-//===----------------------------------------------------------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// An enumeration of all the event types handled by SIInsertWaitcnts.cpp
-//
-//===----------------------------------------------------------------------===//
-
-// NOTE: NO INCLUDE GUARD DESIRED!
-
-AMDGPU_WAIT_EVENT(VMEM_ACCESS) // vector-memory read & write
-AMDGPU_WAIT_EVENT(VMEM_READ_ACCESS) // vector-memory read
-AMDGPU_WAIT_EVENT(VMEM_SAMPLER_READ_ACCESS) // vector-memory SAMPLER read (gfx12+ only)
-AMDGPU_WAIT_EVENT(VMEM_BVH_READ_ACCESS) // vector-memory BVH read (gfx12+ only)
-AMDGPU_WAIT_EVENT(VMEM_WRITE_ACCESS) // vector-memory write that is not scratch
-AMDGPU_WAIT_EVENT(SCRATCH_WRITE_ACCESS) // vector-memory write that may be scratch
-AMDGPU_WAIT_EVENT(LDS_ACCESS) // lds read & write
-AMDGPU_WAIT_EVENT(GDS_ACCESS) // gds read & write
-AMDGPU_WAIT_EVENT(SQ_MESSAGE) // send message
-AMDGPU_WAIT_EVENT(SMEM_ACCESS) // scalar-memory read & write
-AMDGPU_WAIT_EVENT(EXP_GPR_LOCK) // export holding on its data src
-AMDGPU_WAIT_EVENT(GDS_GPR_LOCK) // GDS holding on its data and addr src
-AMDGPU_WAIT_EVENT(EXP_POS_ACCESS) // write to export position
-AMDGPU_WAIT_EVENT(EXP_PARAM_ACCESS) // write to export parameter
-AMDGPU_WAIT_EVENT(VMW_GPR_LOCK) // vector-memory write holding on its data src
-AMDGPU_WAIT_EVENT(EXP_LDS_ACCESS) // read by ldsdir counting as export
-
-#undef AMDGPU_WAIT_EVENT
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index cda843ead94cf..f41a3dd612039 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -104,18 +104,36 @@ struct HardwareLimits {
unsigned KmcntMax; // gfx12+ only.
};
-#define AMDGPU_WAIT_EVENT(Name) Name,
-
+#define AMDGPU_DECLARE_WAIT_EVENTS(DECL) \
+ DECL(VMEM_ACCESS) /* vmem read & write */ \
+ DECL(VMEM_READ_ACCESS) /* vmem read */ \
+ DECL(VMEM_SAMPLER_READ_ACCESS) /* vmem SAMPLER read (gfx12+ only) */ \
+ DECL(VMEM_BVH_READ_ACCESS) /* vmem BVH read (gfx12+ only) */ \
+ DECL(VMEM_WRITE_ACCESS) /* vmem write that is not scratch */ \
+ DECL(SCRATCH_WRITE_ACCESS) /* vmem write that may be scratch */ \
+ DECL(LDS_ACCESS) /* lds read & write */ \
+ DECL(GDS_ACCESS) /* gds read & write */ \
+ DECL(SQ_MESSAGE) /* send message */ \
+ DECL(SMEM_ACCESS) /* scalar-memory read & write */ \
+ DECL(EXP_GPR_LOCK) /* export holding on its data src */ \
+ DECL(GDS_GPR_LOCK) /* GDS holding on its data and addr src */ \
+ DECL(EXP_POS_ACCESS) /* write to export position */ \
+ DECL(EXP_PARAM_ACCESS) /* write to export parameter */ \
+ DECL(VMW_GPR_LOCK) /* vmem write holding on its data src */ \
+ DECL(EXP_LDS_ACCESS) /* read by ldsdir counting as export */
+
+#define AMDGPU_EVENT_ENUM(Name) Name,
enum WaitEventType {
-#include "AMDGPUWaitEventType.def"
+ AMDGPU_DECLARE_WAIT_EVENTS(AMDGPU_EVENT_ENUM)
NUM_WAIT_EVENTS
};
+#undef AMDGPU_EVENT_ENUM
-#define AMDGPU_WAIT_EVENT(Name) #Name,
-
+#define AMDGPU_EVENT_NAME(Name) #Name,
static constexpr StringLiteral WaitEventTypeName[] = {
-#include "AMDGPUWaitEventType.def"
+ AMDGPU_DECLARE_WAIT_EVENTS(AMDGPU_EVENT_NAME)
};
+#undef AMDGPU_EVENT_NAME
// The mapping is:
// 0 .. SQ_MAX_PGM_VGPRS-1 real VGPRs
@@ -1272,13 +1290,13 @@ bool WaitcntGeneratorPreGFX12::applyPreexistingWaitcnt(
MachineInstr *WaitcntInstr = nullptr;
MachineInstr *WaitcntVsCntInstr = nullptr;
- LLVM_DEBUG(dbgs() << "PreGFX12::applyPreexistingWaitcnt at: " << *It << "\n");
+ LLVM_DEBUG(dbgs() << "PreGFX12::applyPreexistingWaitcnt at: " << *It);
for (auto &II :
make_early_inc_range(make_range(OldWaitcntInstr.getIterator(), It))) {
- LLVM_DEBUG(dbgs() << "pre-existing iter: " << II << "\n");
+ LLVM_DEBUG(dbgs() << "pre-existing iter: " << II);
if (II.isMetaInstruction()) {
- LLVM_DEBUG(dbgs() << "------ skipped\n");
+ LLVM_DEBUG(dbgs() << "skipped meta instruction\n");
continue;
}
@@ -1332,9 +1350,9 @@ bool WaitcntGeneratorPreGFX12::applyPreexistingWaitcnt(
LLVM_DEBUG(It == WaitcntInstr->getParent()->end()
? dbgs()
- << "applyPreexistingWaitcnt\n"
+ << "applied pre-existing waitcnt\n"
<< "New Instr at block end: " << *WaitcntInstr << '\n'
- : dbgs() << "applyPreexistingWaitcnt\n"
+ : dbgs() << "applied pre-existing waitcnt\n"
<< "Old Instr: " << *It
<< "New Instr: " << *WaitcntInstr << '\n');
}
@@ -1348,10 +1366,10 @@ bool WaitcntGeneratorPreGFX12::applyPreexistingWaitcnt(
Wait.StoreCnt = ~0u;
LLVM_DEBUG(It == WaitcntVsCntInstr->getParent()->end()
- ? dbgs() << "applyPreexistingWaitcnt\n"
+ ? dbgs() << "applied pre-existing waitcnt\n"
<< "New Instr at block end: " << *WaitcntVsCntInstr
<< '\n'
- : dbgs() << "applyPreexistingWaitcnt\n"
+ : dbgs() << "applied pre-existing waitcnt\n"
<< "Old Instr: " << *It
<< "New Instr: " << *WaitcntVsCntInstr << '\n');
}
@@ -1425,14 +1443,13 @@ bool WaitcntGeneratorGFX12Plus::applyPreexistingWaitcnt(
MachineInstr *CombinedStoreDsCntInstr = nullptr;
MachineInstr *WaitInstrs[NUM_EXTENDED_INST_CNTS] = {};
- LLVM_DEBUG(dbgs() << "GFX12Plus::applyPreexistingWaitcnt at: " << *It
- << "\n");
+ LLVM_DEBUG(dbgs() << "GFX12Plus::applyPreexistingWaitcnt at: " << *It);
for (auto &II :
make_early_inc_range(make_range(OldWaitcntInstr.getIterator(), It))) {
- LLVM_DEBUG(dbgs() << "pre-existing iter: " << II << "\n");
+ LLVM_DEBUG(dbgs() << "pre-existing iter: " << II);
if (II.isMetaInstruction()) {
- LLVM_DEBUG(dbgs() << "------ skipped\n");
+ LLVM_DEBUG(dbgs() << "skipped meta instruction\n");
continue;
}
@@ -1504,10 +1521,10 @@ bool WaitcntGeneratorGFX12Plus::applyPreexistingWaitcnt(
Wait.DsCnt = ~0u;
LLVM_DEBUG(It == OldWaitcntInstr.getParent()->end()
- ? dbgs() << "applyPreexistingWaitcnt\n"
+ ? dbgs() << "applied pre-existing waitcnt\n"
<< "New Instr at block end: "
<< *CombinedLoadDsCntInstr << '\n'
- : dbgs() << "applyPreexistingWaitcnt\n"
+ : dbgs() << "applied pre-existing waitcnt\n"
<< "Old Instr: " << *It << "New Instr: "
<< *CombinedLoadDsCntInstr << '\n');
} else {
@@ -1529,10 +1546,10 @@ bool WaitcntGeneratorGFX12Plus::applyPreexistingWaitcnt(
Wait.DsCnt = ~0u;
LLVM_DEBUG(It == OldWaitcntInstr.getParent()->end()
- ? dbgs() << "applyPreexistingWaitcnt\n"
+ ? dbgs() << "applied pre-existing waitcnt\n"
<< "New Instr at block end: "
<< *CombinedStoreDsCntInstr << '\n'
- : dbgs() << "applyPreexistingWaitcnt\n"
+ : dbgs() << "applied pre-existing waitcnt\n"
<< "Old Instr: " << *It << "New Instr: "
<< *CombinedStoreDsCntInstr << '\n');
} else {
@@ -1588,10 +1605,10 @@ bool WaitcntGeneratorGFX12Plus::applyPreexistingWaitcnt(
setNoWait(Wait, CT);
LLVM_DEBUG(It == OldWaitcntInstr.getParent()->end()
- ? dbgs() << "applyPreexistingWaitcnt\n"
+ ? dbgs() << "applied pre-existing waitcnt\n"
<< "New Instr at block end: " << *WaitInstrs[CT]
<< '\n'
- : dbgs() << "applyPreexistingWaitcnt\n"
+ : dbgs() << "applied pre-existing waitcnt\n"
<< "Old Instr: " << *It
<< "New Instr: " << *WaitInstrs[CT] << '\n');
} else {
>From 7435a892ff9e306207c22228f96d350d7a08621e Mon Sep 17 00:00:00 2001
From: Sameer Sahasrabuddhe <sameer.sahasrabuddhe at amd.com>
Date: Thu, 19 Jun 2025 11:47:02 +0530
Subject: [PATCH 4/4] clang-format
---
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 34 +++++++++++----------
1 file changed, 18 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index f41a3dd612039..f7b88bf2d5ebc 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -104,24 +104,25 @@ struct HardwareLimits {
unsigned KmcntMax; // gfx12+ only.
};
-#define AMDGPU_DECLARE_WAIT_EVENTS(DECL) \
- DECL(VMEM_ACCESS) /* vmem read & write */ \
- DECL(VMEM_READ_ACCESS) /* vmem read */ \
- DECL(VMEM_SAMPLER_READ_ACCESS) /* vmem SAMPLER read (gfx12+ only) */ \
- DECL(VMEM_BVH_READ_ACCESS) /* vmem BVH read (gfx12+ only) */ \
- DECL(VMEM_WRITE_ACCESS) /* vmem write that is not scratch */ \
- DECL(SCRATCH_WRITE_ACCESS) /* vmem write that may be scratch */ \
- DECL(LDS_ACCESS) /* lds read & write */ \
- DECL(GDS_ACCESS) /* gds read & write */ \
- DECL(SQ_MESSAGE) /* send message */ \
- DECL(SMEM_ACCESS) /* scalar-memory read & write */ \
- DECL(EXP_GPR_LOCK) /* export holding on its data src */ \
- DECL(GDS_GPR_LOCK) /* GDS holding on its data and addr src */ \
- DECL(EXP_POS_ACCESS) /* write to export position */ \
- DECL(EXP_PARAM_ACCESS) /* write to export parameter */ \
- DECL(VMW_GPR_LOCK) /* vmem write holding on its data src */ \
+#define AMDGPU_DECLARE_WAIT_EVENTS(DECL) \
+ DECL(VMEM_ACCESS) /* vmem read & write */ \
+ DECL(VMEM_READ_ACCESS) /* vmem read */ \
+ DECL(VMEM_SAMPLER_READ_ACCESS) /* vmem SAMPLER read (gfx12+ only) */ \
+ DECL(VMEM_BVH_READ_ACCESS) /* vmem BVH read (gfx12+ only) */ \
+ DECL(VMEM_WRITE_ACCESS) /* vmem write that is not scratch */ \
+ DECL(SCRATCH_WRITE_ACCESS) /* vmem write that may be scratch */ \
+ DECL(LDS_ACCESS) /* lds read & write */ \
+ DECL(GDS_ACCESS) /* gds read & write */ \
+ DECL(SQ_MESSAGE) /* send message */ \
+ DECL(SMEM_ACCESS) /* scalar-memory read & write */ \
+ DECL(EXP_GPR_LOCK) /* export holding on its data src */ \
+ DECL(GDS_GPR_LOCK) /* GDS holding on its data and addr src */ \
+ DECL(EXP_POS_ACCESS) /* write to export position */ \
+ DECL(EXP_PARAM_ACCESS) /* write to export parameter */ \
+ DECL(VMW_GPR_LOCK) /* vmem write holding on its data src */ \
DECL(EXP_LDS_ACCESS) /* read by ldsdir counting as export */
+// clang-format off
#define AMDGPU_EVENT_ENUM(Name) Name,
enum WaitEventType {
AMDGPU_DECLARE_WAIT_EVENTS(AMDGPU_EVENT_ENUM)
@@ -134,6 +135,7 @@ static constexpr StringLiteral WaitEventTypeName[] = {
AMDGPU_DECLARE_WAIT_EVENTS(AMDGPU_EVENT_NAME)
};
#undef AMDGPU_EVENT_NAME
+// clang-format on
// The mapping is:
// 0 .. SQ_MAX_PGM_VGPRS-1 real VGPRs
More information about the llvm-commits mailing list