[llvm] [AMDGPU][NFC][SIInsertWaitcnts] Remove redundant checks for invalidate instructions (PR #166139)
Stephen Thomas via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 3 02:42:53 PST 2025
https://github.com/stepthomas updated https://github.com/llvm/llvm-project/pull/166139
>From c7232f8947bb0dc9f1ac72b2ca33d65fdf2527c4 Mon Sep 17 00:00:00 2001
From: Stephen Thomas <Stephen.Thomas at amd.com>
Date: Fri, 31 Oct 2025 13:52:18 +0000
Subject: [PATCH 1/2] remove redundant checks for invalidate instructions
SIInsertWaitcnts::getVmemWaitEventType() tests for GLOBAL_INV, GLOBAL_WB, and
GLOBAL_WBINV instructions, but at each of its call sites either a check has
already been made for these instructions, or it is known that the instruction
cannot be one of them.
Move the checks for these instructions out of getVmemWaitEventType() into
a new function, getInvOrWBWaitEventType(), which returns an optional
WaitEventType value, and use it at the call site where the instruction could
be one of them.
SIInstrInfo::isGFX12CacheInvOrWBInst() is now itself redundant and is removed.
---
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 17 +++++++++++------
llvm/lib/Target/AMDGPU/SIInstrInfo.h | 5 -----
2 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index b7fa899678ec7..1fabb3fdabf66 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -527,9 +527,10 @@ class SIInsertWaitcnts {
#endif // NDEBUG
}
- // Return the appropriate VMEM_*_ACCESS type for Inst, which must be a VMEM
- // instruction.
- WaitEventType getVmemWaitEventType(const MachineInstr &Inst) const {
+ // Return an optional WaitEventType value if Inst is a cache
+ // invalidate or WB instruction.
+ std::optional<WaitEventType>
+ getInvOrWBWaitEventType(const MachineInstr &Inst) const {
switch (Inst.getOpcode()) {
case AMDGPU::GLOBAL_INV:
return VMEM_READ_ACCESS; // tracked using loadcnt
@@ -537,9 +538,13 @@ class SIInsertWaitcnts {
case AMDGPU::GLOBAL_WBINV:
return VMEM_WRITE_ACCESS; // tracked using storecnt
default:
- break;
+ return {};
}
+ }
+ // Return the appropriate VMEM_*_ACCESS type for Inst, which must be a VMEM
+ // instruction.
+ WaitEventType getVmemWaitEventType(const MachineInstr &Inst) const {
// Maps VMEM access types to their corresponding WaitEventType.
static const WaitEventType VmemReadMapping[NUM_VMEM_TYPES] = {
VMEM_READ_ACCESS, VMEM_SAMPLER_READ_ACCESS, VMEM_BVH_READ_ACCESS};
@@ -2265,8 +2270,8 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
ScoreBrackets->updateByEvent(LDS_ACCESS, Inst);
}
} else if (TII->isFLAT(Inst)) {
- if (SIInstrInfo::isGFX12CacheInvOrWBInst(Inst.getOpcode())) {
- ScoreBrackets->updateByEvent(getVmemWaitEventType(Inst), Inst);
+ if (std::optional<WaitEventType> ET = getInvOrWBWaitEventType(Inst)) {
+ ScoreBrackets->updateByEvent(*ET, Inst);
return;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index dc23a21f959ce..adb6002230fce 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1089,11 +1089,6 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
Opcode == AMDGPU::DS_GWS_BARRIER;
}
- static bool isGFX12CacheInvOrWBInst(unsigned Opc) {
- return Opc == AMDGPU::GLOBAL_INV || Opc == AMDGPU::GLOBAL_WB ||
- Opc == AMDGPU::GLOBAL_WBINV;
- }
-
static bool isF16PseudoScalarTrans(unsigned Opcode) {
return Opcode == AMDGPU::V_S_EXP_F16_e64 ||
Opcode == AMDGPU::V_S_LOG_F16_e64 ||
>From d3b154223132fd294516dca264589329305837c1 Mon Sep 17 00:00:00 2001
From: Stephen Thomas <Stephen.Thomas at amd.com>
Date: Mon, 3 Nov 2025 10:34:25 +0000
Subject: [PATCH 2/2] Move getInvOrWBWaitEventType() out of SIInsertWaitcnts
class
---
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 29 ++++++++++-----------
1 file changed, 14 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 1fabb3fdabf66..c4c35849d7d9b 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -224,6 +224,20 @@ VmemType getVmemType(const MachineInstr &Inst) {
return VMEM_NOSAMPLER;
}
+// Return an optional WaitEventType value if Inst is a cache invalidate
+// or WB instruction.
+std::optional<WaitEventType> getInvOrWBWaitEventType(const MachineInstr &Inst) {
+ switch (Inst.getOpcode()) {
+ case AMDGPU::GLOBAL_INV:
+ return VMEM_READ_ACCESS; // tracked using loadcnt
+ case AMDGPU::GLOBAL_WB:
+ case AMDGPU::GLOBAL_WBINV:
+ return VMEM_WRITE_ACCESS; // tracked using storecnt
+ default:
+ return {};
+ }
+}
+
unsigned &getCounterRef(AMDGPU::Waitcnt &Wait, InstCounterType T) {
switch (T) {
case LOAD_CNT:
@@ -527,21 +541,6 @@ class SIInsertWaitcnts {
#endif // NDEBUG
}
- // Return an optional WaitEventType value if Inst is a cache
- // invalidate or WB instruction.
- std::optional<WaitEventType>
- getInvOrWBWaitEventType(const MachineInstr &Inst) const {
- switch (Inst.getOpcode()) {
- case AMDGPU::GLOBAL_INV:
- return VMEM_READ_ACCESS; // tracked using loadcnt
- case AMDGPU::GLOBAL_WB:
- case AMDGPU::GLOBAL_WBINV:
- return VMEM_WRITE_ACCESS; // tracked using storecnt
- default:
- return {};
- }
- }
-
// Return the appropriate VMEM_*_ACCESS type for Inst, which must be a VMEM
// instruction.
WaitEventType getVmemWaitEventType(const MachineInstr &Inst) const {
More information about the llvm-commits
mailing list