[llvm] remove redundant checks for invalidate instructions (PR #166139)

Mon Nov 3 01:25:40 PST 2025

https://github.com/stepthomas created https://github.com/llvm/llvm-project/pull/166139

SIInsertWaitcnts::getVmemWaitEventType() tests for GLOBAL_INV, GLOBAL_WB, and GLOBAL_WBINV instructions, but at each place it is called, either a check has already been made for these instructions, or the instruction is known not to be one of them.

Move the checks for these instructions out of getVmemWaitEventType() into a new function, getInvOrWBWaitEventType(), which returns an optional WaitEventType value, and use it at the call site where the instruction could be one of these.

SIInstrInfo::isGFX12CacheInvOrWBInst() is now itself redundant and is removed.

>From c7232f8947bb0dc9f1ac72b2ca33d65fdf2527c4 Mon Sep 17 00:00:00 2001
From: Stephen Thomas <Stephen.Thomas at amd.com>
Date: Fri, 31 Oct 2025 13:52:18 +0000
Subject: [PATCH] remove redundant checks for invalidate instructions

SIInsertWaitcnts::getVmemWaitEventType() tests for GLOBAL_INV, GLOBAL_WB, and
GLOBAL_WBINV instructions, but at each place it is called, either a check
has already been made for these instructions, or the instruction is known
not to be one of them.

Move the checks for these instructions out of getVmemWaitEventType() into
a new function, getInvOrWBWaitEventType(), which returns an optional
WaitEventType value, and use it at the call site where the instruction
could be one of these.

SIInstrInfo::isGFX12CacheInvOrWBInst() is now itself redundant and is removed.
---
 llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 17 +++++++++++------
 llvm/lib/Target/AMDGPU/SIInstrInfo.h        |  5 -----
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index b7fa899678ec7..1fabb3fdabf66 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -527,9 +527,10 @@ class SIInsertWaitcnts {
 #endif // NDEBUG
   }
 
-  // Return the appropriate VMEM_*_ACCESS type for Inst, which must be a VMEM
-  // instruction.
-  WaitEventType getVmemWaitEventType(const MachineInstr &Inst) const {
+  // Return the WaitEventType for Inst if it is a cache invalidate or WB
+  // instruction; otherwise return an empty optional.
+  std::optional<WaitEventType>
+  getInvOrWBWaitEventType(const MachineInstr &Inst) const {
     switch (Inst.getOpcode()) {
     case AMDGPU::GLOBAL_INV:
       return VMEM_READ_ACCESS; // tracked using loadcnt
@@ -537,9 +538,13 @@ class SIInsertWaitcnts {
     case AMDGPU::GLOBAL_WBINV:
       return VMEM_WRITE_ACCESS; // tracked using storecnt
     default:
-      break;
+      return {};
     }
+  }
 
+  // Return the appropriate VMEM_*_ACCESS type for Inst, which must be a VMEM
+  // instruction.
+  WaitEventType getVmemWaitEventType(const MachineInstr &Inst) const {
     // Maps VMEM access types to their corresponding WaitEventType.
     static const WaitEventType VmemReadMapping[NUM_VMEM_TYPES] = {
         VMEM_READ_ACCESS, VMEM_SAMPLER_READ_ACCESS, VMEM_BVH_READ_ACCESS};
@@ -2265,8 +2270,8 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
       ScoreBrackets->updateByEvent(LDS_ACCESS, Inst);
     }
   } else if (TII->isFLAT(Inst)) {
-    if (SIInstrInfo::isGFX12CacheInvOrWBInst(Inst.getOpcode())) {
-      ScoreBrackets->updateByEvent(getVmemWaitEventType(Inst), Inst);
+    if (std::optional<WaitEventType> ET = getInvOrWBWaitEventType(Inst)) {
+      ScoreBrackets->updateByEvent(*ET, Inst);
       return;
     }
 
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index dc23a21f959ce..adb6002230fce 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -1089,11 +1089,6 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
            Opcode == AMDGPU::DS_GWS_BARRIER;
   }
 
-  static bool isGFX12CacheInvOrWBInst(unsigned Opc) {
-    return Opc == AMDGPU::GLOBAL_INV || Opc == AMDGPU::GLOBAL_WB ||
-           Opc == AMDGPU::GLOBAL_WBINV;
-  }
-
   static bool isF16PseudoScalarTrans(unsigned Opcode) {
     return Opcode == AMDGPU::V_S_EXP_F16_e64 ||
            Opcode == AMDGPU::V_S_LOG_F16_e64 ||