[llvm] [AMDGPU][SIInsertWaitcnt][NFC] Make getWait() and getCounterRef() member functions (PR #178345)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 27 18:58:51 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: vporpo (vporpo)
<details>
<summary>Changes</summary>
This is a cleanup patch. It makes getCounterRef() a member function of the Waitcnt class — it is essentially a getter for the class's members — and renames it to get(); getWait() becomes the const overload of that same member. Likewise, getWaitCountMax() becomes a member of HardwareLimits, also renamed to get().
---
Full diff: https://github.com/llvm/llvm-project/pull/178345.diff
2 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp (+21-107)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (+64)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 46900223fe670..ce6f7a812c636 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -42,6 +42,7 @@
#include "llvm/TargetParser/TargetParser.h"
using namespace llvm;
+using namespace llvm::AMDGPU;
#define DEBUG_TYPE "si-insert-waitcnts"
@@ -68,29 +69,6 @@ static cl::opt<bool> ExpertSchedulingModeFlag(
cl::desc("Enable expert scheduling mode 2 for all functions (GFX12+ only)"),
cl::init(false), cl::Hidden);
-namespace {
-// Class of object that encapsulates latest instruction counter score
-// associated with the operand. Used for determining whether
-// s_waitcnt instruction needs to be emitted.
-
-enum InstCounterType {
- LOAD_CNT = 0, // VMcnt prior to gfx12.
- DS_CNT, // LGKMcnt prior to gfx12.
- EXP_CNT, //
- STORE_CNT, // VScnt in gfx10/gfx11.
- NUM_NORMAL_INST_CNTS,
- SAMPLE_CNT = NUM_NORMAL_INST_CNTS, // gfx12+ only.
- BVH_CNT, // gfx12+ only.
- KM_CNT, // gfx12+ only.
- X_CNT, // gfx1250.
- NUM_EXTENDED_INST_CNTS,
- VA_VDST = NUM_EXTENDED_INST_CNTS, // gfx12+ expert mode only.
- VM_VSRC, // gfx12+ expert mode only.
- NUM_EXPERT_INST_CNTS,
- NUM_INST_CNTS = NUM_EXPERT_INST_CNTS
-};
-} // namespace
-
namespace llvm {
template <> struct enum_iteration_traits<InstCounterType> {
static constexpr bool is_iterable = true;
@@ -105,35 +83,6 @@ auto inst_counter_types(InstCounterType MaxCounter = NUM_INST_CNTS) {
return enum_seq(LOAD_CNT, MaxCounter);
}
-// Get the maximum wait count value for a given counter type.
-static unsigned getWaitCountMax(const AMDGPU::HardwareLimits &Limits,
- InstCounterType T) {
- switch (T) {
- case LOAD_CNT:
- return Limits.LoadcntMax;
- case DS_CNT:
- return Limits.DscntMax;
- case EXP_CNT:
- return Limits.ExpcntMax;
- case STORE_CNT:
- return Limits.StorecntMax;
- case SAMPLE_CNT:
- return Limits.SamplecntMax;
- case BVH_CNT:
- return Limits.BvhcntMax;
- case KM_CNT:
- return Limits.KmcntMax;
- case X_CNT:
- return Limits.XcntMax;
- case VA_VDST:
- return Limits.VaVdstMax;
- case VM_VSRC:
- return Limits.VmVsrcMax;
- default:
- return 0;
- }
-}
-
static bool isSoftXcnt(MachineInstr &MI) {
return MI.getOpcode() == AMDGPU::S_WAIT_XCNT_soft;
}
@@ -281,45 +230,12 @@ VmemType getVmemType(const MachineInstr &Inst) {
return VMEM_NOSAMPLER;
}
-unsigned &getCounterRef(AMDGPU::Waitcnt &Wait, InstCounterType T) {
- switch (T) {
- case LOAD_CNT:
- return Wait.LoadCnt;
- case EXP_CNT:
- return Wait.ExpCnt;
- case DS_CNT:
- return Wait.DsCnt;
- case STORE_CNT:
- return Wait.StoreCnt;
- case SAMPLE_CNT:
- return Wait.SampleCnt;
- case BVH_CNT:
- return Wait.BvhCnt;
- case KM_CNT:
- return Wait.KmCnt;
- case X_CNT:
- return Wait.XCnt;
- case VA_VDST:
- return Wait.VaVdst;
- case VM_VSRC:
- return Wait.VmVsrc;
- default:
- llvm_unreachable("bad InstCounterType");
- }
-}
-
void addWait(AMDGPU::Waitcnt &Wait, InstCounterType T, unsigned Count) {
- unsigned &WC = getCounterRef(Wait, T);
+ unsigned &WC = Wait.get(T);
WC = std::min(WC, Count);
}
-void setNoWait(AMDGPU::Waitcnt &Wait, InstCounterType T) {
- getCounterRef(Wait, T) = ~0u;
-}
-
-unsigned getWait(AMDGPU::Waitcnt &Wait, InstCounterType T) {
- return getCounterRef(Wait, T);
-}
+void setNoWait(AMDGPU::Waitcnt &Wait, InstCounterType T) { Wait.get(T) = ~0u; }
// Mapping from event to counter according to the table masks.
InstCounterType eventCounter(const unsigned *masks, WaitEventType E) {
@@ -780,7 +696,7 @@ class WaitcntBrackets {
unsigned getPendingGDSWait() const {
return std::min(getScoreUB(DS_CNT) - LastGDS,
- getWaitCountMax(Context->getLimits(), DS_CNT) - 1);
+ Context->getLimits().get(DS_CNT) - 1);
}
void setPendingGDS() { LastGDS = ScoreUBs[DS_CNT]; }
@@ -807,8 +723,8 @@ class WaitcntBrackets {
}
void setStateOnFunctionEntryOrReturn() {
- setScoreUB(STORE_CNT, getScoreUB(STORE_CNT) +
- getWaitCountMax(Context->getLimits(), STORE_CNT));
+ setScoreUB(STORE_CNT,
+ getScoreUB(STORE_CNT) + Context->getLimits().get(STORE_CNT));
PendingEvents |= Context->WaitEventMaskForInst[STORE_CNT];
}
@@ -864,9 +780,8 @@ class WaitcntBrackets {
if (T != EXP_CNT)
return;
- if (getScoreRange(EXP_CNT) > getWaitCountMax(Context->getLimits(), EXP_CNT))
- ScoreLBs[EXP_CNT] =
- ScoreUBs[EXP_CNT] - getWaitCountMax(Context->getLimits(), EXP_CNT);
+ if (getScoreRange(EXP_CNT) > Context->getLimits().get(EXP_CNT))
+ ScoreLBs[EXP_CNT] = ScoreUBs[EXP_CNT] - Context->getLimits().get(EXP_CNT);
}
void setRegScore(MCPhysReg Reg, InstCounterType T, unsigned Val) {
@@ -1369,8 +1284,8 @@ void WaitcntBrackets::determineWaitForScore(InstCounterType T,
} else {
// If a counter has been maxed out avoid overflow by waiting for
// MAX(CounterType) - 1 instead.
- unsigned NeededWait = std::min(
- UB - ScoreToWait, getWaitCountMax(Context->getLimits(), T) - 1);
+ unsigned NeededWait =
+ std::min(UB - ScoreToWait, Context->getLimits().get(T) - 1);
addWait(Wait, T, NeededWait);
}
}
@@ -1715,7 +1630,7 @@ bool WaitcntGeneratorPreGFX12::createNewWaitcnt(
// would provide misleading profiling information.
bool AnyOutOfOrder = false;
for (auto CT : {LOAD_CNT, DS_CNT, EXP_CNT}) {
- unsigned &WaitCnt = getCounterRef(Wait, CT);
+ unsigned &WaitCnt = Wait.get(CT);
if (WaitCnt != ~0u && ScoreBrackets.counterOutOfOrder(CT)) {
AnyOutOfOrder = true;
break;
@@ -1730,16 +1645,16 @@ bool WaitcntGeneratorPreGFX12::createNewWaitcnt(
} else {
// All counters are in-order, safe to expand
for (auto CT : {LOAD_CNT, DS_CNT, EXP_CNT}) {
- unsigned &WaitCnt = getCounterRef(Wait, CT);
+ unsigned &WaitCnt = Wait.get(CT);
if (WaitCnt == ~0u)
continue;
unsigned Outstanding = std::min(ScoreBrackets.getScoreUB(CT) -
ScoreBrackets.getScoreLB(CT),
- getWaitCountMax(getLimits(), CT) - 1);
+ getLimits().get(CT) - 1);
EmitExpandedWaitcnt(Outstanding, WaitCnt, [&](unsigned Count) {
AMDGPU::Waitcnt W;
- getCounterRef(W, CT) = Count;
+ W.get(CT) = Count;
BuildMI(Block, It, DL, TII->get(AMDGPU::S_WAITCNT))
.addImm(AMDGPU::encodeWaitcnt(IV, W));
});
@@ -1764,10 +1679,9 @@ bool WaitcntGeneratorPreGFX12::createNewWaitcnt(
if (ExpandWaitcntProfiling && Wait.StoreCnt != ~0u &&
!ScoreBrackets.counterOutOfOrder(STORE_CNT)) {
// Only expand if counter is not out-of-order
- unsigned Outstanding =
- std::min(ScoreBrackets.getScoreUB(STORE_CNT) -
- ScoreBrackets.getScoreLB(STORE_CNT),
- getWaitCountMax(getLimits(), STORE_CNT) - 1);
+ unsigned Outstanding = std::min(ScoreBrackets.getScoreUB(STORE_CNT) -
+ ScoreBrackets.getScoreLB(STORE_CNT),
+ getLimits().get(STORE_CNT) - 1);
EmitExpandedWaitcnt(Outstanding, Wait.StoreCnt, [&](unsigned Count) {
BuildMI(Block, It, DL, TII->get(AMDGPU::S_WAITCNT_VSCNT))
.addReg(AMDGPU::SGPR_NULL, RegState::Undef)
@@ -2020,7 +1934,7 @@ bool WaitcntGeneratorGFX12Plus::applyPreexistingWaitcnt(
if (!WaitInstrs[CT])
continue;
- unsigned NewCnt = getWait(Wait, CT);
+ unsigned NewCnt = Wait.get(CT);
if (NewCnt != ~0u) {
Modified |= updateOperandIfDifferent(*WaitInstrs[CT],
AMDGPU::OpName::simm16, NewCnt);
@@ -2100,7 +2014,7 @@ bool WaitcntGeneratorGFX12Plus::createNewWaitcnt(
// simpler
if (ExpandWaitcntProfiling) {
for (auto CT : inst_counter_types(NUM_EXTENDED_INST_CNTS)) {
- unsigned Count = getWait(Wait, CT);
+ unsigned Count = Wait.get(CT);
if (Count == ~0u)
continue;
@@ -2114,7 +2028,7 @@ bool WaitcntGeneratorGFX12Plus::createNewWaitcnt(
unsigned Outstanding =
std::min(ScoreBrackets.getScoreUB(CT) - ScoreBrackets.getScoreLB(CT),
- getWaitCountMax(getLimits(), CT) - 1);
+ getLimits().get(CT) - 1);
EmitExpandedWaitcnt(Outstanding, Count, [&](unsigned Val) {
BuildMI(Block, It, DL, TII->get(instrsForExtendedCounterTypes[CT]))
.addImm(Val);
@@ -2160,7 +2074,7 @@ bool WaitcntGeneratorGFX12Plus::createNewWaitcnt(
// waiting for.
for (auto CT : inst_counter_types(NUM_EXTENDED_INST_CNTS)) {
- unsigned Count = getWait(Wait, CT);
+ unsigned Count = Wait.get(CT);
if (Count == ~0u)
continue;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 97dfdabc13691..ee14c3ccddecf 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1076,6 +1076,27 @@ getIntegerVecAttribute(const Function &F, StringRef Name, unsigned Size);
/// Checks if \p Val is inside \p MD, a !range-like metadata.
bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val);
+// Class of object that encapsulates latest instruction counter score
+// associated with the operand. Used for determining whether
+// s_waitcnt instruction needs to be emitted.
+
+enum InstCounterType {
+ LOAD_CNT = 0, // VMcnt prior to gfx12.
+ DS_CNT, // LGKMcnt prior to gfx12.
+ EXP_CNT, //
+ STORE_CNT, // VScnt in gfx10/gfx11.
+ NUM_NORMAL_INST_CNTS,
+ SAMPLE_CNT = NUM_NORMAL_INST_CNTS, // gfx12+ only.
+ BVH_CNT, // gfx12+ only.
+ KM_CNT, // gfx12+ only.
+ X_CNT, // gfx1250.
+ NUM_EXTENDED_INST_CNTS,
+ VA_VDST = NUM_EXTENDED_INST_CNTS, // gfx12+ expert mode only.
+ VM_VSRC, // gfx12+ expert mode only.
+ NUM_EXPERT_INST_CNTS,
+ NUM_INST_CNTS = NUM_EXPERT_INST_CNTS
+};
+
/// Represents the counter values to wait for in an s_waitcnt instruction.
///
/// Large values (including the maximum possible integer) can be used to
@@ -1092,6 +1113,28 @@ struct Waitcnt {
unsigned VaVdst = ~0u; // gfx12+ expert scheduling mode only.
unsigned VmVsrc = ~0u; // gfx12+ expert scheduling mode only.
+ unsigned &get(InstCounterType T) {
+ // clang-format off
+ switch (T) {
+ case LOAD_CNT: return LoadCnt;
+ case EXP_CNT: return ExpCnt;
+ case DS_CNT: return DsCnt;
+ case STORE_CNT: return StoreCnt;
+ case SAMPLE_CNT: return SampleCnt;
+ case BVH_CNT: return BvhCnt;
+ case KM_CNT: return KmCnt;
+ case X_CNT: return XCnt;
+ case VA_VDST: return VaVdst;
+ case VM_VSRC: return VmVsrc;
+ default:
+ llvm_unreachable("bad InstCounterType");
+ }
+ // clang-format on
+ }
+ unsigned get(InstCounterType T) const {
+ return const_cast<Waitcnt *>(this)->get(T);
+ }
+
Waitcnt() = default;
// Pre-gfx12 constructor.
Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
@@ -1144,6 +1187,27 @@ struct HardwareLimits {
unsigned VaVdstMax; // gfx12+ expert mode only.
unsigned VmVsrcMax; // gfx12+ expert mode only.
+ // Get the maximum wait count value for a given counter type.
+ unsigned get(InstCounterType T) const {
+ // clang-format off
+ switch (T) {
+ case LOAD_CNT: return LoadcntMax;
+ case DS_CNT: return DscntMax;
+ case EXP_CNT: return ExpcntMax;
+ case STORE_CNT: return StorecntMax;
+ case SAMPLE_CNT: return SamplecntMax;
+ case BVH_CNT: return BvhcntMax;
+ case KM_CNT: return KmcntMax;
+ case X_CNT: return XcntMax;
+ case VA_VDST: return VaVdstMax;
+ case VM_VSRC: return VmVsrcMax;
+ default:
+ // TODO: Shouldn't this be unreachable ?
+ return 0;
+ }
+ // clang-format on
+ }
+
HardwareLimits() = default;
/// Initializes hardware limits from ISA version.
``````````
</details>
https://github.com/llvm/llvm-project/pull/178345
More information about the llvm-commits
mailing list