[llvm] [AMDGPU][AMDGPUBaseInfo] Replace Waitcnt members with array (PR #182927)

via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 19 08:36:42 PDT 2026


https://github.com/vporpo updated https://github.com/llvm/llvm-project/pull/182927

>From 21fe4f435dbe57a6cf6d0a0007250a17a5bed2a1 Mon Sep 17 00:00:00 2001
From: Vasileios Porpodas <vasileios.porpodas at amd.com>
Date: Mon, 2 Feb 2026 21:18:53 +0000
Subject: [PATCH 1/6] [AMDGPU][AMDGPUBaseInfo] Replace Waitcnt members with
 array

This patch replaces the member variables of Waitcnt with an array.
This helps in two ways:
(i) It helps replace switch cases with array accesses, and
(ii) It makes operating on all elements much easier, because it can be done with a simple loop.
---
 .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp    |  50 +++---
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 153 +++++++-----------
 2 files changed, 88 insertions(+), 115 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index dc56d746e1a8e..b049e50a3f474 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -198,6 +198,32 @@ iota_range<InstCounterType> inst_counter_types(InstCounterType MaxCounter) {
   return enum_seq(LOAD_CNT, MaxCounter);
 }
 
+StringLiteral instCounterTypeToStr(InstCounterType T) {
+  // clang-format off
+  switch (T) {
+  case LOAD_CNT:   return "LoadCnt";
+  case DS_CNT:     return "DsCnt";
+  case EXP_CNT:    return "ExpCnt";
+  case STORE_CNT:  return "StoreCnt";
+  case SAMPLE_CNT: return "SampleCnt";
+  case BVH_CNT:    return "BvhCnt";
+  case KM_CNT:     return "KmCnt";
+  case X_CNT:      return "XCnt";
+  case VA_VDST:    return "VaVdst";
+  case VM_VSRC:    return "VmVsrc";
+  default:
+    return "Unknown T";
+  }
+  // clang-format on
+}
+
+#ifndef NDEBUG
+void Waitcnt::dump() const {
+  print(dbgs());
+  dbgs() << "\n";
+}
+#endif
+
 /// \returns true if the target supports signed immediate offset for SMRD
 /// instructions.
 bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
@@ -1755,30 +1781,6 @@ bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val) {
   return false;
 }
 
-raw_ostream &operator<<(raw_ostream &OS, const AMDGPU::Waitcnt &Wait) {
-  ListSeparator LS;
-  if (Wait.LoadCnt != ~0u)
-    OS << LS << "LoadCnt: " << Wait.LoadCnt;
-  if (Wait.ExpCnt != ~0u)
-    OS << LS << "ExpCnt: " << Wait.ExpCnt;
-  if (Wait.DsCnt != ~0u)
-    OS << LS << "DsCnt: " << Wait.DsCnt;
-  if (Wait.StoreCnt != ~0u)
-    OS << LS << "StoreCnt: " << Wait.StoreCnt;
-  if (Wait.SampleCnt != ~0u)
-    OS << LS << "SampleCnt: " << Wait.SampleCnt;
-  if (Wait.BvhCnt != ~0u)
-    OS << LS << "BvhCnt: " << Wait.BvhCnt;
-  if (Wait.KmCnt != ~0u)
-    OS << LS << "KmCnt: " << Wait.KmCnt;
-  if (Wait.XCnt != ~0u)
-    OS << LS << "XCnt: " << Wait.XCnt;
-  if (LS.unused())
-    OS << "none";
-  OS << '\n';
-  return OS;
-}
-
 unsigned getVmcntBitMask(const IsaVersion &Version) {
   return (1 << (getVmcntBitWidthLo(Version.Major) +
                 getVmcntBitWidthHi(Version.Major))) -
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 2032c3dbb3a86..0323c5ec057b9 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -11,6 +11,7 @@
 
 #include "AMDGPUSubtarget.h"
 #include "SIDefines.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/IR/CallingConv.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Module.h"
@@ -1107,6 +1108,8 @@ enum InstCounterType {
   NUM_INST_CNTS = NUM_EXPERT_INST_CNTS
 };
 
+StringLiteral instCounterTypeToStr(InstCounterType T);
+
 // Return an iterator over all counters between LOAD_CNT (the first counter)
 // and \c MaxCounter (exclusive, default value yields an enumeration over
 // all counters).
@@ -1126,118 +1129,86 @@ namespace AMDGPU {
 /// Large values (including the maximum possible integer) can be used to
 /// represent "don't care" waits.
 class Waitcnt {
-  unsigned LoadCnt = ~0u; // Corresponds to Vmcnt prior to gfx12.
-  unsigned ExpCnt = ~0u;
-  unsigned DsCnt = ~0u;     // Corresponds to LGKMcnt prior to gfx12.
-  unsigned StoreCnt = ~0u;  // Corresponds to VScnt on gfx10/gfx11.
-  unsigned SampleCnt = ~0u; // gfx12+ only.
-  unsigned BvhCnt = ~0u;    // gfx12+ only.
-  unsigned KmCnt = ~0u;     // gfx12+ only.
-  unsigned XCnt = ~0u;      // gfx1250.
-  unsigned VaVdst = ~0u;    // gfx12+ expert scheduling mode only.
-  unsigned VmVsrc = ~0u;    // gfx12+ expert scheduling mode only.
+  std::array<unsigned, NUM_INST_CNTS> Cnt;
 
 public:
-  unsigned get(InstCounterType T) const {
-    switch (T) {
-    case LOAD_CNT:
-      return LoadCnt;
-    case EXP_CNT:
-      return ExpCnt;
-    case DS_CNT:
-      return DsCnt;
-    case STORE_CNT:
-      return StoreCnt;
-    case SAMPLE_CNT:
-      return SampleCnt;
-    case BVH_CNT:
-      return BvhCnt;
-    case KM_CNT:
-      return KmCnt;
-    case X_CNT:
-      return XCnt;
-    case VA_VDST:
-      return VaVdst;
-    case VM_VSRC:
-      return VmVsrc;
-    default:
-      llvm_unreachable("bad InstCounterType");
-    }
-  }
-  void set(InstCounterType T, unsigned Val) {
-    switch (T) {
-    case LOAD_CNT:
-      LoadCnt = Val;
-      break;
-    case EXP_CNT:
-      ExpCnt = Val;
-      break;
-    case DS_CNT:
-      DsCnt = Val;
-      break;
-    case STORE_CNT:
-      StoreCnt = Val;
-      break;
-    case SAMPLE_CNT:
-      SampleCnt = Val;
-      break;
-    case BVH_CNT:
-      BvhCnt = Val;
-      break;
-    case KM_CNT:
-      KmCnt = Val;
-      break;
-    case X_CNT:
-      XCnt = Val;
-      break;
-    case VA_VDST:
-      VaVdst = Val;
-      break;
-    case VM_VSRC:
-      VmVsrc = Val;
-      break;
-    default:
-      llvm_unreachable("bad InstCounterType");
-    }
-  }
+  unsigned get(InstCounterType T) const { return Cnt[T]; }
+  void set(InstCounterType T, unsigned Val) { Cnt[T] = Val; }
 
-  Waitcnt() = default;
+  Waitcnt() { fill(Cnt, ~0u); }
   // Pre-gfx12 constructor.
   Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
-      : LoadCnt(VmCnt), ExpCnt(ExpCnt), DsCnt(LgkmCnt), StoreCnt(VsCnt) {}
+      : Waitcnt() {
+    Cnt[LOAD_CNT] = VmCnt;
+    Cnt[EXP_CNT] = ExpCnt;
+    Cnt[DS_CNT] = LgkmCnt;
+    Cnt[STORE_CNT] = VsCnt;
+  }
 
   // gfx12+ constructor.
   Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt,
           unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt,
-          unsigned VaVdst, unsigned VmVsrc)
-      : LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt),
-        SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt), XCnt(XCnt),
-        VaVdst(VaVdst), VmVsrc(VmVsrc) {}
+          unsigned VaVdst, unsigned VmVsrc) {
+    Cnt[LOAD_CNT] = LoadCnt;
+    Cnt[DS_CNT] = DsCnt;
+    Cnt[EXP_CNT] = ExpCnt;
+    Cnt[STORE_CNT] = StoreCnt;
+    Cnt[SAMPLE_CNT] = SampleCnt;
+    Cnt[BVH_CNT] = BvhCnt;
+    Cnt[KM_CNT] = KmCnt;
+    Cnt[X_CNT] = XCnt;
+    Cnt[VA_VDST] = VaVdst;
+    Cnt[VM_VSRC] = VmVsrc;
+  }
 
-  bool hasWait() const { return StoreCnt != ~0u || hasWaitExceptStoreCnt(); }
+  bool hasWait() const {
+    return any_of(Cnt, [](unsigned Val) { return Val != ~0u; });
+  }
 
   bool hasWaitExceptStoreCnt() const {
-    return LoadCnt != ~0u || ExpCnt != ~0u || DsCnt != ~0u ||
-           SampleCnt != ~0u || BvhCnt != ~0u || KmCnt != ~0u || XCnt != ~0u ||
-           VaVdst != ~0u || VmVsrc != ~0u;
+    for (InstCounterType T : inst_counter_types()) {
+      if (T == STORE_CNT)
+        continue;
+      if (Cnt[T] != ~0u)
+        return true;
+    }
+    return false;
   }
 
-  bool hasWaitStoreCnt() const { return StoreCnt != ~0u; }
+  bool hasWaitStoreCnt() const { return Cnt[STORE_CNT] != ~0u; }
 
-  bool hasWaitDepctr() const { return VaVdst != ~0u || VmVsrc != ~0u; }
+  bool hasWaitDepctr() const {
+    return Cnt[VA_VDST] != ~0u || Cnt[VM_VSRC] != ~0u;
+  }
 
   Waitcnt combined(const Waitcnt &Other) const {
     // Does the right thing provided self and Other are either both pre-gfx12
     // or both gfx12+.
-    return Waitcnt(
-        std::min(LoadCnt, Other.LoadCnt), std::min(ExpCnt, Other.ExpCnt),
-        std::min(DsCnt, Other.DsCnt), std::min(StoreCnt, Other.StoreCnt),
-        std::min(SampleCnt, Other.SampleCnt), std::min(BvhCnt, Other.BvhCnt),
-        std::min(KmCnt, Other.KmCnt), std::min(XCnt, Other.XCnt),
-        std::min(VaVdst, Other.VaVdst), std::min(VmVsrc, Other.VmVsrc));
+    Waitcnt Wait;
+    for (InstCounterType T : inst_counter_types()) {
+      Wait.Cnt[T] = std::min(Cnt[T], Other.Cnt[T]);
+    }
+    return Wait;
+  }
+
+  void print(raw_ostream &OS) const {
+    ListSeparator LS;
+    for (InstCounterType T : inst_counter_types()) {
+      OS << LS << instCounterTypeToStr(T) << ": " << Cnt[T];
+    }
+    if (LS.unused())
+      OS << "none";
+    OS << '\n';
   }
 
-  friend raw_ostream &operator<<(raw_ostream &OS, const AMDGPU::Waitcnt &Wait);
+#ifndef NDEBUG
+  LLVM_DUMP_METHOD void dump() const;
+#endif
+
+  friend raw_ostream &operator<<(raw_ostream &OS, const AMDGPU::Waitcnt &Wait) {
+    Wait.print(OS);
+    return OS;
+  }
 };
 
 /// Represents the hardware counter limits for different wait count types.

>From 2643c25b7c982b888d494ca7e0c142c3ceb507b4 Mon Sep 17 00:00:00 2001
From: Vasileios Porpodas <vasileios.porpodas at amd.com>
Date: Mon, 23 Feb 2026 21:50:47 +0000
Subject: [PATCH 2/6] fixup! [AMDGPU][AMDGPUBaseInfo] Replace Waitcnt members
 with array

---
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index b049e50a3f474..141431285a1a5 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -218,10 +218,7 @@ StringLiteral instCounterTypeToStr(InstCounterType T) {
 }
 
 #ifndef NDEBUG
-void Waitcnt::dump() const {
-  print(dbgs());
-  dbgs() << "\n";
-}
+void Waitcnt::dump() const { dbgs() << *this << "\n"; }
 #endif
 
 /// \returns true if the target supports signed immediate offset for SMRD

>From fb0cc6e5d0e5aa60dae2b834b32c7b4d07398a7b Mon Sep 17 00:00:00 2001
From: Vasileios Porpodas <vasileios.porpodas at amd.com>
Date: Tue, 24 Feb 2026 03:15:20 +0000
Subject: [PATCH 3/6] fixup! fixup! [AMDGPU][AMDGPUBaseInfo] Replace Waitcnt
 members with array

---
 .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp    | 32 ++++++++++++-------
 1 file changed, 20 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 141431285a1a5..365fa44fbbb50 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -199,22 +199,30 @@ iota_range<InstCounterType> inst_counter_types(InstCounterType MaxCounter) {
 }
 
 StringLiteral instCounterTypeToStr(InstCounterType T) {
-  // clang-format off
   switch (T) {
-  case LOAD_CNT:   return "LoadCnt";
-  case DS_CNT:     return "DsCnt";
-  case EXP_CNT:    return "ExpCnt";
-  case STORE_CNT:  return "StoreCnt";
-  case SAMPLE_CNT: return "SampleCnt";
-  case BVH_CNT:    return "BvhCnt";
-  case KM_CNT:     return "KmCnt";
-  case X_CNT:      return "XCnt";
-  case VA_VDST:    return "VaVdst";
-  case VM_VSRC:    return "VmVsrc";
+  case LOAD_CNT:
+    return "LOAD_CNT";
+  case DS_CNT:
+    return "DS_CNT";
+  case EXP_CNT:
+    return "EXP_CNT";
+  case STORE_CNT:
+    return "STORE_CNT";
+  case SAMPLE_CNT:
+    return "SAMPLE_CNT";
+  case BVH_CNT:
+    return "BVH_CNT";
+  case KM_CNT:
+    return "KM_CNT";
+  case X_CNT:
+    return "X_CNT";
+  case VA_VDST:
+    return "VA_VDST";
+  case VM_VSRC:
+    return "VM_VSRC";
   default:
     return "Unknown T";
   }
-  // clang-format on
 }
 
 #ifndef NDEBUG

>From a4abb892589453195cf26bcf011bc57ea24b9ee3 Mon Sep 17 00:00:00 2001
From: Vasileios Porpodas <vasileios.porpodas at amd.com>
Date: Fri, 27 Feb 2026 01:16:21 +0000
Subject: [PATCH 4/6] fixup! fixup! fixup! [AMDGPU][AMDGPUBaseInfo] Replace
 Waitcnt members with array

---
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 0323c5ec057b9..4ba73a7556ffb 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1148,7 +1148,8 @@ class Waitcnt {
   // gfx12+ constructor.
   Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt,
           unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt,
-          unsigned VaVdst, unsigned VmVsrc) {
+          unsigned VaVdst, unsigned VmVsrc)
+      : Waitcnt() {
     Cnt[LOAD_CNT] = LoadCnt;
     Cnt[DS_CNT] = DsCnt;
     Cnt[EXP_CNT] = ExpCnt;

>From 22af22f795a4192d625a60881815513fd7b53909 Mon Sep 17 00:00:00 2001
From: Vasileios Porpodas <vasileios.porpodas at amd.com>
Date: Tue, 10 Mar 2026 15:58:44 +0000
Subject: [PATCH 5/6] fixup! fixup! fixup! fixup! [AMDGPU][AMDGPUBaseInfo]
 Replace Waitcnt members with array

---
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 2 +-
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h   | 8 +++-----
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 365fa44fbbb50..651ff6ad864c7 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -225,7 +225,7 @@ StringLiteral instCounterTypeToStr(InstCounterType T) {
   }
 }
 
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 void Waitcnt::dump() const { dbgs() << *this << "\n"; }
 #endif
 
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 4ba73a7556ffb..1e1de47b3a81b 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1186,23 +1186,21 @@ class Waitcnt {
     // Does the right thing provided self and Other are either both pre-gfx12
     // or both gfx12+.
     Waitcnt Wait;
-    for (InstCounterType T : inst_counter_types()) {
+    for (InstCounterType T : inst_counter_types())
       Wait.Cnt[T] = std::min(Cnt[T], Other.Cnt[T]);
-    }
     return Wait;
   }
 
   void print(raw_ostream &OS) const {
     ListSeparator LS;
-    for (InstCounterType T : inst_counter_types()) {
+    for (InstCounterType T : inst_counter_types())
       OS << LS << instCounterTypeToStr(T) << ": " << Cnt[T];
-    }
     if (LS.unused())
       OS << "none";
     OS << '\n';
   }
 
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   LLVM_DUMP_METHOD void dump() const;
 #endif
 

>From 6e49f99dd0e01e628c2c2592754a3a7b86485a3c Mon Sep 17 00:00:00 2001
From: Vasileios Porpodas <vasileios.porpodas at amd.com>
Date: Thu, 19 Mar 2026 15:25:20 +0000
Subject: [PATCH 6/6] fixup! fixup! fixup! fixup! fixup!
 [AMDGPU][AMDGPUBaseInfo] Replace Waitcnt members with array

---
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 2 +-
 llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h   | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 651ff6ad864c7..10c9921f34318 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -198,7 +198,7 @@ iota_range<InstCounterType> inst_counter_types(InstCounterType MaxCounter) {
   return enum_seq(LOAD_CNT, MaxCounter);
 }
 
-StringLiteral instCounterTypeToStr(InstCounterType T) {
+StringLiteral getInstCounterName(InstCounterType T) {
   switch (T) {
   case LOAD_CNT:
     return "LOAD_CNT";
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 1e1de47b3a81b..619a8248b22fb 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1108,7 +1108,7 @@ enum InstCounterType {
   NUM_INST_CNTS = NUM_EXPERT_INST_CNTS
 };
 
-StringLiteral instCounterTypeToStr(InstCounterType T);
+StringLiteral getInstCounterName(InstCounterType T);
 
 // Return an iterator over all counters between LOAD_CNT (the first counter)
 // and \c MaxCounter (exclusive, default value yields an enumeration over
@@ -1194,7 +1194,7 @@ class Waitcnt {
   void print(raw_ostream &OS) const {
     ListSeparator LS;
     for (InstCounterType T : inst_counter_types())
-      OS << LS << instCounterTypeToStr(T) << ": " << Cnt[T];
+      OS << LS << getInstCounterName(T) << ": " << Cnt[T];
     if (LS.unused())
       OS << "none";
     OS << '\n';



More information about the llvm-commits mailing list