[llvm] [AMDGPU][AMDGPUBaseInfo] Replace Waitcnt members with array (PR #182927)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 19 08:36:42 PDT 2026
https://github.com/vporpo updated https://github.com/llvm/llvm-project/pull/182927
>From 21fe4f435dbe57a6cf6d0a0007250a17a5bed2a1 Mon Sep 17 00:00:00 2001
From: Vasileios Porpodas <vasileios.porpodas at amd.com>
Date: Mon, 2 Feb 2026 21:18:53 +0000
Subject: [PATCH 1/6] [AMDGPU][AMDGPUBaseInfo] Replace Waitcnt members with
array
This patch replaces the member variables of Waitcnt with an array.
This helps in two ways:
(i) It helps replace switch cases with array accesses, and
(ii) It makes operating on all elements with a loop much easier.
---
.../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 50 +++---
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 153 +++++++-----------
2 files changed, 88 insertions(+), 115 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index dc56d746e1a8e..b049e50a3f474 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -198,6 +198,32 @@ iota_range<InstCounterType> inst_counter_types(InstCounterType MaxCounter) {
return enum_seq(LOAD_CNT, MaxCounter);
}
+StringLiteral instCounterTypeToStr(InstCounterType T) {
+ // clang-format off
+ switch (T) {
+ case LOAD_CNT: return "LoadCnt";
+ case DS_CNT: return "DsCnt";
+ case EXP_CNT: return "ExpCnt";
+ case STORE_CNT: return "StoreCnt";
+ case SAMPLE_CNT: return "SampleCnt";
+ case BVH_CNT: return "BvhCnt";
+ case KM_CNT: return "KmCnt";
+ case X_CNT: return "XCnt";
+ case VA_VDST: return "VaVdst";
+ case VM_VSRC: return "VmVsrc";
+ default:
+ return "Unknown T";
+ }
+ // clang-format on
+}
+
+#ifndef NDEBUG
+void Waitcnt::dump() const {
+ print(dbgs());
+ dbgs() << "\n";
+}
+#endif
+
/// \returns true if the target supports signed immediate offset for SMRD
/// instructions.
bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
@@ -1755,30 +1781,6 @@ bool hasValueInRangeLikeMetadata(const MDNode &MD, int64_t Val) {
return false;
}
-raw_ostream &operator<<(raw_ostream &OS, const AMDGPU::Waitcnt &Wait) {
- ListSeparator LS;
- if (Wait.LoadCnt != ~0u)
- OS << LS << "LoadCnt: " << Wait.LoadCnt;
- if (Wait.ExpCnt != ~0u)
- OS << LS << "ExpCnt: " << Wait.ExpCnt;
- if (Wait.DsCnt != ~0u)
- OS << LS << "DsCnt: " << Wait.DsCnt;
- if (Wait.StoreCnt != ~0u)
- OS << LS << "StoreCnt: " << Wait.StoreCnt;
- if (Wait.SampleCnt != ~0u)
- OS << LS << "SampleCnt: " << Wait.SampleCnt;
- if (Wait.BvhCnt != ~0u)
- OS << LS << "BvhCnt: " << Wait.BvhCnt;
- if (Wait.KmCnt != ~0u)
- OS << LS << "KmCnt: " << Wait.KmCnt;
- if (Wait.XCnt != ~0u)
- OS << LS << "XCnt: " << Wait.XCnt;
- if (LS.unused())
- OS << "none";
- OS << '\n';
- return OS;
-}
-
unsigned getVmcntBitMask(const IsaVersion &Version) {
return (1 << (getVmcntBitWidthLo(Version.Major) +
getVmcntBitWidthHi(Version.Major))) -
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 2032c3dbb3a86..0323c5ec057b9 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -11,6 +11,7 @@
#include "AMDGPUSubtarget.h"
#include "SIDefines.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Module.h"
@@ -1107,6 +1108,8 @@ enum InstCounterType {
NUM_INST_CNTS = NUM_EXPERT_INST_CNTS
};
+StringLiteral instCounterTypeToStr(InstCounterType T);
+
// Return an iterator over all counters between LOAD_CNT (the first counter)
// and \c MaxCounter (exclusive, default value yields an enumeration over
// all counters).
@@ -1126,118 +1129,86 @@ namespace AMDGPU {
/// Large values (including the maximum possible integer) can be used to
/// represent "don't care" waits.
class Waitcnt {
- unsigned LoadCnt = ~0u; // Corresponds to Vmcnt prior to gfx12.
- unsigned ExpCnt = ~0u;
- unsigned DsCnt = ~0u; // Corresponds to LGKMcnt prior to gfx12.
- unsigned StoreCnt = ~0u; // Corresponds to VScnt on gfx10/gfx11.
- unsigned SampleCnt = ~0u; // gfx12+ only.
- unsigned BvhCnt = ~0u; // gfx12+ only.
- unsigned KmCnt = ~0u; // gfx12+ only.
- unsigned XCnt = ~0u; // gfx1250.
- unsigned VaVdst = ~0u; // gfx12+ expert scheduling mode only.
- unsigned VmVsrc = ~0u; // gfx12+ expert scheduling mode only.
+ std::array<unsigned, NUM_INST_CNTS> Cnt;
public:
- unsigned get(InstCounterType T) const {
- switch (T) {
- case LOAD_CNT:
- return LoadCnt;
- case EXP_CNT:
- return ExpCnt;
- case DS_CNT:
- return DsCnt;
- case STORE_CNT:
- return StoreCnt;
- case SAMPLE_CNT:
- return SampleCnt;
- case BVH_CNT:
- return BvhCnt;
- case KM_CNT:
- return KmCnt;
- case X_CNT:
- return XCnt;
- case VA_VDST:
- return VaVdst;
- case VM_VSRC:
- return VmVsrc;
- default:
- llvm_unreachable("bad InstCounterType");
- }
- }
- void set(InstCounterType T, unsigned Val) {
- switch (T) {
- case LOAD_CNT:
- LoadCnt = Val;
- break;
- case EXP_CNT:
- ExpCnt = Val;
- break;
- case DS_CNT:
- DsCnt = Val;
- break;
- case STORE_CNT:
- StoreCnt = Val;
- break;
- case SAMPLE_CNT:
- SampleCnt = Val;
- break;
- case BVH_CNT:
- BvhCnt = Val;
- break;
- case KM_CNT:
- KmCnt = Val;
- break;
- case X_CNT:
- XCnt = Val;
- break;
- case VA_VDST:
- VaVdst = Val;
- break;
- case VM_VSRC:
- VmVsrc = Val;
- break;
- default:
- llvm_unreachable("bad InstCounterType");
- }
- }
+ unsigned get(InstCounterType T) const { return Cnt[T]; }
+ void set(InstCounterType T, unsigned Val) { Cnt[T] = Val; }
- Waitcnt() = default;
+ Waitcnt() { fill(Cnt, ~0u); }
// Pre-gfx12 constructor.
Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
- : LoadCnt(VmCnt), ExpCnt(ExpCnt), DsCnt(LgkmCnt), StoreCnt(VsCnt) {}
+ : Waitcnt() {
+ Cnt[LOAD_CNT] = VmCnt;
+ Cnt[EXP_CNT] = ExpCnt;
+ Cnt[DS_CNT] = LgkmCnt;
+ Cnt[STORE_CNT] = VsCnt;
+ }
// gfx12+ constructor.
Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt,
unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt,
- unsigned VaVdst, unsigned VmVsrc)
- : LoadCnt(LoadCnt), ExpCnt(ExpCnt), DsCnt(DsCnt), StoreCnt(StoreCnt),
- SampleCnt(SampleCnt), BvhCnt(BvhCnt), KmCnt(KmCnt), XCnt(XCnt),
- VaVdst(VaVdst), VmVsrc(VmVsrc) {}
+ unsigned VaVdst, unsigned VmVsrc) {
+ Cnt[LOAD_CNT] = LoadCnt;
+ Cnt[DS_CNT] = DsCnt;
+ Cnt[EXP_CNT] = ExpCnt;
+ Cnt[STORE_CNT] = StoreCnt;
+ Cnt[SAMPLE_CNT] = SampleCnt;
+ Cnt[BVH_CNT] = BvhCnt;
+ Cnt[KM_CNT] = KmCnt;
+ Cnt[X_CNT] = XCnt;
+ Cnt[VA_VDST] = VaVdst;
+ Cnt[VM_VSRC] = VmVsrc;
+ }
- bool hasWait() const { return StoreCnt != ~0u || hasWaitExceptStoreCnt(); }
+ bool hasWait() const {
+ return any_of(Cnt, [](unsigned Val) { return Val != ~0u; });
+ }
bool hasWaitExceptStoreCnt() const {
- return LoadCnt != ~0u || ExpCnt != ~0u || DsCnt != ~0u ||
- SampleCnt != ~0u || BvhCnt != ~0u || KmCnt != ~0u || XCnt != ~0u ||
- VaVdst != ~0u || VmVsrc != ~0u;
+ for (InstCounterType T : inst_counter_types()) {
+ if (T == STORE_CNT)
+ continue;
+ if (Cnt[T] != ~0u)
+ return true;
+ }
+ return false;
}
- bool hasWaitStoreCnt() const { return StoreCnt != ~0u; }
+ bool hasWaitStoreCnt() const { return Cnt[STORE_CNT] != ~0u; }
- bool hasWaitDepctr() const { return VaVdst != ~0u || VmVsrc != ~0u; }
+ bool hasWaitDepctr() const {
+ return Cnt[VA_VDST] != ~0u || Cnt[VM_VSRC] != ~0u;
+ }
Waitcnt combined(const Waitcnt &Other) const {
// Does the right thing provided self and Other are either both pre-gfx12
// or both gfx12+.
- return Waitcnt(
- std::min(LoadCnt, Other.LoadCnt), std::min(ExpCnt, Other.ExpCnt),
- std::min(DsCnt, Other.DsCnt), std::min(StoreCnt, Other.StoreCnt),
- std::min(SampleCnt, Other.SampleCnt), std::min(BvhCnt, Other.BvhCnt),
- std::min(KmCnt, Other.KmCnt), std::min(XCnt, Other.XCnt),
- std::min(VaVdst, Other.VaVdst), std::min(VmVsrc, Other.VmVsrc));
+ Waitcnt Wait;
+ for (InstCounterType T : inst_counter_types()) {
+ Wait.Cnt[T] = std::min(Cnt[T], Other.Cnt[T]);
+ }
+ return Wait;
+ }
+
+ void print(raw_ostream &OS) const {
+ ListSeparator LS;
+ for (InstCounterType T : inst_counter_types()) {
+ OS << LS << instCounterTypeToStr(T) << ": " << Cnt[T];
+ }
+ if (LS.unused())
+ OS << "none";
+ OS << '\n';
}
- friend raw_ostream &operator<<(raw_ostream &OS, const AMDGPU::Waitcnt &Wait);
+#ifndef NDEBUG
+ LLVM_DUMP_METHOD void dump() const;
+#endif
+
+ friend raw_ostream &operator<<(raw_ostream &OS, const AMDGPU::Waitcnt &Wait) {
+ Wait.print(OS);
+ return OS;
+ }
};
/// Represents the hardware counter limits for different wait count types.
>From 2643c25b7c982b888d494ca7e0c142c3ceb507b4 Mon Sep 17 00:00:00 2001
From: Vasileios Porpodas <vasileios.porpodas at amd.com>
Date: Mon, 23 Feb 2026 21:50:47 +0000
Subject: [PATCH 2/6] fixup! [AMDGPU][AMDGPUBaseInfo] Replace Waitcnt members
with array
---
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index b049e50a3f474..141431285a1a5 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -218,10 +218,7 @@ StringLiteral instCounterTypeToStr(InstCounterType T) {
}
#ifndef NDEBUG
-void Waitcnt::dump() const {
- print(dbgs());
- dbgs() << "\n";
-}
+void Waitcnt::dump() const { dbgs() << *this << "\n"; }
#endif
/// \returns true if the target supports signed immediate offset for SMRD
>From fb0cc6e5d0e5aa60dae2b834b32c7b4d07398a7b Mon Sep 17 00:00:00 2001
From: Vasileios Porpodas <vasileios.porpodas at amd.com>
Date: Tue, 24 Feb 2026 03:15:20 +0000
Subject: [PATCH 3/6] fixup! fixup! [AMDGPU][AMDGPUBaseInfo] Replace Waitcnt
members with array
---
.../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 32 ++++++++++++-------
1 file changed, 20 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 141431285a1a5..365fa44fbbb50 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -199,22 +199,30 @@ iota_range<InstCounterType> inst_counter_types(InstCounterType MaxCounter) {
}
StringLiteral instCounterTypeToStr(InstCounterType T) {
- // clang-format off
switch (T) {
- case LOAD_CNT: return "LoadCnt";
- case DS_CNT: return "DsCnt";
- case EXP_CNT: return "ExpCnt";
- case STORE_CNT: return "StoreCnt";
- case SAMPLE_CNT: return "SampleCnt";
- case BVH_CNT: return "BvhCnt";
- case KM_CNT: return "KmCnt";
- case X_CNT: return "XCnt";
- case VA_VDST: return "VaVdst";
- case VM_VSRC: return "VmVsrc";
+ case LOAD_CNT:
+ return "LOAD_CNT";
+ case DS_CNT:
+ return "DS_CNT";
+ case EXP_CNT:
+ return "EXP_CNT";
+ case STORE_CNT:
+ return "STORE_CNT";
+ case SAMPLE_CNT:
+ return "SAMPLE_CNT";
+ case BVH_CNT:
+ return "BVH_CNT";
+ case KM_CNT:
+ return "KM_CNT";
+ case X_CNT:
+ return "X_CNT";
+ case VA_VDST:
+ return "VA_VDST";
+ case VM_VSRC:
+ return "VM_VSRC";
default:
return "Unknown T";
}
- // clang-format on
}
#ifndef NDEBUG
>From a4abb892589453195cf26bcf011bc57ea24b9ee3 Mon Sep 17 00:00:00 2001
From: Vasileios Porpodas <vasileios.porpodas at amd.com>
Date: Fri, 27 Feb 2026 01:16:21 +0000
Subject: [PATCH 4/6] fixup! fixup! fixup! [AMDGPU][AMDGPUBaseInfo] Replace
Waitcnt members with array
---
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 0323c5ec057b9..4ba73a7556ffb 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1148,7 +1148,8 @@ class Waitcnt {
// gfx12+ constructor.
Waitcnt(unsigned LoadCnt, unsigned ExpCnt, unsigned DsCnt, unsigned StoreCnt,
unsigned SampleCnt, unsigned BvhCnt, unsigned KmCnt, unsigned XCnt,
- unsigned VaVdst, unsigned VmVsrc) {
+ unsigned VaVdst, unsigned VmVsrc)
+ : Waitcnt() {
Cnt[LOAD_CNT] = LoadCnt;
Cnt[DS_CNT] = DsCnt;
Cnt[EXP_CNT] = ExpCnt;
>From 22af22f795a4192d625a60881815513fd7b53909 Mon Sep 17 00:00:00 2001
From: Vasileios Porpodas <vasileios.porpodas at amd.com>
Date: Tue, 10 Mar 2026 15:58:44 +0000
Subject: [PATCH 5/6] fixup! fixup! fixup! fixup! [AMDGPU][AMDGPUBaseInfo]
Replace Waitcnt members with array
---
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 2 +-
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 8 +++-----
2 files changed, 4 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 365fa44fbbb50..651ff6ad864c7 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -225,7 +225,7 @@ StringLiteral instCounterTypeToStr(InstCounterType T) {
}
}
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void Waitcnt::dump() const { dbgs() << *this << "\n"; }
#endif
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 4ba73a7556ffb..1e1de47b3a81b 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1186,23 +1186,21 @@ class Waitcnt {
// Does the right thing provided self and Other are either both pre-gfx12
// or both gfx12+.
Waitcnt Wait;
- for (InstCounterType T : inst_counter_types()) {
+ for (InstCounterType T : inst_counter_types())
Wait.Cnt[T] = std::min(Cnt[T], Other.Cnt[T]);
- }
return Wait;
}
void print(raw_ostream &OS) const {
ListSeparator LS;
- for (InstCounterType T : inst_counter_types()) {
+ for (InstCounterType T : inst_counter_types())
OS << LS << instCounterTypeToStr(T) << ": " << Cnt[T];
- }
if (LS.unused())
OS << "none";
OS << '\n';
}
-#ifndef NDEBUG
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void dump() const;
#endif
>From 6e49f99dd0e01e628c2c2592754a3a7b86485a3c Mon Sep 17 00:00:00 2001
From: Vasileios Porpodas <vasileios.porpodas at amd.com>
Date: Thu, 19 Mar 2026 15:25:20 +0000
Subject: [PATCH 6/6] fixup! fixup! fixup! fixup! fixup!
[AMDGPU][AMDGPUBaseInfo] Replace Waitcnt members with array
---
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 2 +-
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 651ff6ad864c7..10c9921f34318 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -198,7 +198,7 @@ iota_range<InstCounterType> inst_counter_types(InstCounterType MaxCounter) {
return enum_seq(LOAD_CNT, MaxCounter);
}
-StringLiteral instCounterTypeToStr(InstCounterType T) {
+StringLiteral getInstCounterName(InstCounterType T) {
switch (T) {
case LOAD_CNT:
return "LOAD_CNT";
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 1e1de47b3a81b..619a8248b22fb 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -1108,7 +1108,7 @@ enum InstCounterType {
NUM_INST_CNTS = NUM_EXPERT_INST_CNTS
};
-StringLiteral instCounterTypeToStr(InstCounterType T);
+StringLiteral getInstCounterName(InstCounterType T);
// Return an iterator over all counters between LOAD_CNT (the first counter)
// and \c MaxCounter (exclusive, default value yields an enumeration over
@@ -1194,7 +1194,7 @@ class Waitcnt {
void print(raw_ostream &OS) const {
ListSeparator LS;
for (InstCounterType T : inst_counter_types())
- OS << LS << instCounterTypeToStr(T) << ": " << Cnt[T];
+ OS << LS << getInstCounterName(T) << ": " << Cnt[T];
if (LS.unused())
OS << "none";
OS << '\n';
More information about the llvm-commits
mailing list