[llvm] [AMDGPU] Use subtarget feature for flat offset bit width instead of arch checks (PR #183742)
Mariusz Sikora via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 3 02:20:16 PST 2026
https://github.com/mariusz-sikora-at-amd updated https://github.com/llvm/llvm-project/pull/183742
>From 72a1d8659db4855063bfc05dc15d621a3bb62fb6 Mon Sep 17 00:00:00 2001
From: Mariusz Sikora <mariusz.sikora at amd.com>
Date: Fri, 27 Feb 2026 09:07:12 -0500
Subject: [PATCH 1/6] [AMDGPU] Use subtarget feature for flat offset bit width
instead of arch checks
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 5 +++--
llvm/lib/Target/AMDGPU/AMDGPUFeatures.td | 10 ++++++++++
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 3 +++
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 5 ++---
4 files changed, 18 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index eee33dbeea022..a249e359e5df8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1411,7 +1411,7 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
FeatureVmemWriteVgprInOrder, FeatureVMemToLDSLoad, FeatureCubeInsts,
FeatureLerpInst, FeatureSadInsts, FeatureQsadInsts,
FeatureCvtNormInsts, FeatureCvtPkNormVOP2Insts,
- FeatureCvtPkNormVOP3Insts
+ FeatureCvtPkNormVOP3Insts, FeatureFlatOffsetBits12
]
>;
@@ -1460,7 +1460,8 @@ def FeatureGFX12 : GCNSubtargetFeatureGeneration<"GFX12",
FeatureDefaultComponentBroadcast, FeatureMaxHardClauseLength32,
FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts,
FeatureIEEEMinimumMaximumInsts, FeatureMinimum3Maximum3F32,
- FeatureMinimum3Maximum3F16, FeatureAgentScopeFineGrainedRemoteMemoryAtomics
+ FeatureMinimum3Maximum3F16, FeatureAgentScopeFineGrainedRemoteMemoryAtomics,
+ FeatureFlatOffsetBits24
]
>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td b/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td
index cdbd051a0c0ee..594e2ad0e3e03 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td
@@ -18,6 +18,16 @@ def FeatureFMA : SubtargetFeature<"fmaf",
"Enable single precision FMA (not as fast as mul+add, but fused)"
>;
+class SubtargetFeatureFlatOffsetBits <int Value> : SubtargetFeature<
+ "flat-offset-bits-" # Value,
+ "FlatOffsetBitWidth",
+ !cast<string>(Value),
+ "Number of bits for flat offset encoding"
+>;
+
+def FeatureFlatOffsetBits12 : SubtargetFeatureFlatOffsetBits<12>;
+def FeatureFlatOffsetBits24 : SubtargetFeatureFlatOffsetBits<24>;
+
// Addressable local memory size is the maximum number of bytes of LDS that can
// be allocated to a single workgroup.
class SubtargetFeatureAddressableLocalMemorySize <int Value> : SubtargetFeature<
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 302fe7c850b75..90c2590d472b1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -59,6 +59,7 @@ class AMDGPUSubtarget {
unsigned LocalMemorySize = 0;
unsigned AddressableLocalMemorySize = 0;
char WavefrontSizeLog2 = 0;
+ unsigned FlatOffsetBitWidth = 13;
public:
AMDGPUSubtarget(Triple TT) : TargetTriple(std::move(TT)) {}
@@ -235,6 +236,8 @@ class AMDGPUSubtarget {
return AddressableLocalMemorySize;
}
+ unsigned getFlatOffsetBitWidth() const { return FlatOffsetBitWidth; }
+
/// Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the
/// "CU" is the unit onto which workgroups are mapped. This takes WGP mode vs.
/// CU mode into account.
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index c1337f27a0f70..9a47567a40689 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -3465,10 +3465,9 @@ std::optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
}
unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST) {
- if (AMDGPU::isGFX10(ST))
+ if (ST.getFeatureBits().test(FeatureFlatOffsetBits12))
return 12;
-
- if (AMDGPU::isGFX12(ST))
+ if (ST.getFeatureBits().test(FeatureFlatOffsetBits24))
return 24;
return 13;
}
>From b4e01caa88d898d06df7e791d70744f13d242e7d Mon Sep 17 00:00:00 2001
From: Mariusz Sikora <mariusz.sikora at amd.com>
Date: Mon, 2 Mar 2026 03:31:01 -0500
Subject: [PATCH 2/6] Remove unused getFlatOffsetBitWidth()
---
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 2 --
1 file changed, 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 90c2590d472b1..d05f31a888f1e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -236,8 +236,6 @@ class AMDGPUSubtarget {
return AddressableLocalMemorySize;
}
- unsigned getFlatOffsetBitWidth() const { return FlatOffsetBitWidth; }
-
/// Number of SIMDs/EUs (execution units) per "CU" ("compute unit"), where the
/// "CU" is the unit onto which workgroups are mapped. This takes WGP mode vs.
/// CU mode into account.
>From 5da653dac871cc2be9ff69286b436e1bf942d0ee Mon Sep 17 00:00:00 2001
From: Mariusz Sikora <mariusz.sikora at amd.com>
Date: Tue, 3 Mar 2026 02:56:46 -0500
Subject: [PATCH 3/6] Option 1 - set FlatOffsetBitWidth to 0 by default
---
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index d05f31a888f1e..f7c1f07ae3abb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -59,7 +59,7 @@ class AMDGPUSubtarget {
unsigned LocalMemorySize = 0;
unsigned AddressableLocalMemorySize = 0;
char WavefrontSizeLog2 = 0;
- unsigned FlatOffsetBitWidth = 13;
+ unsigned FlatOffsetBitWidth = 0;
public:
AMDGPUSubtarget(Triple TT) : TargetTriple(std::move(TT)) {}
>From fff8d9ba3dab81f72bee954875b8fd40e7c7d90d Mon Sep 17 00:00:00 2001
From: Mariusz Sikora <mariusz.sikora at amd.com>
Date: Tue, 3 Mar 2026 03:11:37 -0500
Subject: [PATCH 4/6] Option 2 - add two boolean AMDGPUSubtargetFeatures
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 12 ++++++++++++
llvm/lib/Target/AMDGPU/AMDGPUFeatures.td | 10 ----------
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 1 -
3 files changed, 12 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index a249e359e5df8..69b21112385a2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -303,6 +303,18 @@ class FeatureMaxHardClauseLength<int size> : SubtargetFeature<
"Maximum number of instructions in an explicit S_CLAUSE is "#size
>;
+defm FlatOffsetBits12 : AMDGPUSubtargetFeature<"flat-offset-bits-12",
+ "Flat offset encoding has 12 bits",
+ /*GenPredicate=*/ 0,
+ /*GenAssemblerPredicate=*/ 0
+>;
+
+defm FlatOffsetBits24 : AMDGPUSubtargetFeature<"flat-offset-bits-24",
+ "Flat offset encoding has 24 bits",
+ /*GenPredicate=*/ 0,
+ /*GenAssemblerPredicate=*/ 0
+>;
+
/// Work around a hardware bug on some chips that can be triggered
/// under certain circumstances when clauses are longer than 32 operations.
def FeatureMaxHardClauseLength32 : FeatureMaxHardClauseLength<32>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td b/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td
index 594e2ad0e3e03..cdbd051a0c0ee 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td
@@ -18,16 +18,6 @@ def FeatureFMA : SubtargetFeature<"fmaf",
"Enable single precision FMA (not as fast as mul+add, but fused)"
>;
-class SubtargetFeatureFlatOffsetBits <int Value> : SubtargetFeature<
- "flat-offset-bits-" # Value,
- "FlatOffsetBitWidth",
- !cast<string>(Value),
- "Number of bits for flat offset encoding"
->;
-
-def FeatureFlatOffsetBits12 : SubtargetFeatureFlatOffsetBits<12>;
-def FeatureFlatOffsetBits24 : SubtargetFeatureFlatOffsetBits<24>;
-
// Addressable local memory size is the maximum number of bytes of LDS that can
// be allocated to a single workgroup.
class SubtargetFeatureAddressableLocalMemorySize <int Value> : SubtargetFeature<
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index f7c1f07ae3abb..302fe7c850b75 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -59,7 +59,6 @@ class AMDGPUSubtarget {
unsigned LocalMemorySize = 0;
unsigned AddressableLocalMemorySize = 0;
char WavefrontSizeLog2 = 0;
- unsigned FlatOffsetBitWidth = 0;
public:
AMDGPUSubtarget(Triple TT) : TargetTriple(std::move(TT)) {}
>From be3eb435973b72b283f26969f51c34b69f2735b5 Mon Sep 17 00:00:00 2001
From: Mariusz Sikora <mariusz.sikora at amd.com>
Date: Tue, 3 Mar 2026 05:06:42 -0500
Subject: [PATCH 5/6] Revert "Option 2 - add two boolean
AMDGPUSubtargetFeatures"
This reverts commit fff8d9ba3dab81f72bee954875b8fd40e7c7d90d.
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 12 ------------
llvm/lib/Target/AMDGPU/AMDGPUFeatures.td | 10 ++++++++++
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 1 +
3 files changed, 11 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 69b21112385a2..a249e359e5df8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -303,18 +303,6 @@ class FeatureMaxHardClauseLength<int size> : SubtargetFeature<
"Maximum number of instructions in an explicit S_CLAUSE is "#size
>;
-defm FlatOffsetBits12 : AMDGPUSubtargetFeature<"flat-offset-bits-12",
- "Flat offset encoding has 12 bits",
- /*GenPredicate=*/ 0,
- /*GenAssemblerPredicate=*/ 0
->;
-
-defm FlatOffsetBits24 : AMDGPUSubtargetFeature<"flat-offset-bits-24",
- "Flat offset encoding has 24 bits",
- /*GenPredicate=*/ 0,
- /*GenAssemblerPredicate=*/ 0
->;
-
/// Work around a hardware bug on some chips that can be triggered
/// under certain circumstances when clauses are longer than 32 operations.
def FeatureMaxHardClauseLength32 : FeatureMaxHardClauseLength<32>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td b/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td
index cdbd051a0c0ee..594e2ad0e3e03 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUFeatures.td
@@ -18,6 +18,16 @@ def FeatureFMA : SubtargetFeature<"fmaf",
"Enable single precision FMA (not as fast as mul+add, but fused)"
>;
+class SubtargetFeatureFlatOffsetBits <int Value> : SubtargetFeature<
+ "flat-offset-bits-" # Value,
+ "FlatOffsetBitWidth",
+ !cast<string>(Value),
+ "Number of bits for flat offset encoding"
+>;
+
+def FeatureFlatOffsetBits12 : SubtargetFeatureFlatOffsetBits<12>;
+def FeatureFlatOffsetBits24 : SubtargetFeatureFlatOffsetBits<24>;
+
// Addressable local memory size is the maximum number of bytes of LDS that can
// be allocated to a single workgroup.
class SubtargetFeatureAddressableLocalMemorySize <int Value> : SubtargetFeature<
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 302fe7c850b75..f7c1f07ae3abb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -59,6 +59,7 @@ class AMDGPUSubtarget {
unsigned LocalMemorySize = 0;
unsigned AddressableLocalMemorySize = 0;
char WavefrontSizeLog2 = 0;
+ unsigned FlatOffsetBitWidth = 0;
public:
AMDGPUSubtarget(Triple TT) : TargetTriple(std::move(TT)) {}
>From b2c46f92317b81bd0d14e292f230c10735bbdf6d Mon Sep 17 00:00:00 2001
From: Mariusz Sikora <mariusz.sikora at amd.com>
Date: Tue, 3 Mar 2026 05:08:45 -0500
Subject: [PATCH 6/6] init FlatOffsetBitWidth to 13 in
initializeSubtargetDependencies
---
llvm/lib/Target/AMDGPU/GCNSubtarget.cpp | 3 +++
1 file changed, 3 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
index 9b6592a7da4bc..8c98e8b589b13 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp
@@ -140,6 +140,9 @@ GCNSubtarget &GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
if (AddressableLocalMemorySize == 0)
AddressableLocalMemorySize = 32768;
+ if (FlatOffsetBitWidth == 0)
+ FlatOffsetBitWidth = 13;
+
LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(this);
HasFminFmaxLegacy = getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;
More information about the llvm-commits mailing list