[llvm] AMDGPU: don't call getSubtarget<GCNSubtarget> on R600 targets. (PR #162207)
James Y Knight via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 6 19:04:31 PDT 2025
https://github.com/jyknight updated https://github.com/llvm/llvm-project/pull/162207
>From d6f729761c77b7fb380d4c23296f8f5c9cb6d984 Mon Sep 17 00:00:00 2001
From: James Y Knight <jyknight at google.com>
Date: Mon, 6 Oct 2025 20:43:27 -0400
Subject: [PATCH 1/2] AMDGPU: don't call getSubtarget<GCNSubtarget> on R600
targets.
Unfortunately, `TargetMachine::getSubtarget<ST>` does an unchecked
static_cast to `ST&`, which makes it easy to get wrong.
The modifications here were created by running check-llvm with an
assert added to getSubtarget. However, that asssert requires that RTTI
is enabled, which LLVM doesn't use, so I've reverted the assert before
sending this fix upstream.
---
llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 26 ++++++++++++-------
.../AMDGPU/AMDGPUImageIntrinsicOptimizer.cpp | 2 +-
2 files changed, 17 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index cb49936871e74..f6186f1fae777 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -162,12 +162,16 @@ class AMDGPUInformationCache : public InformationCache {
/// Check if the subtarget has aperture regs.
bool hasApertureRegs(Function &F) {
+ if (!TM.getTargetTriple().isAMDGCN())
+ return false;
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
return ST.hasApertureRegs();
}
/// Check if the subtarget supports GetDoorbellID.
bool supportsGetDoorbellID(Function &F) {
+ if (!TM.getTargetTriple().isAMDGCN())
+ return false;
const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
return ST.supportsGetDoorbellID();
}
@@ -182,18 +186,18 @@ class AMDGPUInformationCache : public InformationCache {
std::pair<unsigned, unsigned>
getDefaultFlatWorkGroupSize(const Function &F) const {
- const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+ const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
return ST.getDefaultFlatWorkGroupSize(F.getCallingConv());
}
std::pair<unsigned, unsigned>
getMaximumFlatWorkGroupRange(const Function &F) {
- const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+ const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
return {ST.getMinFlatWorkGroupSize(), ST.getMaxFlatWorkGroupSize()};
}
SmallVector<unsigned> getMaxNumWorkGroups(const Function &F) {
- const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+ const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
return ST.getMaxNumWorkGroups(F);
}
@@ -206,7 +210,7 @@ class AMDGPUInformationCache : public InformationCache {
std::pair<unsigned, unsigned>
getWavesPerEU(const Function &F,
std::pair<unsigned, unsigned> FlatWorkGroupSize) {
- const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+ const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
return ST.getWavesPerEU(FlatWorkGroupSize, getLDSSize(F), F);
}
@@ -217,7 +221,7 @@ class AMDGPUInformationCache : public InformationCache {
if (!Val)
return std::nullopt;
if (!Val->second) {
- const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+ const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
Val->second = ST.getMaxWavesPerEU();
}
return std::make_pair(Val->first, *(Val->second));
@@ -227,13 +231,13 @@ class AMDGPUInformationCache : public InformationCache {
getEffectiveWavesPerEU(const Function &F,
std::pair<unsigned, unsigned> WavesPerEU,
std::pair<unsigned, unsigned> FlatWorkGroupSize) {
- const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+ const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
return ST.getEffectiveWavesPerEU(WavesPerEU, FlatWorkGroupSize,
getLDSSize(F));
}
unsigned getMaxWavesPerEU(const Function &F) {
- const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
+ const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
return ST.getMaxWavesPerEU();
}
@@ -1511,9 +1515,11 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
A.getOrCreateAAFor<AAAMDWavesPerEU>(IRPosition::function(*F));
}
- const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(*F);
- if (!F->isDeclaration() && ST.hasClusters())
- A.getOrCreateAAFor<AAAMDGPUClusterDims>(IRPosition::function(*F));
+ if (TM.getTargetTriple().isAMDGCN()) {
+ const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(*F);
+ if (!F->isDeclaration() && ST.hasClusters())
+ A.getOrCreateAAFor<AAAMDGPUClusterDims>(IRPosition::function(*F));
+ }
for (auto &I : instructions(F)) {
Value *Ptr = nullptr;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUImageIntrinsicOptimizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUImageIntrinsicOptimizer.cpp
index 639089c75a33e..155c7dad904c5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUImageIntrinsicOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUImageIntrinsicOptimizer.cpp
@@ -281,7 +281,7 @@ bool optimizeSection(ArrayRef<SmallVector<IntrinsicInst *, 4>> MergeableInsts) {
}
static bool imageIntrinsicOptimizerImpl(Function &F, const TargetMachine *TM) {
- if (!TM)
+ if (!TM || !TM->getTargetTriple().isAMDGCN())
return false;
// This optimization only applies to GFX11 and beyond.
>From 4becdd9fab562fb01305c1ebc7ac5a9c2ef6fe5a Mon Sep 17 00:00:00 2001
From: James Y Knight <jyknight at google.com>
Date: Mon, 6 Oct 2025 21:53:26 -0400
Subject: [PATCH 2/2] Review feedback: don't add AMDGPUImageIntrinsicOptimizer
pass for R600.
---
llvm/lib/Target/AMDGPU/AMDGPUImageIntrinsicOptimizer.cpp | 2 +-
llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 3 ++-
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUImageIntrinsicOptimizer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUImageIntrinsicOptimizer.cpp
index 155c7dad904c5..639089c75a33e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUImageIntrinsicOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUImageIntrinsicOptimizer.cpp
@@ -281,7 +281,7 @@ bool optimizeSection(ArrayRef<SmallVector<IntrinsicInst *, 4>> MergeableInsts) {
}
static bool imageIntrinsicOptimizerImpl(Function &F, const TargetMachine *TM) {
- if (!TM || !TM->getTargetTriple().isAMDGCN())
+ if (!TM)
return false;
// This optimization only applies to GFX11 and beyond.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 280fbe20667c6..d56e743d753a2 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1296,7 +1296,8 @@ void AMDGPUPassConfig::addIRPasses() {
if (LowerCtorDtor)
addPass(createAMDGPUCtorDtorLoweringLegacyPass());
- if (isPassEnabled(EnableImageIntrinsicOptimizer))
+ if (TM.getTargetTriple().isAMDGCN() &&
+ isPassEnabled(EnableImageIntrinsicOptimizer))
addPass(createAMDGPUImageIntrinsicOptimizerPass(&TM));
// This can be disabled by passing ::Disable here or on the command line
More information about the llvm-commits
mailing list