[llvm] [NFCI][AMDGPU] Use `GET_SUBTARGETINFO_MACRO` in `GCNSubtarget.h` and `R600Subtarget.h` (PR #177402)
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 23 07:42:04 PST 2026
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/177402
>From 52f02156e1cf727482a126d30ed79e0e5ea7738b Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Thu, 22 Jan 2026 11:43:16 -0500
Subject: [PATCH] [NFCI][AMDGPU] Use `GET_SUBTARGETINFO_MACRO` in
`GCNSubtarget.h` and `R600Subtarget.h`
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 4 +-
.../lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 2 +-
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 10 -
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 83 ++---
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 300 +-----------------
.../AMDGPU/R600ControlFlowFinalizer.cpp | 2 +-
llvm/lib/Target/AMDGPU/R600Processors.td | 6 +-
llvm/lib/Target/AMDGPU/R600Subtarget.h | 23 +-
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 12 +-
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 2 +-
10 files changed, 53 insertions(+), 391 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 7044f91195075..c2ba49d5d37cb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -2382,9 +2382,9 @@ def UseFakeTrue16Insts : True16PredicateClass<"Subtarget->hasTrue16BitInsts() &&
def UseTrue16WithSramECC : True16PredicateClass<"Subtarget->useRealTrue16Insts() && "
"!Subtarget->d16PreservesUnusedBits()">;
-def HasD16Writes32BitVgpr: Predicate<"Subtarget->hasD16Writes32BitVgpr()">,
+def HasD16Writes32BitVgpr: Predicate<"Subtarget->enableD16Writes32BitVgpr()">,
AssemblerPredicate<(all_of FeatureTrue16BitInsts, FeatureRealTrue16Insts, FeatureD16Writes32BitVgpr)>;
-def NotHasD16Writes32BitVgpr: Predicate<"!Subtarget->hasD16Writes32BitVgpr()">,
+def NotHasD16Writes32BitVgpr: Predicate<"!Subtarget->enableD16Writes32BitVgpr()">,
AssemblerPredicate<(all_of FeatureTrue16BitInsts, FeatureRealTrue16Insts, (not FeatureD16Writes32BitVgpr))>;
def NotHasMed3_16 : Predicate<"!Subtarget->hasMed3_16()">;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 3355c277e50d2..d18d3a13b29ea 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -371,7 +371,7 @@ bool AMDGPUPromoteAllocaImpl::run(Function &F, bool PromoteToLDS) {
DL = &Mod->getDataLayout();
const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
- if (!ST.isPromoteAllocaEnabled())
+ if (!ST.enablePromoteAlloca())
return false;
bool SufficientLDS = PromoteToLDS && hasSufficientLocalMem(F);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 5ca8ee22306f6..300aca1a3d789 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -32,16 +32,6 @@ using namespace llvm;
#define DEBUG_TYPE "amdgpu-subtarget"
-AMDGPUSubtarget::AMDGPUSubtarget(Triple TT) : TargetTriple(std::move(TT)) {}
-
-bool AMDGPUSubtarget::useRealTrue16Insts() const {
- return hasTrue16BitInsts() && EnableRealTrue16Insts;
-}
-
-bool AMDGPUSubtarget::hasD16Writes32BitVgpr() const {
- return EnableD16Writes32BitVgpr;
-}
-
// Returns the maximum per-workgroup LDS allocation size (in bytes) that still
// allows the given function to achieve an occupancy of NWaves waves per
// SIMD / EU, taking into account only the function's *maximum* workgroup size.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 7882df23b9b20..7f56f52bcdd78 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -19,43 +19,6 @@
#include "llvm/Support/Alignment.h"
#include "llvm/TargetParser/Triple.h"
-//===----------------------------------------------------------------------===//
-// X-Macros for simple subtarget features.
-//
-// AMDGPU_SUBTARGET_HAS_FEATURE: Features with both member and getter
-// bool HasXXX = false; // member declaration
-// bool hasXXX() const { return HasXXX; } // getter
-//
-// AMDGPU_SUBTARGET_ENABLE_FEATURE_MEMBER_ONLY: Features with member only
-// bool EnableXXX = false; // member declaration only
-//
-// To add a new simple feature:
-// 1. Add X(FeatureName) to the appropriate macro below
-// 2. Remove the manual bool HasFeatureName declaration from protected section
-// 3. If using AMDGPU_SUBTARGET_HAS_FEATURE, also remove the manual getter
-// 4. If using AMDGPU_SUBTARGET_ENABLE_FEATURE_MEMBER_ONLY, also remove the
-// manual getter
-//
-// Note: The features are ordered alphabetically for convenience. Unlike
-// GCNSubtarget.h, we do not use TableGen-generated features here. We
-// intentionally keep the feature set here minimal. For any new feature, unless
-// it needs to be queried via an AMDGPUSubtarget reference, it should be added
-// to GCNSubtarget.h instead.
-//===----------------------------------------------------------------------===//
-
-#define AMDGPU_SUBTARGET_HAS_FEATURE(X) \
- X(16BitInsts) \
- X(FastFMAF32) \
- X(Inv2PiInlineImm) \
- X(MadMacF32Insts) \
- X(SDWA) \
- X(True16BitInsts) \
- X(VOP3PInsts)
-
-#define AMDGPU_SUBTARGET_ENABLE_FEATURE_MEMBER_ONLY(X) \
- X(RealTrue16Insts) \
- X(D16Writes32BitVgpr)
-
namespace llvm {
enum AMDGPUDwarfFlavour : unsigned;
@@ -88,19 +51,8 @@ class AMDGPUSubtarget {
bool HasMulI24 = true;
bool HasMulU24 = true;
bool HasSMulHi = false;
- bool EnablePromoteAlloca = false;
bool HasFminFmaxLegacy = true;
-#define DECL_HAS_MEMBER(Name) bool Has##Name = false;
- AMDGPU_SUBTARGET_HAS_FEATURE(DECL_HAS_MEMBER)
-#undef DECL_HAS_MEMBER
-#undef AMDGPU_SUBTARGET_HAS_FEATURE_MEMBER_ONLY
-
-#define DECL_ENABLE_MEMBER(Name) bool Enable##Name = false;
- AMDGPU_SUBTARGET_ENABLE_FEATURE_MEMBER_ONLY(DECL_ENABLE_MEMBER)
-#undef DECL_ENABLE_MEMBER
-#undef AMDGPU_SUBTARGET_ENABLE_FEATURE_MEMBER_ONLY
-
unsigned EUsPerCU = 4;
unsigned MaxWavesPerEU = 10;
unsigned LocalMemorySize = 0;
@@ -108,7 +60,7 @@ class AMDGPUSubtarget {
char WavefrontSizeLog2 = 0;
public:
- AMDGPUSubtarget(Triple TT);
+ AMDGPUSubtarget(Triple TT) : TargetTriple(std::move(TT)) {}
static const AMDGPUSubtarget &get(const MachineFunction &MF);
static const AMDGPUSubtarget &get(const TargetMachine &TM,
@@ -231,12 +183,23 @@ class AMDGPUSubtarget {
bool isGCN() const { return TargetTriple.isAMDGCN(); }
- // Simple subtarget feature getters - auto-generated from X-macro.
-#define DECL_HAS_GETTER(Name) \
- bool has##Name() const { return Has##Name; }
- AMDGPU_SUBTARGET_HAS_FEATURE(DECL_HAS_GETTER)
-#undef DECL_HAS_GETTER
-#undef AMDGPU_SUBTARGET_HAS_FEATURE
+ //===---------------------------------------------------------------------===//
+ // TableGen-generated feature getters.
+ //
+ // Every getter returns false by default; the target-specific subtarget
+ // classes override them with the actual feature values. Because
+ // AMDGPUGenSubtargetInfo.inc and R600GenSubtargetInfo.inc define overlapping
+ // features, we cannot simply include both files here. Instead, the getters
+ // that exist only for R600Subtarget are declared by hand below.
+ //===---------------------------------------------------------------------===//
+ virtual bool hasCFALUBug() const { return false; }
+ virtual bool hasCaymanISA() const { return false; }
+ virtual bool hasVertexCache() const { return false; }
+ virtual bool hasR600ALUInst() const { return false; }
+#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
+ virtual bool GETTER() const { return false; }
+#include "AMDGPUGenSubtargetInfo.inc"
+ //===---------------------------------------------------------------------===//
/// Return true if real (non-fake) variants of True16 instructions using
/// 16-bit registers should be code-generated. Fake True16 instructions are
@@ -244,9 +207,9 @@ class AMDGPUSubtarget {
/// operands and always use their low halves.
// TODO: Remove and use hasTrue16BitInsts() instead once True16 is fully
// supported and the support for fake True16 instructions is removed.
- bool useRealTrue16Insts() const;
-
- bool hasD16Writes32BitVgpr() const;
+ bool useRealTrue16Insts() const {
+ return hasTrue16BitInsts() && enableRealTrue16Insts();
+ }
bool hasMulI24() const {
return HasMulI24;
@@ -264,10 +227,6 @@ class AMDGPUSubtarget {
return HasFminFmaxLegacy;
}
- bool isPromoteAllocaEnabled() const {
- return EnablePromoteAlloca;
- }
-
unsigned getWavefrontSize() const {
return 1 << WavefrontSizeLog2;
}
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index ee9d47c15127d..2d66bb3e9bf78 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -26,240 +26,6 @@
#define GET_SUBTARGETINFO_HEADER
#include "AMDGPUGenSubtargetInfo.inc"
-//===----------------------------------------------------------------------===//
-// X-Macros for simple subtarget features.
-//
-// GCN_SUBTARGET_HAS_FEATURE_MEMBER_ONLY: Features with member only (no getter)
-// bool HasXXX = false; // member declaration only
-//
-// GCN_SUBTARGET_HAS_FEATURE: Features with both member and getter
-// bool HasXXX = false; // member declaration
-// bool hasXXX() const { return HasXXX; } // getter
-//
-// To add a new simple feature:
-// 1. Add X(FeatureName) to the appropriate macro below
-// 2. Remove the manual bool HasFeatureName declaration from protected section
-// 3. If using GCN_SUBTARGET_HAS_FEATURE, also remove the manual getter
-//
-// Note: The features are ordered alphabetically for convenience. Preferably
-// this would be generated automatically by TableGen, but there are some cases
-// where the features were not defined in a way that was compatible with the
-// auto-generation.
-//===----------------------------------------------------------------------===//
-
-// Features with member only (no getter generated).
-// These features either have custom getters or code accesses the member
-// directly.
-#define GCN_SUBTARGET_HAS_FEATURE_MEMBER_ONLY(X) \
- X(AssemblerPermissiveWavesize) \
- X(CIInsts) \
- X(FastDenormalF32) \
- X(GCN3Encoding) \
- X(GFX10Insts) \
- X(GFX11Insts) \
- X(GFX12Insts) \
- X(GFX7GFX8GFX9Insts) \
- X(GFX8Insts) \
- X(GFX9Insts) \
- X(LDSMisalignedBug) \
- X(UnalignedBufferAccess) \
- X(UnalignedScratchAccess) \
- X(UserSGPRInit16Bug)
-
-// Features with both member and getter.
-#define GCN_SUBTARGET_HAS_FEATURE(X) \
- X(1_5xVGPRs) \
- X(1024AddressableVGPRs) \
- X(45BitNumRecordsBufferResource) \
- X(64BitLiterals) \
- X(A16) \
- X(AddMinMaxInsts) \
- X(AddNoCarryInsts) \
- X(AddSubU64Insts) \
- X(AgentScopeFineGrainedRemoteMemoryAtomics) \
- X(ApertureRegs) \
- X(ArchitectedFlatScratch) \
- X(ArchitectedSGPRs) \
- X(AshrPkInsts) \
- X(AtomicBufferGlobalPkAddF16Insts) \
- X(AtomicBufferGlobalPkAddF16NoRtnInsts) \
- X(AtomicBufferPkAddBF16Inst) \
- X(AtomicCSubNoRtnInsts) \
- X(AtomicDsPkAdd16Insts) \
- X(AtomicFaddNoRtnInsts) \
- X(AtomicFaddRtnInsts) \
- X(AtomicFlatPkAdd16Insts) \
- X(AtomicFMinFMaxF32FlatInsts) \
- X(AtomicFMinFMaxF32GlobalInsts) \
- X(AtomicFMinFMaxF64FlatInsts) \
- X(AtomicFMinFMaxF64GlobalInsts) \
- X(AtomicGlobalPkAddBF16Inst) \
- X(AutoWaitcntBeforeBarrier) \
- X(BackOffBarrier) \
- X(BF16ConversionInsts) \
- X(BF16PackedInsts) \
- X(BF16TransInsts) \
- X(BF8ConversionScaleInsts) \
- X(BitOp3Insts) \
- X(BVHDualAndBVH8Insts) \
- X(Clusters) \
- X(CubeInsts) \
- X(CvtFP8VOP1Bug) \
- X(CvtNormInsts) \
- X(CvtPkF16F32Inst) \
- X(CvtPkNormVOP2Insts) \
- X(CvtPkNormVOP3Insts) \
- X(DefaultComponentBroadcast) \
- X(DefaultComponentZero) \
- X(DLInsts) \
- X(Dot10Insts) \
- X(Dot11Insts) \
- X(Dot12Insts) \
- X(Dot13Insts) \
- X(Dot1Insts) \
- X(Dot2Insts) \
- X(Dot3Insts) \
- X(Dot4Insts) \
- X(Dot5Insts) \
- X(Dot6Insts) \
- X(Dot7Insts) \
- X(Dot8Insts) \
- X(Dot9Insts) \
- X(DPALU_DPP) \
- X(DPP) \
- X(DPP8) \
- X(DPPSrc1SGPR) \
- X(DsSrc2Insts) \
- X(EmulatedSystemScopeAtomics) \
- X(ExtendedImageInsts) \
- X(F16BF16ToFP6BF6ConversionScaleInsts) \
- X(F32ToF16BF16ConversionSRInsts) \
- X(FlatAddressSpace) \
- X(FlatAtomicFaddF32Inst) \
- X(FlatBufferGlobalAtomicFaddF64Inst) \
- X(FlatGlobalInsts) \
- X(FlatGVSMode) \
- X(FlatInstOffsets) \
- X(FlatScratchInsts) \
- X(FlatSegmentOffsetBug) \
- X(FMA) \
- X(FmacF64Inst) \
- X(FmaMixBF16Insts) \
- X(FmaMixInsts) \
- X(FP4ConversionScaleInsts) \
- X(FP64) \
- X(FP6BF6ConversionScaleInsts) \
- X(FP8ConversionInsts) \
- X(FP8ConversionScaleInsts) \
- X(FP8E5M3Insts) \
- X(FP8Insts) \
- X(FullRate64Ops) \
- X(G16) \
- X(GDS) \
- X(GetWaveIdInst) \
- X(GFX10_3Insts) \
- X(GFX10_AEncoding) \
- X(GFX10_BEncoding) \
- X(GFX1250Insts) \
- X(GFX90AInsts) \
- X(GFX940Insts) \
- X(GFX950Insts) \
- X(GloballyAddressableScratch) \
- X(GWS) \
- X(HalfRate64Ops) \
- X(IEEEMinimumMaximumInsts) \
- X(ImageGather4D16Bug) \
- X(ImageInsts) \
- X(ImageStoreD16Bug) \
- X(InstFwdPrefetchBug) \
- X(IntClamp) \
- X(KernargPreload) \
- X(LdsBarrierArriveAtomic) \
- X(LdsBranchVmemWARHazard) \
- X(LerpInst) \
- X(LshlAddU64Inst) \
- X(MADIntraFwdBug) \
- X(MadMixInsts) \
- X(MadU32Inst) \
- X(MAIInsts) \
- X(McastLoadInsts) \
- X(MemoryAtomicFaddF32DenormalSupport) \
- X(MFMAInlineLiteralBug) \
- X(MIMG_R128) \
- X(Min3Max3PKF16) \
- X(Minimum3Maximum3F16) \
- X(Minimum3Maximum3F32) \
- X(Minimum3Maximum3PKF16) \
- X(Movrel) \
- X(MSAALoadDstSelBug) \
- X(NegativeScratchOffsetBug) \
- X(NegativeUnalignedScratchOffsetBug) \
- X(NoDataDepHazard) \
- X(NoSdstCMPX) \
- X(NSAClauseBug) \
- X(NSAEncoding) \
- X(NSAtoVMEMBug) \
- X(Offset3fBug) \
- X(PackedFP32Ops) \
- X(PackedTID) \
- X(PartialNSAEncoding) \
- X(Permlane16Swap) \
- X(Permlane32Swap) \
- X(PkAddMinMaxInsts) \
- X(PkFmacF16Inst) \
- X(PointSampleAccel) \
- X(PrivEnabledTrap2NopBug) \
- X(PrngInst) \
- X(PseudoScalarTrans) \
- X(QsadInsts) \
- X(R128A16) \
- X(RelaxedBufferOOBMode) \
- X(RequiredExportPriority) \
- X(RestrictedSOffset) \
- X(SadInsts) \
- X(SafeCUPrefetch) \
- X(SafeSmemPrefetch) \
- X(SALUFloatInsts) \
- X(ScalarAtomics) \
- X(ScalarDwordx3Loads) \
- X(ScalarFlatScratchInsts) \
- X(ScalarStores) \
- X(SDWAMac) \
- X(SDWAOmod) \
- X(SDWAOutModsVOPC) \
- X(SDWAScalar) \
- X(SDWASdst) \
- X(SetPrioIncWgInst) \
- X(SetregVGPRMSBFixup) \
- X(SGPRInitBug) \
- X(ShaderCyclesHiLoRegisters) \
- X(ShaderCyclesRegister) \
- X(SMemRealTime) \
- X(SMemTimeInst) \
- X(SMEMtoVectorWriteHazard) \
- X(SWakeupBarrier) \
- X(TanhInsts) \
- X(TensorCvtLutInsts) \
- X(TransposeLoadF4F6Insts) \
- X(TrapHandler) \
- X(TrigReducedRange) \
- X(UnalignedAccessMode) \
- X(UnalignedDSAccess) \
- X(UnpackedD16VMem) \
- X(VALUTransUseHazard) \
- X(VcmpxExecWARHazard) \
- X(VcmpxPermlaneHazard) \
- X(VGPRIndexMode) \
- X(VmemPrefInsts) \
- X(VMemToLDSLoad) \
- X(VMEMtoScalarWriteHazard) \
- X(VmemWriteVgprInOrder) \
- X(VOP3Literal) \
- X(VOPDInsts) \
- X(Vscnt) \
- X(WaitXcnt) \
- X(XF32Insts)
-
namespace llvm {
class GCNTargetMachine;
@@ -310,38 +76,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
/// indicates a lack of S_CLAUSE support.
unsigned MaxHardClauseLength = 0;
- //===--------------------------------------------------------------------===//
- /// Controlled by subtarget features defined in AMDGPU.td
- //===--------------------------------------------------------------------===//
- bool DumpCode = false;
- bool EnableCuMode = false;
- bool EnableDS128 = false;
- bool EnableFlatScratch = false;
- bool EnableLoadStoreOpt = false;
- bool EnablePreciseMemory = false;
- bool EnablePRTStrictNull = false;
- bool EnableSIScheduler = false;
- // This should not be used directly. 'TargetID' tracks the dynamic settings
- // for SRAMECC.
- bool EnableSRAMECC = false;
- bool EnableTgSplit = false;
- bool EnableUnsafeDSOffsetFolding = false;
- bool EnableXNACK = false;
- bool RequiresAlignVGPR = false;
- bool RequiresCOV6 = false;
- bool RequiresWaitsBeforeSystemScopeStores = false;
- bool SupportsSRAMECC = false;
- // This should not be used directly. 'TargetID' tracks the dynamic settings
- // for XNACK.
- bool SupportsXNACK = false;
- bool UseAddPC64Inst = false;
- bool UseBlockVGPROpsForCSR = false;
- bool UseFlatForGlobal = false;
-
-#define DECL_HAS_MEMBER(Name) bool Has##Name = false;
- GCN_SUBTARGET_HAS_FEATURE(DECL_HAS_MEMBER)
- GCN_SUBTARGET_HAS_FEATURE_MEMBER_ONLY(DECL_HAS_MEMBER)
-#undef DECL_HAS_MEMBER
+#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
+ bool ATTRIBUTE = DEFAULT;
+#include "AMDGPUGenSubtargetInfo.inc"
private:
SIInstrInfo InstrInfo;
@@ -404,19 +141,14 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
- // Simple subtarget feature getters - auto-generated from X-macro.
- // Note: GCN_SUBTARGET_HAS_FEATURE_MEMBER_ONLY features don't get getters.
-#define DECL_HAS_GETTER(Name) \
- bool has##Name() const { return Has##Name; }
- GCN_SUBTARGET_HAS_FEATURE(DECL_HAS_GETTER)
-#undef DECL_HAS_GETTER
-#undef GCN_SUBTARGET_HAS_FEATURE
-#undef GCN_SUBTARGET_HAS_FEATURE_MEMBER_ONLY
-
Generation getGeneration() const { return (Generation)Gen; }
bool isGFX11Plus() const { return getGeneration() >= GFX11; }
+#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
+ bool GETTER() const override { return ATTRIBUTE; }
+#include "AMDGPUGenSubtargetInfo.inc"
+
unsigned getMaxWaveScratchSize() const {
// See COMPUTE_TMPRING_SIZE.WAVESIZE.
if (getGeneration() >= GFX12) {
@@ -536,8 +268,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return getGeneration() <= SEA_ISLANDS ? 1 : 2;
}
- bool dumpCode() const { return DumpCode; }
-
/// Return the amount of LDS that can be used that will not restrict the
/// occupancy lower than WaveCount.
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
@@ -552,8 +282,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return getGeneration() >= AMDGPUSubtarget::GFX10;
}
- bool useFlatForGlobal() const { return UseFlatForGlobal; }
-
/// \returns If target supports ds_read/write_b128 and user enables generation
/// of ds_read/write_b128.
bool useDS128() const { return HasCIInsts && EnableDS128; }
@@ -772,11 +500,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
// FLAT GLOBAL VOffset is signed
bool hasSignedGVSOffset() const { return HasGFX1250Insts; }
- bool enableSIScheduler() const { return EnableSIScheduler; }
-
bool loadStoreOptEnabled() const { return EnableLoadStoreOpt; }
- bool hasUserSGPRInit16Bug() const {
+ bool hasUserSGPRInit16BugInWave32() const {
return HasUserSGPRInit16Bug && isWave32();
}
@@ -804,7 +530,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return getGeneration() == AMDGPUSubtarget::GFX9;
}
- bool hasLDSMisalignedBug() const {
+ bool hasLDSMisalignedBugInWGPMode() const {
return HasLDSMisalignedBug && !EnableCuMode;
}
@@ -903,8 +629,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasAddPC64Inst() const { return HasGFX1250Insts; }
- bool useAddPC64Inst() const { return UseAddPC64Inst; }
-
/// \returns true if the target supports expert scheduling mode 2 which relies
/// on the compiler to insert waits to avoid hazards between VMEM and VALU
/// instructions in some instances.
@@ -1265,12 +989,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return 3;
}
- /// \returns true if the sub-target supports buffer resource (V#) with 45-bit
- /// num_records.
- bool requiresWaitsBeforeSystemScopeStores() const {
- return RequiresWaitsBeforeSystemScopeStores;
- }
-
bool supportsBPermute() const {
return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;
}
diff --git a/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
index 81b142e4e7b9e..248d734268ccf 100644
--- a/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
@@ -68,7 +68,7 @@ bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
getLoopDepth() > 1)
return true;
- if (!ST->hasCFAluBug())
+ if (!ST->hasCFALUBug())
return false;
switch(Opcode) {
diff --git a/llvm/lib/Target/AMDGPU/R600Processors.td b/llvm/lib/Target/AMDGPU/R600Processors.td
index d30228b6970dd..dc21eb9ed011d 100644
--- a/llvm/lib/Target/AMDGPU/R600Processors.td
+++ b/llvm/lib/Target/AMDGPU/R600Processors.td
@@ -14,7 +14,7 @@ class SubtargetFeatureFetchLimit <string Value> :
>;
def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
- "R600ALUInst",
+ "HasR600ALUInst",
"false",
"Older version of ALU instructions encoding"
>;
@@ -29,13 +29,13 @@ def FeatureVertexCache : SubtargetFeature<"HasVertexCache",
>;
def FeatureCaymanISA : SubtargetFeature<"caymanISA",
- "CaymanISA",
+ "HasCaymanISA",
"true",
"Use Cayman ISA"
>;
def FeatureCFALUBug : SubtargetFeature<"cfalubug",
- "CFALUBug",
+ "HasCFALUBug",
"true",
"GPU has CF_ALU bug"
>;
diff --git a/llvm/lib/Target/AMDGPU/R600Subtarget.h b/llvm/lib/Target/AMDGPU/R600Subtarget.h
index feda2e5c6d0d1..a30b565003d68 100644
--- a/llvm/lib/Target/AMDGPU/R600Subtarget.h
+++ b/llvm/lib/Target/AMDGPU/R600Subtarget.h
@@ -27,15 +27,14 @@ namespace llvm {
class R600Subtarget final : public R600GenSubtargetInfo,
public AMDGPUSubtarget {
+
+#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
+ bool ATTRIBUTE = DEFAULT;
+#include "R600GenSubtargetInfo.inc"
+
private:
R600InstrInfo InstrInfo;
R600FrameLowering FrameLowering;
- bool HasFMA = false;
- bool CaymanISA = false;
- bool CFALUBug = false;
- bool HasVertexCache = false;
- bool R600ALUInst = false;
- bool HasFP64 = false;
short TexVTXClauseSize = 0;
Generation Gen = R600;
R600TargetLowering TLInfo;
@@ -102,10 +101,6 @@ class R600Subtarget final : public R600GenSubtargetInfo,
return (getGeneration() >= EVERGREEN);
}
- bool hasCaymanISA() const {
- return CaymanISA;
- }
-
bool hasFFBL() const {
return (getGeneration() >= EVERGREEN);
}
@@ -114,11 +109,9 @@ class R600Subtarget final : public R600GenSubtargetInfo,
return (getGeneration() >= EVERGREEN);
}
- bool hasFMA() const { return HasFMA; }
-
- bool hasCFAluBug() const { return CFALUBug; }
-
- bool hasVertexCache() const { return HasVertexCache; }
+#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
+ bool GETTER() const override { return ATTRIBUTE; }
+#include "R600GenSubtargetInfo.inc"
short getTexVTXClauseSize() const { return TexVTXClauseSize; }
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 657eec1d93076..3097f0717bd2e 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1970,7 +1970,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
Align RequiredAlignment(
PowerOf2Ceil(divideCeil(Size, 8))); // Natural alignment.
- if (Subtarget->hasLDSMisalignedBug() && Size > 32 &&
+ if (Subtarget->hasLDSMisalignedBugInWGPMode() && Size > 32 &&
Alignment < RequiredAlignment)
return false;
@@ -3032,7 +3032,7 @@ void SITargetLowering::allocateSystemSGPRs(CCState &CCInfo, MachineFunction &MF,
CallingConv::ID CallConv,
bool IsShader) const {
bool HasArchitectedSGPRs = Subtarget->hasArchitectedSGPRs();
- if (Subtarget->hasUserSGPRInit16Bug() && !IsShader) {
+ if (Subtarget->hasUserSGPRInit16BugInWave32() && !IsShader) {
// Note: user SGPRs are handled by the front-end for graphics shaders
// Pad up the used user SGPRs with dead inputs.
@@ -3101,7 +3101,7 @@ void SITargetLowering::allocateSystemSGPRs(CCState &CCInfo, MachineFunction &MF,
CCInfo.AllocateReg(PrivateSegmentWaveByteOffsetReg);
}
- assert(!Subtarget->hasUserSGPRInit16Bug() || IsShader ||
+ assert(!Subtarget->hasUserSGPRInit16BugInWave32() || IsShader ||
Info.getNumPreloadedSGPRs() >= 16);
}
@@ -12103,7 +12103,8 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
Align Alignment = Load->getAlign();
unsigned AS = Load->getAddressSpace();
- if (Subtarget->hasLDSMisalignedBug() && AS == AMDGPUAS::FLAT_ADDRESS &&
+ if (Subtarget->hasLDSMisalignedBugInWGPMode() &&
+ AS == AMDGPUAS::FLAT_ADDRESS &&
Alignment.value() < MemVT.getStoreSize() && MemVT.getSizeInBits() > 32) {
return SplitVectorLoad(Op, DAG);
}
@@ -12727,7 +12728,8 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
Store->getValue().getValueType().getScalarType() == MVT::i32);
unsigned AS = Store->getAddressSpace();
- if (Subtarget->hasLDSMisalignedBug() && AS == AMDGPUAS::FLAT_ADDRESS &&
+ if (Subtarget->hasLDSMisalignedBugInWGPMode() &&
+ AS == AMDGPUAS::FLAT_ADDRESS &&
Store->getAlign().value() < VT.getStoreSize() &&
VT.getSizeInBits() > 32) {
return SplitVectorStore(Op, DAG);
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 349c158c8838d..17e45af50fb72 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -840,7 +840,7 @@ class WaitcntBrackets {
return {{}, {}};
const TargetRegisterClass *RC = Context->TRI->getPhysRegBaseClass(Reg);
unsigned Size = Context->TRI->getRegSizeInBits(*RC);
- if (Size == 16 && Context->ST->hasD16Writes32BitVgpr())
+ if (Size == 16 && Context->ST->enableD16Writes32BitVgpr())
Reg = Context->TRI->get32BitRegister(Reg);
return Context->TRI->regunits(Reg);
}
More information about the llvm-commits
mailing list