[llvm] [NFCI][AMDGPU] Use `GET_SUBTARGETINFO_MACRO` in `GCNSubtarget.h` and `R600Subtarget.h` (PR #177402)

Shilei Tian via llvm-commits llvm-commits at lists.llvm.org
Fri Jan 23 07:42:04 PST 2026


https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/177402

>From 52f02156e1cf727482a126d30ed79e0e5ea7738b Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Thu, 22 Jan 2026 11:43:16 -0500
Subject: [PATCH] [NFCI][AMDGPU] Use `GET_SUBTARGETINFO_MACRO` in
 `GCNSubtarget.h`

---
 llvm/lib/Target/AMDGPU/AMDGPU.td              |   4 +-
 .../lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp |   2 +-
 llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp    |  10 -
 llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h      |  83 ++---
 llvm/lib/Target/AMDGPU/GCNSubtarget.h         | 300 +-----------------
 .../AMDGPU/R600ControlFlowFinalizer.cpp       |   2 +-
 llvm/lib/Target/AMDGPU/R600Processors.td      |   6 +-
 llvm/lib/Target/AMDGPU/R600Subtarget.h        |  23 +-
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |  12 +-
 llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp   |   2 +-
 10 files changed, 53 insertions(+), 391 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 7044f91195075..c2ba49d5d37cb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -2382,9 +2382,9 @@ def UseFakeTrue16Insts : True16PredicateClass<"Subtarget->hasTrue16BitInsts() &&
 def UseTrue16WithSramECC : True16PredicateClass<"Subtarget->useRealTrue16Insts() && "
                                                 "!Subtarget->d16PreservesUnusedBits()">;
 
-def HasD16Writes32BitVgpr: Predicate<"Subtarget->hasD16Writes32BitVgpr()">,
+def HasD16Writes32BitVgpr: Predicate<"Subtarget->enableD16Writes32BitVgpr()">,
   AssemblerPredicate<(all_of FeatureTrue16BitInsts, FeatureRealTrue16Insts, FeatureD16Writes32BitVgpr)>;
-def NotHasD16Writes32BitVgpr: Predicate<"!Subtarget->hasD16Writes32BitVgpr()">,
+def NotHasD16Writes32BitVgpr: Predicate<"!Subtarget->enableD16Writes32BitVgpr()">,
   AssemblerPredicate<(all_of FeatureTrue16BitInsts, FeatureRealTrue16Insts, (not FeatureD16Writes32BitVgpr))>;
 
 def NotHasMed3_16 : Predicate<"!Subtarget->hasMed3_16()">;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 3355c277e50d2..d18d3a13b29ea 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -371,7 +371,7 @@ bool AMDGPUPromoteAllocaImpl::run(Function &F, bool PromoteToLDS) {
   DL = &Mod->getDataLayout();
 
   const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(TM, F);
-  if (!ST.isPromoteAllocaEnabled())
+  if (!ST.enablePromoteAlloca())
     return false;
 
   bool SufficientLDS = PromoteToLDS && hasSufficientLocalMem(F);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 5ca8ee22306f6..300aca1a3d789 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -32,16 +32,6 @@ using namespace llvm;
 
 #define DEBUG_TYPE "amdgpu-subtarget"
 
-AMDGPUSubtarget::AMDGPUSubtarget(Triple TT) : TargetTriple(std::move(TT)) {}
-
-bool AMDGPUSubtarget::useRealTrue16Insts() const {
-  return hasTrue16BitInsts() && EnableRealTrue16Insts;
-}
-
-bool AMDGPUSubtarget::hasD16Writes32BitVgpr() const {
-  return EnableD16Writes32BitVgpr;
-}
-
 // Returns the maximum per-workgroup LDS allocation size (in bytes) that still
 // allows the given function to achieve an occupancy of NWaves waves per
 // SIMD / EU, taking into account only the function's *maximum* workgroup size.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 7882df23b9b20..7f56f52bcdd78 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -19,43 +19,6 @@
 #include "llvm/Support/Alignment.h"
 #include "llvm/TargetParser/Triple.h"
 
-//===----------------------------------------------------------------------===//
-// X-Macros for simple subtarget features.
-//
-// AMDGPU_SUBTARGET_HAS_FEATURE: Features with both member and getter
-//   bool HasXXX = false;                      // member declaration
-//   bool hasXXX() const { return HasXXX; }    // getter
-//
-// AMDGPU_SUBTARGET_ENABLE_FEATURE_MEMBER_ONLY: Features with member only
-//   bool EnableXXX = false;                   // member declaration only
-//
-// To add a new simple feature:
-//   1. Add X(FeatureName) to the appropriate macro below
-//   2. Remove the manual bool HasFeatureName declaration from protected section
-//   3. If using AMDGPU_SUBTARGET_HAS_FEATURE, also remove the manual getter
-//   4. If using AMDGPU_SUBTARGET_ENABLE_FEATURE_MEMBER_ONLY, also remove the
-//      manual getter
-//
-// Note: The features are ordered alphabetically for convenience. Unlike
-// GCNSubtarget.h, we do not use TableGen-generated features here. We
-// intentionally keep the feature set here minimal. For any new feature, unless
-// it needs to be queried via an AMDGPUSubtarget reference, it should be added
-// to GCNSubtarget.h instead.
-//===----------------------------------------------------------------------===//
-
-#define AMDGPU_SUBTARGET_HAS_FEATURE(X)                                        \
-  X(16BitInsts)                                                                \
-  X(FastFMAF32)                                                                \
-  X(Inv2PiInlineImm)                                                           \
-  X(MadMacF32Insts)                                                            \
-  X(SDWA)                                                                      \
-  X(True16BitInsts)                                                            \
-  X(VOP3PInsts)
-
-#define AMDGPU_SUBTARGET_ENABLE_FEATURE_MEMBER_ONLY(X)                         \
-  X(RealTrue16Insts)                                                           \
-  X(D16Writes32BitVgpr)
-
 namespace llvm {
 
 enum AMDGPUDwarfFlavour : unsigned;
@@ -88,19 +51,8 @@ class AMDGPUSubtarget {
   bool HasMulI24 = true;
   bool HasMulU24 = true;
   bool HasSMulHi = false;
-  bool EnablePromoteAlloca = false;
   bool HasFminFmaxLegacy = true;
 
-#define DECL_HAS_MEMBER(Name) bool Has##Name = false;
-  AMDGPU_SUBTARGET_HAS_FEATURE(DECL_HAS_MEMBER)
-#undef DECL_HAS_MEMBER
-#undef AMDGPU_SUBTARGET_HAS_FEATURE_MEMBER_ONLY
-
-#define DECL_ENABLE_MEMBER(Name) bool Enable##Name = false;
-  AMDGPU_SUBTARGET_ENABLE_FEATURE_MEMBER_ONLY(DECL_ENABLE_MEMBER)
-#undef DECL_ENABLE_MEMBER
-#undef AMDGPU_SUBTARGET_ENABLE_FEATURE_MEMBER_ONLY
-
   unsigned EUsPerCU = 4;
   unsigned MaxWavesPerEU = 10;
   unsigned LocalMemorySize = 0;
@@ -108,7 +60,7 @@ class AMDGPUSubtarget {
   char WavefrontSizeLog2 = 0;
 
 public:
-  AMDGPUSubtarget(Triple TT);
+  AMDGPUSubtarget(Triple TT) : TargetTriple(std::move(TT)) {}
 
   static const AMDGPUSubtarget &get(const MachineFunction &MF);
   static const AMDGPUSubtarget &get(const TargetMachine &TM,
@@ -231,12 +183,23 @@ class AMDGPUSubtarget {
 
   bool isGCN() const { return TargetTriple.isAMDGCN(); }
 
-  // Simple subtarget feature getters - auto-generated from X-macro.
-#define DECL_HAS_GETTER(Name)                                                  \
-  bool has##Name() const { return Has##Name; }
-  AMDGPU_SUBTARGET_HAS_FEATURE(DECL_HAS_GETTER)
-#undef DECL_HAS_GETTER
-#undef AMDGPU_SUBTARGET_HAS_FEATURE
+  //==----------------------------------------------------------------------===//
+  // TableGen-generated features.
+  //
+  // Every getter below defaults to false (feature disabled); the
+  // target-specific subtarget classes override them. Because
+  // AMDGPUGenSubtargetInfo.inc and R600GenSubtargetInfo.inc overlap, we
+  // cannot simply include both files here, so the getters used only by
+  // R600Subtarget are declared by hand.
+  //==----------------------------------------------------------------------===//
+  virtual bool hasCFALUBug() const { return false; }
+  virtual bool hasCaymanISA() const { return false; }
+  virtual bool hasVertexCache() const { return false; }
+  virtual bool hasR600ALUInst() const { return false; }
+#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
+  virtual bool GETTER() const { return false; }
+#include "AMDGPUGenSubtargetInfo.inc"
+  //==----------------------------------------------------------------------===//
 
   /// Return true if real (non-fake) variants of True16 instructions using
   /// 16-bit registers should be code-generated. Fake True16 instructions are
@@ -244,9 +207,9 @@ class AMDGPUSubtarget {
   /// operands and always use their low halves.
   // TODO: Remove and use hasTrue16BitInsts() instead once True16 is fully
   // supported and the support for fake True16 instructions is removed.
-  bool useRealTrue16Insts() const;
-
-  bool hasD16Writes32BitVgpr() const;
+  bool useRealTrue16Insts() const {
+    return hasTrue16BitInsts() && enableRealTrue16Insts();
+  }
 
   bool hasMulI24() const {
     return HasMulI24;
@@ -264,10 +227,6 @@ class AMDGPUSubtarget {
     return HasFminFmaxLegacy;
   }
 
-  bool isPromoteAllocaEnabled() const {
-    return EnablePromoteAlloca;
-  }
-
   unsigned getWavefrontSize() const {
     return 1 << WavefrontSizeLog2;
   }
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index ee9d47c15127d..2d66bb3e9bf78 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -26,240 +26,6 @@
 #define GET_SUBTARGETINFO_HEADER
 #include "AMDGPUGenSubtargetInfo.inc"
 
-//===----------------------------------------------------------------------===//
-// X-Macros for simple subtarget features.
-//
-// GCN_SUBTARGET_HAS_FEATURE_MEMBER_ONLY: Features with member only (no getter)
-//   bool HasXXX = false;                      // member declaration only
-//
-// GCN_SUBTARGET_HAS_FEATURE: Features with both member and getter
-//   bool HasXXX = false;                      // member declaration
-//   bool hasXXX() const { return HasXXX; }    // getter
-//
-// To add a new simple feature:
-//   1. Add X(FeatureName) to the appropriate macro below
-//   2. Remove the manual bool HasFeatureName declaration from protected section
-//   3. If using GCN_SUBTARGET_HAS_FEATURE, also remove the manual getter
-//
-// Note: The features are ordered alphabetically for convenience. Preferably
-// this would be generated automatically by TableGen, but there are some cases
-// where the features were not defined in a way that was compatible with the
-// auto-generation.
-//===----------------------------------------------------------------------===//
-
-// Features with member only (no getter generated).
-// These features either have custom getters or code accesses the member
-// directly.
-#define GCN_SUBTARGET_HAS_FEATURE_MEMBER_ONLY(X)                               \
-  X(AssemblerPermissiveWavesize)                                               \
-  X(CIInsts)                                                                   \
-  X(FastDenormalF32)                                                           \
-  X(GCN3Encoding)                                                              \
-  X(GFX10Insts)                                                                \
-  X(GFX11Insts)                                                                \
-  X(GFX12Insts)                                                                \
-  X(GFX7GFX8GFX9Insts)                                                         \
-  X(GFX8Insts)                                                                 \
-  X(GFX9Insts)                                                                 \
-  X(LDSMisalignedBug)                                                          \
-  X(UnalignedBufferAccess)                                                     \
-  X(UnalignedScratchAccess)                                                    \
-  X(UserSGPRInit16Bug)
-
-// Features with both member and getter.
-#define GCN_SUBTARGET_HAS_FEATURE(X)                                           \
-  X(1_5xVGPRs)                                                                 \
-  X(1024AddressableVGPRs)                                                      \
-  X(45BitNumRecordsBufferResource)                                             \
-  X(64BitLiterals)                                                             \
-  X(A16)                                                                       \
-  X(AddMinMaxInsts)                                                            \
-  X(AddNoCarryInsts)                                                           \
-  X(AddSubU64Insts)                                                            \
-  X(AgentScopeFineGrainedRemoteMemoryAtomics)                                  \
-  X(ApertureRegs)                                                              \
-  X(ArchitectedFlatScratch)                                                    \
-  X(ArchitectedSGPRs)                                                          \
-  X(AshrPkInsts)                                                               \
-  X(AtomicBufferGlobalPkAddF16Insts)                                           \
-  X(AtomicBufferGlobalPkAddF16NoRtnInsts)                                      \
-  X(AtomicBufferPkAddBF16Inst)                                                 \
-  X(AtomicCSubNoRtnInsts)                                                      \
-  X(AtomicDsPkAdd16Insts)                                                      \
-  X(AtomicFaddNoRtnInsts)                                                      \
-  X(AtomicFaddRtnInsts)                                                        \
-  X(AtomicFlatPkAdd16Insts)                                                    \
-  X(AtomicFMinFMaxF32FlatInsts)                                                \
-  X(AtomicFMinFMaxF32GlobalInsts)                                              \
-  X(AtomicFMinFMaxF64FlatInsts)                                                \
-  X(AtomicFMinFMaxF64GlobalInsts)                                              \
-  X(AtomicGlobalPkAddBF16Inst)                                                 \
-  X(AutoWaitcntBeforeBarrier)                                                  \
-  X(BackOffBarrier)                                                            \
-  X(BF16ConversionInsts)                                                       \
-  X(BF16PackedInsts)                                                           \
-  X(BF16TransInsts)                                                            \
-  X(BF8ConversionScaleInsts)                                                   \
-  X(BitOp3Insts)                                                               \
-  X(BVHDualAndBVH8Insts)                                                       \
-  X(Clusters)                                                                  \
-  X(CubeInsts)                                                                 \
-  X(CvtFP8VOP1Bug)                                                             \
-  X(CvtNormInsts)                                                              \
-  X(CvtPkF16F32Inst)                                                           \
-  X(CvtPkNormVOP2Insts)                                                        \
-  X(CvtPkNormVOP3Insts)                                                        \
-  X(DefaultComponentBroadcast)                                                 \
-  X(DefaultComponentZero)                                                      \
-  X(DLInsts)                                                                   \
-  X(Dot10Insts)                                                                \
-  X(Dot11Insts)                                                                \
-  X(Dot12Insts)                                                                \
-  X(Dot13Insts)                                                                \
-  X(Dot1Insts)                                                                 \
-  X(Dot2Insts)                                                                 \
-  X(Dot3Insts)                                                                 \
-  X(Dot4Insts)                                                                 \
-  X(Dot5Insts)                                                                 \
-  X(Dot6Insts)                                                                 \
-  X(Dot7Insts)                                                                 \
-  X(Dot8Insts)                                                                 \
-  X(Dot9Insts)                                                                 \
-  X(DPALU_DPP)                                                                 \
-  X(DPP)                                                                       \
-  X(DPP8)                                                                      \
-  X(DPPSrc1SGPR)                                                               \
-  X(DsSrc2Insts)                                                               \
-  X(EmulatedSystemScopeAtomics)                                                \
-  X(ExtendedImageInsts)                                                        \
-  X(F16BF16ToFP6BF6ConversionScaleInsts)                                       \
-  X(F32ToF16BF16ConversionSRInsts)                                             \
-  X(FlatAddressSpace)                                                          \
-  X(FlatAtomicFaddF32Inst)                                                     \
-  X(FlatBufferGlobalAtomicFaddF64Inst)                                         \
-  X(FlatGlobalInsts)                                                           \
-  X(FlatGVSMode)                                                               \
-  X(FlatInstOffsets)                                                           \
-  X(FlatScratchInsts)                                                          \
-  X(FlatSegmentOffsetBug)                                                      \
-  X(FMA)                                                                       \
-  X(FmacF64Inst)                                                               \
-  X(FmaMixBF16Insts)                                                           \
-  X(FmaMixInsts)                                                               \
-  X(FP4ConversionScaleInsts)                                                   \
-  X(FP64)                                                                      \
-  X(FP6BF6ConversionScaleInsts)                                                \
-  X(FP8ConversionInsts)                                                        \
-  X(FP8ConversionScaleInsts)                                                   \
-  X(FP8E5M3Insts)                                                              \
-  X(FP8Insts)                                                                  \
-  X(FullRate64Ops)                                                             \
-  X(G16)                                                                       \
-  X(GDS)                                                                       \
-  X(GetWaveIdInst)                                                             \
-  X(GFX10_3Insts)                                                              \
-  X(GFX10_AEncoding)                                                           \
-  X(GFX10_BEncoding)                                                           \
-  X(GFX1250Insts)                                                              \
-  X(GFX90AInsts)                                                               \
-  X(GFX940Insts)                                                               \
-  X(GFX950Insts)                                                               \
-  X(GloballyAddressableScratch)                                                \
-  X(GWS)                                                                       \
-  X(HalfRate64Ops)                                                             \
-  X(IEEEMinimumMaximumInsts)                                                   \
-  X(ImageGather4D16Bug)                                                        \
-  X(ImageInsts)                                                                \
-  X(ImageStoreD16Bug)                                                          \
-  X(InstFwdPrefetchBug)                                                        \
-  X(IntClamp)                                                                  \
-  X(KernargPreload)                                                            \
-  X(LdsBarrierArriveAtomic)                                                    \
-  X(LdsBranchVmemWARHazard)                                                    \
-  X(LerpInst)                                                                  \
-  X(LshlAddU64Inst)                                                            \
-  X(MADIntraFwdBug)                                                            \
-  X(MadMixInsts)                                                               \
-  X(MadU32Inst)                                                                \
-  X(MAIInsts)                                                                  \
-  X(McastLoadInsts)                                                            \
-  X(MemoryAtomicFaddF32DenormalSupport)                                        \
-  X(MFMAInlineLiteralBug)                                                      \
-  X(MIMG_R128)                                                                 \
-  X(Min3Max3PKF16)                                                             \
-  X(Minimum3Maximum3F16)                                                       \
-  X(Minimum3Maximum3F32)                                                       \
-  X(Minimum3Maximum3PKF16)                                                     \
-  X(Movrel)                                                                    \
-  X(MSAALoadDstSelBug)                                                         \
-  X(NegativeScratchOffsetBug)                                                  \
-  X(NegativeUnalignedScratchOffsetBug)                                         \
-  X(NoDataDepHazard)                                                           \
-  X(NoSdstCMPX)                                                                \
-  X(NSAClauseBug)                                                              \
-  X(NSAEncoding)                                                               \
-  X(NSAtoVMEMBug)                                                              \
-  X(Offset3fBug)                                                               \
-  X(PackedFP32Ops)                                                             \
-  X(PackedTID)                                                                 \
-  X(PartialNSAEncoding)                                                        \
-  X(Permlane16Swap)                                                            \
-  X(Permlane32Swap)                                                            \
-  X(PkAddMinMaxInsts)                                                          \
-  X(PkFmacF16Inst)                                                             \
-  X(PointSampleAccel)                                                          \
-  X(PrivEnabledTrap2NopBug)                                                    \
-  X(PrngInst)                                                                  \
-  X(PseudoScalarTrans)                                                         \
-  X(QsadInsts)                                                                 \
-  X(R128A16)                                                                   \
-  X(RelaxedBufferOOBMode)                                                      \
-  X(RequiredExportPriority)                                                    \
-  X(RestrictedSOffset)                                                         \
-  X(SadInsts)                                                                  \
-  X(SafeCUPrefetch)                                                            \
-  X(SafeSmemPrefetch)                                                          \
-  X(SALUFloatInsts)                                                            \
-  X(ScalarAtomics)                                                             \
-  X(ScalarDwordx3Loads)                                                        \
-  X(ScalarFlatScratchInsts)                                                    \
-  X(ScalarStores)                                                              \
-  X(SDWAMac)                                                                   \
-  X(SDWAOmod)                                                                  \
-  X(SDWAOutModsVOPC)                                                           \
-  X(SDWAScalar)                                                                \
-  X(SDWASdst)                                                                  \
-  X(SetPrioIncWgInst)                                                          \
-  X(SetregVGPRMSBFixup)                                                        \
-  X(SGPRInitBug)                                                               \
-  X(ShaderCyclesHiLoRegisters)                                                 \
-  X(ShaderCyclesRegister)                                                      \
-  X(SMemRealTime)                                                              \
-  X(SMemTimeInst)                                                              \
-  X(SMEMtoVectorWriteHazard)                                                   \
-  X(SWakeupBarrier)                                                            \
-  X(TanhInsts)                                                                 \
-  X(TensorCvtLutInsts)                                                         \
-  X(TransposeLoadF4F6Insts)                                                    \
-  X(TrapHandler)                                                               \
-  X(TrigReducedRange)                                                          \
-  X(UnalignedAccessMode)                                                       \
-  X(UnalignedDSAccess)                                                         \
-  X(UnpackedD16VMem)                                                           \
-  X(VALUTransUseHazard)                                                        \
-  X(VcmpxExecWARHazard)                                                        \
-  X(VcmpxPermlaneHazard)                                                       \
-  X(VGPRIndexMode)                                                             \
-  X(VmemPrefInsts)                                                             \
-  X(VMemToLDSLoad)                                                             \
-  X(VMEMtoScalarWriteHazard)                                                   \
-  X(VmemWriteVgprInOrder)                                                      \
-  X(VOP3Literal)                                                               \
-  X(VOPDInsts)                                                                 \
-  X(Vscnt)                                                                     \
-  X(WaitXcnt)                                                                  \
-  X(XF32Insts)
-
 namespace llvm {
 
 class GCNTargetMachine;
@@ -310,38 +76,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   /// indicates a lack of S_CLAUSE support.
   unsigned MaxHardClauseLength = 0;
 
-  //===--------------------------------------------------------------------===//
-  /// Controlled by subtarget features defined in AMDGPU.td
-  //===--------------------------------------------------------------------===//
-  bool DumpCode = false;
-  bool EnableCuMode = false;
-  bool EnableDS128 = false;
-  bool EnableFlatScratch = false;
-  bool EnableLoadStoreOpt = false;
-  bool EnablePreciseMemory = false;
-  bool EnablePRTStrictNull = false;
-  bool EnableSIScheduler = false;
-  // This should not be used directly. 'TargetID' tracks the dynamic settings
-  // for SRAMECC.
-  bool EnableSRAMECC = false;
-  bool EnableTgSplit = false;
-  bool EnableUnsafeDSOffsetFolding = false;
-  bool EnableXNACK = false;
-  bool RequiresAlignVGPR = false;
-  bool RequiresCOV6 = false;
-  bool RequiresWaitsBeforeSystemScopeStores = false;
-  bool SupportsSRAMECC = false;
-  // This should not be used directly. 'TargetID' tracks the dynamic settings
-  // for XNACK.
-  bool SupportsXNACK = false;
-  bool UseAddPC64Inst = false;
-  bool UseBlockVGPROpsForCSR = false;
-  bool UseFlatForGlobal = false;
-
-#define DECL_HAS_MEMBER(Name) bool Has##Name = false;
-  GCN_SUBTARGET_HAS_FEATURE(DECL_HAS_MEMBER)
-  GCN_SUBTARGET_HAS_FEATURE_MEMBER_ONLY(DECL_HAS_MEMBER)
-#undef DECL_HAS_MEMBER
+#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
+  bool ATTRIBUTE = DEFAULT;
+#include "AMDGPUGenSubtargetInfo.inc"
 
 private:
   SIInstrInfo InstrInfo;
@@ -404,19 +141,14 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
 
   void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS);
 
-  // Simple subtarget feature getters - auto-generated from X-macro.
-  // Note: GCN_SUBTARGET_HAS_FEATURE_MEMBER_ONLY features don't get getters.
-#define DECL_HAS_GETTER(Name)                                                  \
-  bool has##Name() const { return Has##Name; }
-  GCN_SUBTARGET_HAS_FEATURE(DECL_HAS_GETTER)
-#undef DECL_HAS_GETTER
-#undef GCN_SUBTARGET_HAS_FEATURE
-#undef GCN_SUBTARGET_HAS_FEATURE_MEMBER_ONLY
-
   Generation getGeneration() const { return (Generation)Gen; }
 
   bool isGFX11Plus() const { return getGeneration() >= GFX11; }
 
+#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
+  bool GETTER() const override { return ATTRIBUTE; }
+#include "AMDGPUGenSubtargetInfo.inc"
+
   unsigned getMaxWaveScratchSize() const {
     // See COMPUTE_TMPRING_SIZE.WAVESIZE.
     if (getGeneration() >= GFX12) {
@@ -536,8 +268,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
     return getGeneration() <= SEA_ISLANDS ? 1 : 2;
   }
 
-  bool dumpCode() const { return DumpCode; }
-
   /// Return the amount of LDS that can be used that will not restrict the
   /// occupancy lower than WaveCount.
   unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
@@ -552,8 +282,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
     return getGeneration() >= AMDGPUSubtarget::GFX10;
   }
 
-  bool useFlatForGlobal() const { return UseFlatForGlobal; }
-
   /// \returns If target supports ds_read/write_b128 and user enables generation
   /// of ds_read/write_b128.
   bool useDS128() const { return HasCIInsts && EnableDS128; }
@@ -772,11 +500,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   // FLAT GLOBAL VOffset is signed
   bool hasSignedGVSOffset() const { return HasGFX1250Insts; }
 
-  bool enableSIScheduler() const { return EnableSIScheduler; }
-
   bool loadStoreOptEnabled() const { return EnableLoadStoreOpt; }
 
-  bool hasUserSGPRInit16Bug() const {
+  bool hasUserSGPRInit16BugInWave32() const {
     return HasUserSGPRInit16Bug && isWave32();
   }
 
@@ -804,7 +530,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
     return getGeneration() == AMDGPUSubtarget::GFX9;
   }
 
-  bool hasLDSMisalignedBug() const {
+  bool hasLDSMisalignedBugInWGPMode() const {
     return HasLDSMisalignedBug && !EnableCuMode;
   }
 
@@ -903,8 +629,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
 
   bool hasAddPC64Inst() const { return HasGFX1250Insts; }
 
-  bool useAddPC64Inst() const { return UseAddPC64Inst; }
-
   /// \returns true if the target supports expert scheduling mode 2 which relies
   /// on the compiler to insert waits to avoid hazards between VMEM and VALU
   /// instructions in some instances.
@@ -1265,12 +989,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
     return 3;
   }
 
-  /// \returns true if the sub-target supports buffer resource (V#) with 45-bit
-  /// num_records.
-  bool requiresWaitsBeforeSystemScopeStores() const {
-    return RequiresWaitsBeforeSystemScopeStores;
-  }
-
   bool supportsBPermute() const {
     return getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS;
   }
diff --git a/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
index 81b142e4e7b9e..248d734268ccf 100644
--- a/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
@@ -68,7 +68,7 @@ bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
       getLoopDepth() > 1)
     return true;
 
-  if (!ST->hasCFAluBug())
+  if (!ST->hasCFALUBug())
     return false;
 
   switch(Opcode) {
diff --git a/llvm/lib/Target/AMDGPU/R600Processors.td b/llvm/lib/Target/AMDGPU/R600Processors.td
index d30228b6970dd..dc21eb9ed011d 100644
--- a/llvm/lib/Target/AMDGPU/R600Processors.td
+++ b/llvm/lib/Target/AMDGPU/R600Processors.td
@@ -14,7 +14,7 @@ class SubtargetFeatureFetchLimit <string Value> :
 >;
 
 def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
-  "R600ALUInst",
+  "HasR600ALUInst",
   "false",
   "Older version of ALU instructions encoding"
 >;
@@ -29,13 +29,13 @@ def FeatureVertexCache : SubtargetFeature<"HasVertexCache",
 >;
 
 def FeatureCaymanISA : SubtargetFeature<"caymanISA",
-  "CaymanISA",
+  "HasCaymanISA",
   "true",
   "Use Cayman ISA"
 >;
 
 def FeatureCFALUBug : SubtargetFeature<"cfalubug",
-  "CFALUBug",
+  "HasCFALUBug",
   "true",
   "GPU has CF_ALU bug"
 >;
diff --git a/llvm/lib/Target/AMDGPU/R600Subtarget.h b/llvm/lib/Target/AMDGPU/R600Subtarget.h
index feda2e5c6d0d1..a30b565003d68 100644
--- a/llvm/lib/Target/AMDGPU/R600Subtarget.h
+++ b/llvm/lib/Target/AMDGPU/R600Subtarget.h
@@ -27,15 +27,14 @@ namespace llvm {
 
 class R600Subtarget final : public R600GenSubtargetInfo,
                             public AMDGPUSubtarget {
+
+#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
+  bool ATTRIBUTE = DEFAULT;
+#include "R600GenSubtargetInfo.inc"
+
 private:
   R600InstrInfo InstrInfo;
   R600FrameLowering FrameLowering;
-  bool HasFMA = false;
-  bool CaymanISA = false;
-  bool CFALUBug = false;
-  bool HasVertexCache = false;
-  bool R600ALUInst = false;
-  bool HasFP64 = false;
   short TexVTXClauseSize = 0;
   Generation Gen = R600;
   R600TargetLowering TLInfo;
@@ -102,10 +101,6 @@ class R600Subtarget final : public R600GenSubtargetInfo,
     return (getGeneration() >= EVERGREEN);
   }
 
-  bool hasCaymanISA() const {
-    return CaymanISA;
-  }
-
   bool hasFFBL() const {
     return (getGeneration() >= EVERGREEN);
   }
@@ -114,11 +109,9 @@ class R600Subtarget final : public R600GenSubtargetInfo,
     return (getGeneration() >= EVERGREEN);
   }
 
-  bool hasFMA() const { return HasFMA; }
-
-  bool hasCFAluBug() const { return CFALUBug; }
-
-  bool hasVertexCache() const { return HasVertexCache; }
+#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER)                    \
+  bool GETTER() const override { return ATTRIBUTE; }
+#include "R600GenSubtargetInfo.inc"
 
   short getTexVTXClauseSize() const { return TexVTXClauseSize; }
 
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 657eec1d93076..3097f0717bd2e 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1970,7 +1970,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccessesImpl(
 
     Align RequiredAlignment(
         PowerOf2Ceil(divideCeil(Size, 8))); // Natural alignment.
-    if (Subtarget->hasLDSMisalignedBug() && Size > 32 &&
+    if (Subtarget->hasLDSMisalignedBugInWGPMode() && Size > 32 &&
         Alignment < RequiredAlignment)
       return false;
 
@@ -3032,7 +3032,7 @@ void SITargetLowering::allocateSystemSGPRs(CCState &CCInfo, MachineFunction &MF,
                                            CallingConv::ID CallConv,
                                            bool IsShader) const {
   bool HasArchitectedSGPRs = Subtarget->hasArchitectedSGPRs();
-  if (Subtarget->hasUserSGPRInit16Bug() && !IsShader) {
+  if (Subtarget->hasUserSGPRInit16BugInWave32() && !IsShader) {
     // Note: user SGPRs are handled by the front-end for graphics shaders
     // Pad up the used user SGPRs with dead inputs.
 
@@ -3101,7 +3101,7 @@ void SITargetLowering::allocateSystemSGPRs(CCState &CCInfo, MachineFunction &MF,
     CCInfo.AllocateReg(PrivateSegmentWaveByteOffsetReg);
   }
 
-  assert(!Subtarget->hasUserSGPRInit16Bug() || IsShader ||
+  assert(!Subtarget->hasUserSGPRInit16BugInWave32() || IsShader ||
          Info.getNumPreloadedSGPRs() >= 16);
 }
 
@@ -12103,7 +12103,8 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
 
   Align Alignment = Load->getAlign();
   unsigned AS = Load->getAddressSpace();
-  if (Subtarget->hasLDSMisalignedBug() && AS == AMDGPUAS::FLAT_ADDRESS &&
+  if (Subtarget->hasLDSMisalignedBugInWGPMode() &&
+      AS == AMDGPUAS::FLAT_ADDRESS &&
       Alignment.value() < MemVT.getStoreSize() && MemVT.getSizeInBits() > 32) {
     return SplitVectorLoad(Op, DAG);
   }
@@ -12727,7 +12728,8 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
          Store->getValue().getValueType().getScalarType() == MVT::i32);
 
   unsigned AS = Store->getAddressSpace();
-  if (Subtarget->hasLDSMisalignedBug() && AS == AMDGPUAS::FLAT_ADDRESS &&
+  if (Subtarget->hasLDSMisalignedBugInWGPMode() &&
+      AS == AMDGPUAS::FLAT_ADDRESS &&
       Store->getAlign().value() < VT.getStoreSize() &&
       VT.getSizeInBits() > 32) {
     return SplitVectorStore(Op, DAG);
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 349c158c8838d..17e45af50fb72 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -840,7 +840,7 @@ class WaitcntBrackets {
       return {{}, {}};
     const TargetRegisterClass *RC = Context->TRI->getPhysRegBaseClass(Reg);
     unsigned Size = Context->TRI->getRegSizeInBits(*RC);
-    if (Size == 16 && Context->ST->hasD16Writes32BitVgpr())
+    if (Size == 16 && Context->ST->enableD16Writes32BitVgpr())
       Reg = Context->TRI->get32BitRegister(Reg);
     return Context->TRI->regunits(Reg);
   }



More information about the llvm-commits mailing list