[llvm] r341982 - AMDGPU: Move isa version and EF_AMDGPU_MACH_* determination

Benjamin Kramer via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 11 15:42:12 PDT 2018


On Tue, Sep 11, 2018 at 8:58 PM Konstantin Zhuravlyov via llvm-commits <
llvm-commits at lists.llvm.org> wrote:

> Author: kzhuravl
> Date: Tue Sep 11 11:56:51 2018
> New Revision: 341982
>
> URL: http://llvm.org/viewvc/llvm-project?rev=341982&view=rev
> Log:
> AMDGPU: Move isa version and EF_AMDGPU_MACH_* determination
> into TargetParser.
>
> Also switch away from target features to CPU string when
> determining isa version. This fixes an issue when we
> output wrong isa version in the object code when features
> of a particular CPU are altered (i.e. gfx902 w/o xnack
> used to result in gfx900).
>
> Differential Revision: https://reviews.llvm.org/D51890
>
>
> Added:
>     llvm/trunk/test/CodeGen/AMDGPU/gfx902-without-xnack.ll
> Modified:
>     llvm/trunk/include/llvm/Support/TargetParser.h
>     llvm/trunk/lib/Support/TargetParser.cpp
>     llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
>     llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
>     llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
>     llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
>     llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
>     llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
>     llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
>     llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
>     llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
>     llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
>     llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
>     llvm/trunk/test/MC/AMDGPU/hsa_isa_version_attrs.s
>
> Modified: llvm/trunk/include/llvm/Support/TargetParser.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/TargetParser.h?rev=341982&r1=341981&r2=341982&view=diff
>
> ==============================================================================
> --- llvm/trunk/include/llvm/Support/TargetParser.h (original)
> +++ llvm/trunk/include/llvm/Support/TargetParser.h Tue Sep 11 11:56:51 2018
> @@ -320,6 +320,13 @@ enum GPUKind : uint32_t {
>    GK_AMDGCN_LAST = GK_GFX906,
>  };
>
> +/// Instruction set architecture version.
> +struct IsaVersion {
> +  unsigned Major;
> +  unsigned Minor;
> +  unsigned Stepping;
> +};
> +
>  // This isn't comprehensive for now, just things that are needed from the
>  // frontend driver.
>  enum ArchFeatureKind : uint32_t {
> @@ -335,18 +342,22 @@ enum ArchFeatureKind : uint32_t {
>    FEATURE_FAST_DENORMAL_F32 = 1 << 5
>  };
>
> -GPUKind parseArchAMDGCN(StringRef CPU);
> -GPUKind parseArchR600(StringRef CPU);
>  StringRef getArchNameAMDGCN(GPUKind AK);
>  StringRef getArchNameR600(GPUKind AK);
>  StringRef getCanonicalArchName(StringRef Arch);
> +GPUKind parseArchAMDGCN(StringRef CPU);
> +GPUKind parseArchR600(StringRef CPU);
>  unsigned getArchAttrAMDGCN(GPUKind AK);
>  unsigned getArchAttrR600(GPUKind AK);
>
>  void fillValidArchListAMDGCN(SmallVectorImpl<StringRef> &Values);
>  void fillValidArchListR600(SmallVectorImpl<StringRef> &Values);
>
> -}
> +StringRef getArchNameFromElfMach(unsigned ElfMach);
> +unsigned getElfMach(StringRef GPU);
> +IsaVersion getIsaVersion(StringRef GPU);
> +
> +} // namespace AMDGPU
>
>  } // namespace llvm
>
>
> Modified: llvm/trunk/lib/Support/TargetParser.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/TargetParser.cpp?rev=341982&r1=341981&r2=341982&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Support/TargetParser.cpp (original)
> +++ llvm/trunk/lib/Support/TargetParser.cpp Tue Sep 11 11:56:51 2018
> @@ -17,11 +17,13 @@
>  #include "llvm/ADT/ArrayRef.h"
>  #include "llvm/ADT/StringSwitch.h"
>  #include "llvm/ADT/Twine.h"
> +#include "llvm/BinaryFormat/ELF.h"
>

This is a layering violation: lib/Support cannot depend on anything in
BinaryFormat.


>  #include <cctype>
>
>  using namespace llvm;
>  using namespace ARM;
>  using namespace AArch64;
> +using namespace AMDGPU;
>
>  namespace {
>
> @@ -947,6 +949,8 @@ bool llvm::AArch64::isX18ReservedByDefau
>           TT.isOSWindows();
>  }
>
> +namespace {
> +
>  struct GPUInfo {
>    StringLiteral Name;
>    StringLiteral CanonicalName;
> @@ -954,11 +958,9 @@ struct GPUInfo {
>    unsigned Features;
>  };
>
> -using namespace AMDGPU;
> -static constexpr GPUInfo R600GPUs[26] = {
> -  // Name         Canonical    Kind       Features
> -  //              Name
> -  //
> +constexpr GPUInfo R600GPUs[26] = {
> +  // Name       Canonical    Kind        Features
> +  //            Name
>    {{"r600"},    {"r600"},    GK_R600,    FEATURE_NONE },
>    {{"rv630"},   {"r600"},    GK_R600,    FEATURE_NONE },
>    {{"rv635"},   {"r600"},    GK_R600,    FEATURE_NONE },
> @@ -989,9 +991,9 @@ static constexpr GPUInfo R600GPUs[26] =
>
>  // This table should be sorted by the value of GPUKind
>  // Don't bother listing the implicitly true features
> -static constexpr GPUInfo AMDGCNGPUs[32] = {
> -  // Name           Canonical    Kind      Features
> -  //                Name
> +constexpr GPUInfo AMDGCNGPUs[32] = {
> +  // Name         Canonical    Kind        Features
> +  //              Name
>    {{"gfx600"},    {"gfx600"},  GK_GFX600,  FEATURE_FAST_FMA_F32},
>    {{"tahiti"},    {"gfx600"},  GK_GFX600,  FEATURE_FAST_FMA_F32},
>    {{"gfx601"},    {"gfx601"},  GK_GFX601,  FEATURE_NONE},
> @@ -1026,8 +1028,7 @@ static constexpr GPUInfo AMDGCNGPUs[32]
>    {{"gfx906"},    {"gfx906"},  GK_GFX906,
> FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
>  };
>
> -static const GPUInfo *getArchEntry(AMDGPU::GPUKind AK,
> -                                   ArrayRef<GPUInfo> Table) {
> +const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) {
>    GPUInfo Search = { {""}, {""}, AK, AMDGPU::FEATURE_NONE };
>
>    auto I = std::lower_bound(Table.begin(), Table.end(), Search,
> @@ -1040,6 +1041,8 @@ static const GPUInfo *getArchEntry(AMDGP
>    return I;
>  }
>
> +} // namespace
> +
>  StringRef llvm::AMDGPU::getArchNameAMDGCN(GPUKind AK) {
>    if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs))
>      return Entry->CanonicalName;
> @@ -1092,3 +1095,118 @@ void AMDGPU::fillValidArchListR600(Small
>    for (const auto C : R600GPUs)
>      Values.push_back(C.Name);
>  }
> +
> +StringRef AMDGPU::getArchNameFromElfMach(unsigned ElfMach) {
> +  AMDGPU::GPUKind AK;
> +
> +  switch (ElfMach) {
> +  case ELF::EF_AMDGPU_MACH_R600_R600:     AK = GK_R600;    break;
> +  case ELF::EF_AMDGPU_MACH_R600_R630:     AK = GK_R630;    break;
> +  case ELF::EF_AMDGPU_MACH_R600_RS880:    AK = GK_RS880;   break;
> +  case ELF::EF_AMDGPU_MACH_R600_RV670:    AK = GK_RV670;   break;
> +  case ELF::EF_AMDGPU_MACH_R600_RV710:    AK = GK_RV710;   break;
> +  case ELF::EF_AMDGPU_MACH_R600_RV730:    AK = GK_RV730;   break;
> +  case ELF::EF_AMDGPU_MACH_R600_RV770:    AK = GK_RV770;   break;
> +  case ELF::EF_AMDGPU_MACH_R600_CEDAR:    AK = GK_CEDAR;   break;
> +  case ELF::EF_AMDGPU_MACH_R600_CYPRESS:  AK = GK_CYPRESS; break;
> +  case ELF::EF_AMDGPU_MACH_R600_JUNIPER:  AK = GK_JUNIPER; break;
> +  case ELF::EF_AMDGPU_MACH_R600_REDWOOD:  AK = GK_REDWOOD; break;
> +  case ELF::EF_AMDGPU_MACH_R600_SUMO:     AK = GK_SUMO;    break;
> +  case ELF::EF_AMDGPU_MACH_R600_BARTS:    AK = GK_BARTS;   break;
> +  case ELF::EF_AMDGPU_MACH_R600_CAICOS:   AK = GK_CAICOS;  break;
> +  case ELF::EF_AMDGPU_MACH_R600_CAYMAN:   AK = GK_CAYMAN;  break;
> +  case ELF::EF_AMDGPU_MACH_R600_TURKS:    AK = GK_TURKS;   break;
> +  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600;  break;
> +  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601;  break;
> +  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700;  break;
> +  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701;  break;
> +  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702;  break;
> +  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703;  break;
> +  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704;  break;
> +  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801;  break;
> +  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802;  break;
> +  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803;  break;
> +  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810;  break;
> +  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900;  break;
> +  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902;  break;
> +  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904;  break;
> +  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906;  break;
> +  case ELF::EF_AMDGPU_MACH_NONE:          AK = GK_NONE;    break;
> +  }
> +
> +  StringRef GPUName = getArchNameAMDGCN(AK);
> +  if (GPUName != "")
> +    return GPUName;
> +  return getArchNameR600(AK);
> +}
> +
> +unsigned AMDGPU::getElfMach(StringRef GPU) {
> +  AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
> +  if (AK == AMDGPU::GPUKind::GK_NONE)
> +    AK = parseArchR600(GPU);
> +
> +  switch (AK) {
> +  case GK_R600:    return ELF::EF_AMDGPU_MACH_R600_R600;
> +  case GK_R630:    return ELF::EF_AMDGPU_MACH_R600_R630;
> +  case GK_RS880:   return ELF::EF_AMDGPU_MACH_R600_RS880;
> +  case GK_RV670:   return ELF::EF_AMDGPU_MACH_R600_RV670;
> +  case GK_RV710:   return ELF::EF_AMDGPU_MACH_R600_RV710;
> +  case GK_RV730:   return ELF::EF_AMDGPU_MACH_R600_RV730;
> +  case GK_RV770:   return ELF::EF_AMDGPU_MACH_R600_RV770;
> +  case GK_CEDAR:   return ELF::EF_AMDGPU_MACH_R600_CEDAR;
> +  case GK_CYPRESS: return ELF::EF_AMDGPU_MACH_R600_CYPRESS;
> +  case GK_JUNIPER: return ELF::EF_AMDGPU_MACH_R600_JUNIPER;
> +  case GK_REDWOOD: return ELF::EF_AMDGPU_MACH_R600_REDWOOD;
> +  case GK_SUMO:    return ELF::EF_AMDGPU_MACH_R600_SUMO;
> +  case GK_BARTS:   return ELF::EF_AMDGPU_MACH_R600_BARTS;
> +  case GK_CAICOS:  return ELF::EF_AMDGPU_MACH_R600_CAICOS;
> +  case GK_CAYMAN:  return ELF::EF_AMDGPU_MACH_R600_CAYMAN;
> +  case GK_TURKS:   return ELF::EF_AMDGPU_MACH_R600_TURKS;
> +  case GK_GFX600:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600;
> +  case GK_GFX601:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601;
> +  case GK_GFX700:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700;
> +  case GK_GFX701:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701;
> +  case GK_GFX702:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702;
> +  case GK_GFX703:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703;
> +  case GK_GFX704:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704;
> +  case GK_GFX801:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801;
> +  case GK_GFX802:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802;
> +  case GK_GFX803:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803;
> +  case GK_GFX810:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810;
> +  case GK_GFX900:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900;
> +  case GK_GFX902:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902;
> +  case GK_GFX904:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904;
> +  case GK_GFX906:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
> +  case GK_NONE:    return ELF::EF_AMDGPU_MACH_NONE;
> +  }
> +
> +  llvm_unreachable("unknown GPU");
> +}
> +
> +AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
> +  if (GPU == "generic")
> +    return {7, 0, 0};
> +
> +  AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
> +  if (AK == AMDGPU::GPUKind::GK_NONE)
> +    return {0, 0, 0};
> +
> +  switch (AK) {
> +  case GK_GFX600: return {6, 0, 0};
> +  case GK_GFX601: return {6, 0, 1};
> +  case GK_GFX700: return {7, 0, 0};
> +  case GK_GFX701: return {7, 0, 1};
> +  case GK_GFX702: return {7, 0, 2};
> +  case GK_GFX703: return {7, 0, 3};
> +  case GK_GFX704: return {7, 0, 4};
> +  case GK_GFX801: return {8, 0, 1};
> +  case GK_GFX802: return {8, 0, 2};
> +  case GK_GFX803: return {8, 0, 3};
> +  case GK_GFX810: return {8, 1, 0};
> +  case GK_GFX900: return {9, 0, 0};
> +  case GK_GFX902: return {9, 0, 2};
> +  case GK_GFX904: return {9, 0, 4};
> +  case GK_GFX906: return {9, 0, 6};
> +  default:        return {0, 0, 0};
> +  }
> +}
>
> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp?rev=341982&r1=341981&r2=341982&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (original)
> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Tue Sep 11 11:56:51
> 2018
> @@ -40,6 +40,7 @@
>  #include "llvm/MC/MCStreamer.h"
>  #include "llvm/Support/AMDGPUMetadata.h"
>  #include "llvm/Support/MathExtras.h"
> +#include "llvm/Support/TargetParser.h"
>  #include "llvm/Support/TargetRegistry.h"
>  #include "llvm/Target/TargetLoweringObjectFile.h"
>
> @@ -134,9 +135,9 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFil
>      getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1);
>
>    // HSA and PAL emit NT_AMDGPU_HSA_ISA for code objects v2.
> -  IsaInfo::IsaVersion ISA =
> IsaInfo::getIsaVersion(getSTI()->getFeatureBits());
> +  IsaVersion Version = getIsaVersion(getSTI()->getCPU());
>    getTargetStreamer()->EmitDirectiveHSACodeObjectISA(
> -      ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU");
> +      Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU");
>  }
>
>  void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
> @@ -240,7 +241,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyE
>        *getSTI(), KernelName, getAmdhsaKernelDescriptor(*MF,
> CurrentProgramInfo),
>        CurrentProgramInfo.NumVGPRsForWavesPerEU,
>        CurrentProgramInfo.NumSGPRsForWavesPerEU -
> -          IsaInfo::getNumExtraSGPRs(getSTI()->getFeatureBits(),
> +          IsaInfo::getNumExtraSGPRs(getSTI(),
>                                      CurrentProgramInfo.VCCUsed,
>                                      CurrentProgramInfo.FlatUsed),
>        CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
> @@ -561,7 +562,7 @@ static bool hasAnyNonFlatUseOfReg(const
>
>  int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs(
>    const GCNSubtarget &ST) const {
> -  return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(),
> +  return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(&ST,
>                                                       UsesVCC,
> UsesFlatScratch);
>  }
>
> @@ -758,7 +759,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo
>
>            // 48 SGPRs - vcc, - flat_scr, -xnack
>            int MaxSGPRGuess =
> -              47 - IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(), true,
> +              47 - IsaInfo::getNumExtraSGPRs(getSTI(), true,
>                                               ST.hasFlatAddressSpace());
>            MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
>            MaxVGPR = std::max(MaxVGPR, 23);
> @@ -823,7 +824,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(
>    // duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could
> be
>    // unified.
>    unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
> -      STM.getFeatureBits(), ProgInfo.VCCUsed, ProgInfo.FlatUsed);
> +      getSTI(), ProgInfo.VCCUsed, ProgInfo.FlatUsed);
>
>    // Check the addressable register limit before we add ExtraSGPRs.
>    if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
> @@ -905,9 +906,9 @@ void AMDGPUAsmPrinter::getSIProgramInfo(
>    }
>
>    ProgInfo.SGPRBlocks = IsaInfo::getNumSGPRBlocks(
> -      STM.getFeatureBits(), ProgInfo.NumSGPRsForWavesPerEU);
> +      getSTI(), ProgInfo.NumSGPRsForWavesPerEU);
>    ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks(
> -      STM.getFeatureBits(), ProgInfo.NumVGPRsForWavesPerEU);
> +      getSTI(), ProgInfo.NumVGPRsForWavesPerEU);
>
>    // Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
>    // DebuggerPrivateSegmentBufferSGPR fields if
> "amdgpu-debugger-emit-prologue"
> @@ -1137,7 +1138,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(
>    const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
>    const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
>
> -  AMDGPU::initDefaultAMDKernelCodeT(Out, STM.getFeatureBits());
> +  AMDGPU::initDefaultAMDKernelCodeT(Out, getSTI());
>
>    Out.compute_pgm_resource_registers =
>        CurrentProgramInfo.ComputePGMRSrc1 |
>
> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=341982&r1=341981&r2=341982&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Tue Sep 11 11:56:51
> 2018
> @@ -124,10 +124,8 @@ GCNSubtarget::initializeSubtargetDepende
>    return *this;
>  }
>
> -AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT,
> -                                             const FeatureBitset
> &FeatureBits) :
> +AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) :
>    TargetTriple(TT),
> -  SubtargetFeatureBits(FeatureBits),
>    Has16BitInsts(false),
>    HasMadMixInsts(false),
>    FP32Denormals(false),
> @@ -144,9 +142,9 @@ AMDGPUSubtarget::AMDGPUSubtarget(const T
>    { }
>
>  GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
> -                                 const GCNTargetMachine &TM) :
> +                           const GCNTargetMachine &TM) :
>      AMDGPUGenSubtargetInfo(TT, GPU, FS),
> -    AMDGPUSubtarget(TT, getFeatureBits()),
> +    AMDGPUSubtarget(TT),
>      TargetTriple(TT),
>      Gen(SOUTHERN_ISLANDS),
>      IsaVersion(ISAVersion0_0_0),
> @@ -448,7 +446,7 @@ unsigned AMDGPUSubtarget::getKernArgSegm
>  R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef
> FS,
>                               const TargetMachine &TM) :
>    R600GenSubtargetInfo(TT, GPU, FS),
> -  AMDGPUSubtarget(TT, getFeatureBits()),
> +  AMDGPUSubtarget(TT),
>    InstrInfo(*this),
>    FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(),
> 0),
>    FMA(false),
>
> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=341982&r1=341981&r2=341982&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Tue Sep 11 11:56:51 2018
> @@ -63,7 +63,6 @@ private:
>    Triple TargetTriple;
>
>  protected:
> -  const FeatureBitset &SubtargetFeatureBits;
>    bool Has16BitInsts;
>    bool HasMadMixInsts;
>    bool FP32Denormals;
> @@ -79,7 +78,7 @@ protected:
>    unsigned WavefrontSize;
>
>  public:
> -  AMDGPUSubtarget(const Triple &TT, const FeatureBitset &FeatureBits);
> +  AMDGPUSubtarget(const Triple &TT);
>
>    static const AMDGPUSubtarget &get(const MachineFunction &MF);
>    static const AMDGPUSubtarget &get(const TargetMachine &TM,
> @@ -203,33 +202,21 @@ public:
>
>    /// \returns Maximum number of work groups per compute unit supported
> by the
>    /// subtarget and limited by given \p FlatWorkGroupSize.
> -  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
> -    return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(SubtargetFeatureBits,
> -                                                  FlatWorkGroupSize);
> -  }
> +  virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize)
> const = 0;
>
>    /// \returns Minimum flat work group size supported by the subtarget.
> -  unsigned getMinFlatWorkGroupSize() const {
> -    return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(SubtargetFeatureBits);
> -  }
> +  virtual unsigned getMinFlatWorkGroupSize() const = 0;
>
>    /// \returns Maximum flat work group size supported by the subtarget.
> -  unsigned getMaxFlatWorkGroupSize() const {
> -    return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(SubtargetFeatureBits);
> -  }
> +  virtual unsigned getMaxFlatWorkGroupSize() const = 0;
>
>    /// \returns Maximum number of waves per execution unit supported by the
>    /// subtarget and limited by given \p FlatWorkGroupSize.
> -  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
> -    return AMDGPU::IsaInfo::getMaxWavesPerEU(SubtargetFeatureBits,
> -                                             FlatWorkGroupSize);
> -  }
> +  virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const  =
> 0;
>
>    /// \returns Minimum number of waves per execution unit supported by the
>    /// subtarget.
> -  unsigned getMinWavesPerEU() const {
> -    return AMDGPU::IsaInfo::getMinWavesPerEU(SubtargetFeatureBits);
> -  }
> +  virtual unsigned getMinWavesPerEU() const = 0;
>
>    unsigned getMaxWavesPerEU() const { return 10; }
>
> @@ -708,20 +695,19 @@ public:
>    /// \returns Number of execution units per compute unit supported by the
>    /// subtarget.
>    unsigned getEUsPerCU() const {
> -    return
> AMDGPU::IsaInfo::getEUsPerCU(MCSubtargetInfo::getFeatureBits());
> +    return AMDGPU::IsaInfo::getEUsPerCU(this);
>    }
>
>    /// \returns Maximum number of waves per compute unit supported by the
>    /// subtarget without any kind of limitation.
>    unsigned getMaxWavesPerCU() const {
> -    return
> AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits());
> +    return AMDGPU::IsaInfo::getMaxWavesPerCU(this);
>    }
>
>    /// \returns Maximum number of waves per compute unit supported by the
>    /// subtarget and limited by given \p FlatWorkGroupSize.
>    unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
> -    return
> AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits(),
> -                                             FlatWorkGroupSize);
> +    return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize);
>    }
>
>    /// \returns Maximum number of waves per execution unit supported by the
> @@ -733,8 +719,7 @@ public:
>    /// \returns Number of waves per work group supported by the subtarget
> and
>    /// limited by given \p FlatWorkGroupSize.
>    unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
> -    return AMDGPU::IsaInfo::getWavesPerWorkGroup(
> -        MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize);
> +    return AMDGPU::IsaInfo::getWavesPerWorkGroup(this, FlatWorkGroupSize);
>    }
>
>    // static wrappers
> @@ -853,39 +838,34 @@ public:
>
>    /// \returns SGPR allocation granularity supported by the subtarget.
>    unsigned getSGPRAllocGranule() const {
> -    return AMDGPU::IsaInfo::getSGPRAllocGranule(
> -        MCSubtargetInfo::getFeatureBits());
> +    return AMDGPU::IsaInfo::getSGPRAllocGranule(this);
>    }
>
>    /// \returns SGPR encoding granularity supported by the subtarget.
>    unsigned getSGPREncodingGranule() const {
> -    return AMDGPU::IsaInfo::getSGPREncodingGranule(
> -        MCSubtargetInfo::getFeatureBits());
> +    return AMDGPU::IsaInfo::getSGPREncodingGranule(this);
>    }
>
>    /// \returns Total number of SGPRs supported by the subtarget.
>    unsigned getTotalNumSGPRs() const {
> -    return
> AMDGPU::IsaInfo::getTotalNumSGPRs(MCSubtargetInfo::getFeatureBits());
> +    return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
>    }
>
>    /// \returns Addressable number of SGPRs supported by the subtarget.
>    unsigned getAddressableNumSGPRs() const {
> -    return AMDGPU::IsaInfo::getAddressableNumSGPRs(
> -        MCSubtargetInfo::getFeatureBits());
> +    return AMDGPU::IsaInfo::getAddressableNumSGPRs(this);
>    }
>
>    /// \returns Minimum number of SGPRs that meets the given number of
> waves per
>    /// execution unit requirement supported by the subtarget.
>    unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
> -    return
> AMDGPU::IsaInfo::getMinNumSGPRs(MCSubtargetInfo::getFeatureBits(),
> -                                           WavesPerEU);
> +    return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
>    }
>
>    /// \returns Maximum number of SGPRs that meets the given number of
> waves per
>    /// execution unit requirement supported by the subtarget.
>    unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
> -    return
> AMDGPU::IsaInfo::getMaxNumSGPRs(MCSubtargetInfo::getFeatureBits(),
> -                                           WavesPerEU, Addressable);
> +    return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
>    }
>
>    /// \returns Reserved number of SGPRs for given function \p MF.
> @@ -903,39 +883,34 @@ public:
>
>    /// \returns VGPR allocation granularity supported by the subtarget.
>    unsigned getVGPRAllocGranule() const {
> -    return AMDGPU::IsaInfo::getVGPRAllocGranule(
> -        MCSubtargetInfo::getFeatureBits());
> +    return AMDGPU::IsaInfo::getVGPRAllocGranule(this);
>    }
>
>    /// \returns VGPR encoding granularity supported by the subtarget.
>    unsigned getVGPREncodingGranule() const {
> -    return AMDGPU::IsaInfo::getVGPREncodingGranule(
> -        MCSubtargetInfo::getFeatureBits());
> +    return AMDGPU::IsaInfo::getVGPREncodingGranule(this);
>    }
>
>    /// \returns Total number of VGPRs supported by the subtarget.
>    unsigned getTotalNumVGPRs() const {
> -    return
> AMDGPU::IsaInfo::getTotalNumVGPRs(MCSubtargetInfo::getFeatureBits());
> +    return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
>    }
>
>    /// \returns Addressable number of VGPRs supported by the subtarget.
>    unsigned getAddressableNumVGPRs() const {
> -    return AMDGPU::IsaInfo::getAddressableNumVGPRs(
> -        MCSubtargetInfo::getFeatureBits());
> +    return AMDGPU::IsaInfo::getAddressableNumVGPRs(this);
>    }
>
>    /// \returns Minimum number of VGPRs that meets given number of waves
> per
>    /// execution unit requirement supported by the subtarget.
>    unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
> -    return
> AMDGPU::IsaInfo::getMinNumVGPRs(MCSubtargetInfo::getFeatureBits(),
> -                                           WavesPerEU);
> +    return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
>    }
>
>    /// \returns Maximum number of VGPRs that meets given number of waves
> per
>    /// execution unit requirement supported by the subtarget.
>    unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
> -    return
> AMDGPU::IsaInfo::getMaxNumVGPRs(MCSubtargetInfo::getFeatureBits(),
> -                                           WavesPerEU);
> +    return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
>    }
>
>    /// \returns Maximum number of VGPRs that meets number of waves per
> execution
> @@ -951,6 +926,34 @@ public:
>    void getPostRAMutations(
>        std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
>        const override;
> +
> +  /// \returns Maximum number of work groups per compute unit supported
> by the
> +  /// subtarget and limited by given \p FlatWorkGroupSize.
> +  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const
> override {
> +    return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this,
> FlatWorkGroupSize);
> +  }
> +
> +  /// \returns Minimum flat work group size supported by the subtarget.
> +  unsigned getMinFlatWorkGroupSize() const override {
> +    return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
> +  }
> +
> +  /// \returns Maximum flat work group size supported by the subtarget.
> +  unsigned getMaxFlatWorkGroupSize() const override {
> +    return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
> +  }
> +
> +  /// \returns Maximum number of waves per execution unit supported by the
> +  /// subtarget and limited by given \p FlatWorkGroupSize.
> +  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
> +    return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
> +  }
> +
> +  /// \returns Minimum number of waves per execution unit supported by the
> +  /// subtarget.
> +  unsigned getMinWavesPerEU() const override {
> +    return AMDGPU::IsaInfo::getMinWavesPerEU(this);
> +  }
>  };
>
>  class R600Subtarget final : public R600GenSubtargetInfo,
> @@ -1061,6 +1064,34 @@ public:
>    bool enableSubRegLiveness() const override {
>      return true;
>    }
> +
> +  /// \returns Maximum number of work groups per compute unit supported
> by the
> +  /// subtarget and limited by given \p FlatWorkGroupSize.
> +  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const
> override {
> +    return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this,
> FlatWorkGroupSize);
> +  }
> +
> +  /// \returns Minimum flat work group size supported by the subtarget.
> +  unsigned getMinFlatWorkGroupSize() const override {
> +    return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
> +  }
> +
> +  /// \returns Maximum flat work group size supported by the subtarget.
> +  unsigned getMaxFlatWorkGroupSize() const override {
> +    return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
> +  }
> +
> +  /// \returns Maximum number of waves per execution unit supported by the
> +  /// subtarget and limited by given \p FlatWorkGroupSize.
> +  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
> +    return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
> +  }
> +
> +  /// \returns Minimum number of waves per execution unit supported by the
> +  /// subtarget.
> +  unsigned getMinWavesPerEU() const override {
> +    return AMDGPU::IsaInfo::getMinWavesPerEU(this);
> +  }
>  };
>
>  } // end namespace llvm
>
> Modified: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp?rev=341982&r1=341981&r2=341982&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (original)
> +++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp Tue Sep 11
> 11:56:51 2018
> @@ -49,6 +49,7 @@
>  #include "llvm/Support/MachineValueType.h"
>  #include "llvm/Support/MathExtras.h"
>  #include "llvm/Support/SMLoc.h"
> +#include "llvm/Support/TargetParser.h"
>  #include "llvm/Support/TargetRegistry.h"
>  #include "llvm/Support/raw_ostream.h"
>  #include <algorithm>
> @@ -917,8 +918,7 @@ public:
>        // Currently there is none suitable machinery in the core llvm-mc
> for this.
>        // MCSymbol::isRedefinable is intended for another purpose, and
>        // AsmParser::parseDirectiveSet() cannot be specialized for
> specific target.
> -      AMDGPU::IsaInfo::IsaVersion ISA =
> -          AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
> +      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
>        MCContext &Ctx = getContext();
>        if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
>          MCSymbol *Sym =
> @@ -1826,7 +1826,7 @@ bool AMDGPUAsmParser::updateGprCountSymb
>                                              unsigned DwordRegIndex,
>                                              unsigned RegWidth) {
>    // Symbols are only defined for GCN targets
> -  if (AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()).Major < 6)
> +  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
>      return true;
>
>    auto SymbolName = getGprCountSymbolName(RegKind);
> @@ -2637,18 +2637,18 @@ bool AMDGPUAsmParser::calculateGPRBlocks
>      unsigned &SGPRBlocks) {
>    // TODO(scott.linder): These calculations are duplicated from
>    // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
> -  IsaInfo::IsaVersion Version = IsaInfo::getIsaVersion(Features);
> +  IsaVersion Version = getIsaVersion(getSTI().getCPU());
>
>    unsigned NumVGPRs = NextFreeVGPR;
>    unsigned NumSGPRs = NextFreeSGPR;
> -  unsigned MaxAddressableNumSGPRs =
> IsaInfo::getAddressableNumSGPRs(Features);
> +  unsigned MaxAddressableNumSGPRs =
> IsaInfo::getAddressableNumSGPRs(&getSTI());
>
>    if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
>        NumSGPRs > MaxAddressableNumSGPRs)
>      return OutOfRangeError(SGPRRange);
>
>    NumSGPRs +=
> -      IsaInfo::getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed, XNACKUsed);
> +      IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
>
>    if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
>        NumSGPRs > MaxAddressableNumSGPRs)
> @@ -2657,8 +2657,8 @@ bool AMDGPUAsmParser::calculateGPRBlocks
>    if (Features.test(FeatureSGPRInitBug))
>      NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
>
> -  VGPRBlocks = IsaInfo::getNumVGPRBlocks(Features, NumVGPRs);
> -  SGPRBlocks = IsaInfo::getNumSGPRBlocks(Features, NumSGPRs);
> +  VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
> +  SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
>
>    return false;
>  }
> @@ -2678,8 +2678,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDH
>
>    StringSet<> Seen;
>
> -  IsaInfo::IsaVersion IVersion =
> -      IsaInfo::getIsaVersion(getSTI().getFeatureBits());
> +  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
>
>    SMRange VGPRRange;
>    uint64_t NextFreeVGPR = 0;
> @@ -2938,8 +2937,7 @@ bool AMDGPUAsmParser::ParseDirectiveHSAC
>    // If this directive has no arguments, then use the ISA version for the
>    // targeted GPU.
>    if (getLexer().is(AsmToken::EndOfStatement)) {
> -    AMDGPU::IsaInfo::IsaVersion ISA =
> -        AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
> +    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
>      getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
>                                                        ISA.Stepping,
>                                                        "AMD", "AMDGPU");
> @@ -3001,7 +2999,7 @@ bool AMDGPUAsmParser::ParseAMDKernelCode
>
>  bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
>    amd_kernel_code_t Header;
> -  AMDGPU::initDefaultAMDKernelCodeT(Header, getFeatureBits());
> +  AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
>
>    while (true) {
>      // Lex EndOfStatement.  This is in a while loop, because lexing a comment
> @@ -3679,12 +3677,12 @@ void AMDGPUAsmParser::cvtExp(MCInst &Ins
>
>  static bool
>  encodeCnt(
> -  const AMDGPU::IsaInfo::IsaVersion ISA,
> +  const AMDGPU::IsaVersion ISA,
>    int64_t &IntVal,
>    int64_t CntVal,
>    bool Saturate,
> -  unsigned (*encode)(const IsaInfo::IsaVersion &Version, unsigned, unsigned),
> -  unsigned (*decode)(const IsaInfo::IsaVersion &Version, unsigned))
> +  unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
> +  unsigned (*decode)(const IsaVersion &Version, unsigned))
>  {
>    bool Failed = false;
>
> @@ -3715,8 +3713,7 @@ bool AMDGPUAsmParser::parseCnt(int64_t &
>    if (getParser().parseAbsoluteExpression(CntVal))
>      return true;
>
> -  AMDGPU::IsaInfo::IsaVersion ISA =
> -      AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
> +  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
>
>    bool Failed = true;
>    bool Sat = CntName.endswith("_sat");
> @@ -3751,8 +3748,7 @@ bool AMDGPUAsmParser::parseCnt(int64_t &
>
>  OperandMatchResultTy
>  AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
> -  AMDGPU::IsaInfo::IsaVersion ISA =
> -      AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
> +  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
>    int64_t Waitcnt = getWaitcntBitMask(ISA);
>    SMLoc S = Parser.getTok().getLoc();
>
>
> Modified: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp?rev=341982&r1=341981&r2=341982&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp (original)
> +++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp Tue Sep 11 11:56:51 2018
> @@ -1155,8 +1155,7 @@ void AMDGPUInstPrinter::printSwizzle(con
>  void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
>                                        const MCSubtargetInfo &STI,
>                                        raw_ostream &O) {
> -  AMDGPU::IsaInfo::IsaVersion ISA =
> -      AMDGPU::IsaInfo::getIsaVersion(STI.getFeatureBits());
> +  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI.getCPU());
>
>    unsigned SImm16 = MI->getOperand(OpNo).getImm();
>    unsigned Vmcnt, Expcnt, Lgkmcnt;
>
> Modified:
> llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp?rev=341982&r1=341981&r2=341982&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp (original)
> +++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp Tue Sep 11 11:56:51 2018
> @@ -27,6 +27,7 @@
>  #include "llvm/MC/MCObjectFileInfo.h"
>  #include "llvm/MC/MCSectionELF.h"
>  #include "llvm/Support/FormattedStream.h"
> +#include "llvm/Support/TargetParser.h"
>
>  namespace llvm {
>  #include "AMDGPUPTNote.h"
> @@ -39,84 +40,6 @@ using namespace llvm::AMDGPU;
>  // AMDGPUTargetStreamer
>
>  //===----------------------------------------------------------------------===//
>
> -static const struct {
> -  const char *Name;
> -  unsigned Mach;
> -} MachTable[] = {
> -      // Radeon HD 2000/3000 Series (R600).
> -      { "r600", ELF::EF_AMDGPU_MACH_R600_R600 },
> -      { "r630", ELF::EF_AMDGPU_MACH_R600_R630 },
> -      { "rs880", ELF::EF_AMDGPU_MACH_R600_RS880 },
> -      { "rv670", ELF::EF_AMDGPU_MACH_R600_RV670 },
> -      // Radeon HD 4000 Series (R700).
> -      { "rv710", ELF::EF_AMDGPU_MACH_R600_RV710 },
> -      { "rv730", ELF::EF_AMDGPU_MACH_R600_RV730 },
> -      { "rv770", ELF::EF_AMDGPU_MACH_R600_RV770 },
> -      // Radeon HD 5000 Series (Evergreen).
> -      { "cedar", ELF::EF_AMDGPU_MACH_R600_CEDAR },
> -      { "cypress", ELF::EF_AMDGPU_MACH_R600_CYPRESS },
> -      { "juniper", ELF::EF_AMDGPU_MACH_R600_JUNIPER },
> -      { "redwood", ELF::EF_AMDGPU_MACH_R600_REDWOOD },
> -      { "sumo", ELF::EF_AMDGPU_MACH_R600_SUMO },
> -      // Radeon HD 6000 Series (Northern Islands).
> -      { "barts", ELF::EF_AMDGPU_MACH_R600_BARTS },
> -      { "caicos", ELF::EF_AMDGPU_MACH_R600_CAICOS },
> -      { "cayman", ELF::EF_AMDGPU_MACH_R600_CAYMAN },
> -      { "turks", ELF::EF_AMDGPU_MACH_R600_TURKS },
> -      // AMDGCN GFX6.
> -      { "gfx600", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 },
> -      { "tahiti", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 },
> -      { "gfx601", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
> -      { "hainan", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
> -      { "oland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
> -      { "pitcairn", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
> -      { "verde", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
> -      // AMDGCN GFX7.
> -      { "gfx700", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 },
> -      { "kaveri", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 },
> -      { "gfx701", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 },
> -      { "hawaii", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 },
> -      { "gfx702", ELF::EF_AMDGPU_MACH_AMDGCN_GFX702 },
> -      { "gfx703", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
> -      { "kabini", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
> -      { "mullins", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
> -      { "gfx704", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 },
> -      { "bonaire", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 },
> -      // AMDGCN GFX8.
> -      { "gfx801", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 },
> -      { "carrizo", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 },
> -      { "gfx802", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
> -      { "iceland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
> -      { "tonga", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
> -      { "gfx803", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
> -      { "fiji", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
> -      { "polaris10", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
> -      { "polaris11", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
> -      { "gfx810", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 },
> -      { "stoney", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 },
> -      // AMDGCN GFX9.
> -      { "gfx900", ELF::EF_AMDGPU_MACH_AMDGCN_GFX900 },
> -      { "gfx902", ELF::EF_AMDGPU_MACH_AMDGCN_GFX902 },
> -      { "gfx904", ELF::EF_AMDGPU_MACH_AMDGCN_GFX904 },
> -      { "gfx906", ELF::EF_AMDGPU_MACH_AMDGCN_GFX906 },
> -      // Not specified processor.
> -      { nullptr, ELF::EF_AMDGPU_MACH_NONE }
> -};
> -
> -unsigned AMDGPUTargetStreamer::getMACH(StringRef GPU) const {
> -  auto Entry = MachTable;
> -  for (; Entry->Name && GPU != Entry->Name; ++Entry)
> -    ;
> -  return Entry->Mach;
> -}
> -
> -const char *AMDGPUTargetStreamer::getMachName(unsigned Mach) {
> -  auto Entry = MachTable;
> -  for (; Entry->Name && Mach != Entry->Mach; ++Entry)
> -    ;
> -  return Entry->Name;
> -}
> -
>  bool AMDGPUTargetStreamer::EmitHSAMetadata(StringRef HSAMetadataString) {
>    HSAMD::Metadata HSAMetadata;
>    if (HSAMD::fromString(HSAMetadataString, HSAMetadata))
> @@ -205,7 +128,7 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsa
>      bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK) {
>    amdhsa::kernel_descriptor_t DefaultKD = getDefaultAmdhsaKernelDescriptor();
>
> -  IsaInfo::IsaVersion IVersion = IsaInfo::getIsaVersion(STI.getFeatureBits());
> +  IsaVersion IVersion = getIsaVersion(STI.getCPU());
>
>    OS << "\t.amdhsa_kernel " << KernelName << '\n';
>
> @@ -342,7 +265,7 @@ AMDGPUTargetELFStreamer::AMDGPUTargetELF
>    unsigned EFlags = MCA.getELFHeaderEFlags();
>
>    EFlags &= ~ELF::EF_AMDGPU_MACH;
> -  EFlags |= getMACH(STI.getCPU());
> +  EFlags |= getElfMach(STI.getCPU());
>
>    EFlags &= ~ELF::EF_AMDGPU_XNACK;
>    if (AMDGPU::hasXNACK(STI))
>
> Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h?rev=341982&r1=341981&r2=341982&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h (original)
> +++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h Tue Sep 11 11:56:51 2018
> @@ -31,13 +31,7 @@ class AMDGPUTargetStreamer : public MCTa
>  protected:
>    MCContext &getContext() const { return Streamer.getContext(); }
>
> -  /// \returns Equivalent EF_AMDGPU_MACH_* value for given \p GPU name.
> -  unsigned getMACH(StringRef GPU) const;
> -
>  public:
> -  /// \returns Equivalent GPU name for an EF_AMDGPU_MACH_* value.
> -  static const char *getMachName(unsigned Mach);
> -
>    AMDGPUTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
>
>    virtual void EmitDirectiveAMDGCNTarget(StringRef Target) = 0;
>
> Modified: llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp?rev=341982&r1=341981&r2=341982&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp (original)
> +++ llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp Tue Sep 11 11:56:51 2018
> @@ -369,7 +369,7 @@ private:
>    const SIRegisterInfo *TRI = nullptr;
>    const MachineRegisterInfo *MRI = nullptr;
>    const MachineLoopInfo *MLI = nullptr;
> -  AMDGPU::IsaInfo::IsaVersion IV;
> +  AMDGPU::IsaVersion IV;
>
>    DenseSet<MachineBasicBlock *> BlockVisitedSet;
>    DenseSet<MachineInstr *> TrackedWaitcntSet;
> @@ -1841,7 +1841,7 @@ bool SIInsertWaitcnts::runOnMachineFunct
>    TRI = &TII->getRegisterInfo();
>    MRI = &MF.getRegInfo();
>    MLI = &getAnalysis<MachineLoopInfo>();
> -  IV = AMDGPU::IsaInfo::getIsaVersion(ST->getFeatureBits());
> +  IV = AMDGPU::getIsaVersion(ST->getCPU());
>    const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
>
>    ForceEmitZeroWaitcnts = ForceEmitZeroFlag;
>
> Modified: llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp?rev=341982&r1=341981&r2=341982&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp (original)
> +++ llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp Tue Sep 11 11:56:51 2018
> @@ -253,7 +253,7 @@ protected:
>    /// Instruction info.
>    const SIInstrInfo *TII = nullptr;
>
> -  IsaInfo::IsaVersion IV;
> +  IsaVersion IV;
>
>    SICacheControl(const GCNSubtarget &ST);
>
> @@ -605,7 +605,7 @@ Optional<SIMemOpInfo> SIMemOpAccess::get
>
>  SICacheControl::SICacheControl(const GCNSubtarget &ST) {
>    TII = ST.getInstrInfo();
> -  IV = IsaInfo::getIsaVersion(ST.getFeatureBits());
> +  IV = getIsaVersion(ST.getCPU());
>  }
>
>  /* static */
>
> Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp?rev=341982&r1=341981&r2=341982&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (original)
> +++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Tue Sep 11 11:56:51 2018
> @@ -137,68 +137,18 @@ int getMCOpcode(uint16_t Opcode, unsigne
>
>  namespace IsaInfo {
>
> -IsaVersion getIsaVersion(const FeatureBitset &Features) {
> -  // GCN GFX6 (Southern Islands (SI)).
> -  if (Features.test(FeatureISAVersion6_0_0))
> -    return {6, 0, 0};
> -  if (Features.test(FeatureISAVersion6_0_1))
> -    return {6, 0, 1};
> -
> -  // GCN GFX7 (Sea Islands (CI)).
> -  if (Features.test(FeatureISAVersion7_0_0))
> -    return {7, 0, 0};
> -  if (Features.test(FeatureISAVersion7_0_1))
> -    return {7, 0, 1};
> -  if (Features.test(FeatureISAVersion7_0_2))
> -    return {7, 0, 2};
> -  if (Features.test(FeatureISAVersion7_0_3))
> -    return {7, 0, 3};
> -  if (Features.test(FeatureISAVersion7_0_4))
> -    return {7, 0, 4};
> -  if (Features.test(FeatureSeaIslands))
> -    return {7, 0, 0};
> -
> -  // GCN GFX8 (Volcanic Islands (VI)).
> -  if (Features.test(FeatureISAVersion8_0_1))
> -    return {8, 0, 1};
> -  if (Features.test(FeatureISAVersion8_0_2))
> -    return {8, 0, 2};
> -  if (Features.test(FeatureISAVersion8_0_3))
> -    return {8, 0, 3};
> -  if (Features.test(FeatureISAVersion8_1_0))
> -    return {8, 1, 0};
> -  if (Features.test(FeatureVolcanicIslands))
> -    return {8, 0, 0};
> -
> -  // GCN GFX9.
> -  if (Features.test(FeatureISAVersion9_0_0))
> -    return {9, 0, 0};
> -  if (Features.test(FeatureISAVersion9_0_2))
> -    return {9, 0, 2};
> -  if (Features.test(FeatureISAVersion9_0_4))
> -    return {9, 0, 4};
> -  if (Features.test(FeatureISAVersion9_0_6))
> -    return {9, 0, 6};
> -  if (Features.test(FeatureGFX9))
> -    return {9, 0, 0};
> -
> -  if (Features.test(FeatureSouthernIslands))
> -    return {0, 0, 0};
> -  return {7, 0, 0};
> -}
> -
>  void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
>    auto TargetTriple = STI->getTargetTriple();
> -  auto ISAVersion = IsaInfo::getIsaVersion(STI->getFeatureBits());
> +  auto Version = getIsaVersion(STI->getCPU());
>
>    Stream << TargetTriple.getArchName() << '-'
>           << TargetTriple.getVendorName() << '-'
>           << TargetTriple.getOSName() << '-'
>           << TargetTriple.getEnvironmentName() << '-'
>           << "gfx"
> -         << ISAVersion.Major
> -         << ISAVersion.Minor
> -         << ISAVersion.Stepping;
> +         << Version.Major
> +         << Version.Minor
> +         << Version.Stepping;
>
>    if (hasXNACK(*STI))
>      Stream << "+xnack";
> @@ -210,49 +160,49 @@ bool hasCodeObjectV3(const MCSubtargetIn
>    return STI->getFeatureBits().test(FeatureCodeObjectV3);
>  }
>
> -unsigned getWavefrontSize(const FeatureBitset &Features) {
> -  if (Features.test(FeatureWavefrontSize16))
> +unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
> +  if (STI->getFeatureBits().test(FeatureWavefrontSize16))
>      return 16;
> -  if (Features.test(FeatureWavefrontSize32))
> +  if (STI->getFeatureBits().test(FeatureWavefrontSize32))
>      return 32;
>
>    return 64;
>  }
>
> -unsigned getLocalMemorySize(const FeatureBitset &Features) {
> -  if (Features.test(FeatureLocalMemorySize32768))
> +unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
> +  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
>      return 32768;
> -  if (Features.test(FeatureLocalMemorySize65536))
> +  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
>      return 65536;
>
>    return 0;
>  }
>
> -unsigned getEUsPerCU(const FeatureBitset &Features) {
> +unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
>    return 4;
>  }
>
> -unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
> +unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
>                                 unsigned FlatWorkGroupSize) {
> -  if (!Features.test(FeatureGCN))
> +  if (!STI->getFeatureBits().test(FeatureGCN))
>      return 8;
> -  unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
> +  unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
>    if (N == 1)
>      return 40;
>    N = 40 / N;
>    return std::min(N, 16u);
>  }
>
> -unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
> -  return getMaxWavesPerEU() * getEUsPerCU(Features);
> +unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) {
> +  return getMaxWavesPerEU() * getEUsPerCU(STI);
>  }
>
> -unsigned getMaxWavesPerCU(const FeatureBitset &Features,
> +unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
>                            unsigned FlatWorkGroupSize) {
> -  return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
> +  return getWavesPerWorkGroup(STI, FlatWorkGroupSize);
>  }
>
> -unsigned getMinWavesPerEU(const FeatureBitset &Features) {
> +unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
>    return 1;
>  }
>
> @@ -261,89 +211,89 @@ unsigned getMaxWavesPerEU() {
>    return 10;
>  }
>
> -unsigned getMaxWavesPerEU(const FeatureBitset &Features,
> +unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
>                            unsigned FlatWorkGroupSize) {
> -  return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
> -                 getEUsPerCU(Features)) / getEUsPerCU(Features);
> +  return alignTo(getMaxWavesPerCU(STI, FlatWorkGroupSize),
> +                 getEUsPerCU(STI)) / getEUsPerCU(STI);
>  }
>
> -unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
> +unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
>    return 1;
>  }
>
> -unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
> +unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
>    return 2048;
>  }
>
> -unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
> +unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
>                                unsigned FlatWorkGroupSize) {
> -  return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
> -                 getWavefrontSize(Features);
> +  return alignTo(FlatWorkGroupSize, getWavefrontSize(STI)) /
> +                 getWavefrontSize(STI);
>  }
>
> -unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
> -  IsaVersion Version = getIsaVersion(Features);
> +unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
> +  IsaVersion Version = getIsaVersion(STI->getCPU());
>    if (Version.Major >= 8)
>      return 16;
>    return 8;
>  }
>
> -unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
> +unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
>    return 8;
>  }
>
> -unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
> -  IsaVersion Version = getIsaVersion(Features);
> +unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
> +  IsaVersion Version = getIsaVersion(STI->getCPU());
>    if (Version.Major >= 8)
>      return 800;
>    return 512;
>  }
>
> -unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
> -  if (Features.test(FeatureSGPRInitBug))
> +unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
> +  if (STI->getFeatureBits().test(FeatureSGPRInitBug))
>      return FIXED_NUM_SGPRS_FOR_INIT_BUG;
>
> -  IsaVersion Version = getIsaVersion(Features);
> +  IsaVersion Version = getIsaVersion(STI->getCPU());
>    if (Version.Major >= 8)
>      return 102;
>    return 104;
>  }
>
> -unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
> +unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
>    assert(WavesPerEU != 0);
>
>    if (WavesPerEU >= getMaxWavesPerEU())
>      return 0;
>
> -  unsigned MinNumSGPRs = getTotalNumSGPRs(Features) / (WavesPerEU + 1);
> -  if (Features.test(FeatureTrapHandler))
> +  unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
> +  if (STI->getFeatureBits().test(FeatureTrapHandler))
>      MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
> -  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(Features)) + 1;
> -  return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
> +  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
> +  return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
>  }
>
> -unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
> +unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
>                          bool Addressable) {
>    assert(WavesPerEU != 0);
>
> -  IsaVersion Version = getIsaVersion(Features);
> -  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
> +  IsaVersion Version = getIsaVersion(STI->getCPU());
> +  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
>    if (Version.Major >= 8 && !Addressable)
>      AddressableNumSGPRs = 112;
> -  unsigned MaxNumSGPRs = getTotalNumSGPRs(Features) / WavesPerEU;
> -  if (Features.test(FeatureTrapHandler))
> +  unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
> +  if (STI->getFeatureBits().test(FeatureTrapHandler))
>      MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
> -  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(Features));
> +  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
>    return std::min(MaxNumSGPRs, AddressableNumSGPRs);
>  }
>
> -unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
> +unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
>                            bool FlatScrUsed, bool XNACKUsed) {
>    unsigned ExtraSGPRs = 0;
>    if (VCCUsed)
>      ExtraSGPRs = 2;
>
> -  IsaVersion Version = getIsaVersion(Features);
> +  IsaVersion Version = getIsaVersion(STI->getCPU());
>    if (Version.Major < 8) {
>      if (FlatScrUsed)
>        ExtraSGPRs = 4;
> @@ -358,74 +308,74 @@ unsigned getNumExtraSGPRs(const FeatureB
>    return ExtraSGPRs;
>  }
>
> -unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
> +unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
>                            bool FlatScrUsed) {
> -  return getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed,
> -                          Features[AMDGPU::FeatureXNACK]);
> +  return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
> +                          STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
>  }
>
> -unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs) {
> -  NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(Features));
> +unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
> +  NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
>    // SGPRBlocks is actual number of SGPR blocks minus 1.
> -  return NumSGPRs / getSGPREncodingGranule(Features) - 1;
> +  return NumSGPRs / getSGPREncodingGranule(STI) - 1;
>  }
>
> -unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
> +unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI) {
>    return 4;
>  }
>
> -unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
> -  return getVGPRAllocGranule(Features);
> +unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI) {
> +  return getVGPRAllocGranule(STI);
>  }
>
> -unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
> +unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
>    return 256;
>  }
>
> -unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
> -  return getTotalNumVGPRs(Features);
> +unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
> +  return getTotalNumVGPRs(STI);
>  }
>
> -unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
> +unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
>    assert(WavesPerEU != 0);
>
>    if (WavesPerEU >= getMaxWavesPerEU())
>      return 0;
>    unsigned MinNumVGPRs =
> -      alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
> -                getVGPRAllocGranule(Features)) + 1;
> -  return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
> +      alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1),
> +                getVGPRAllocGranule(STI)) + 1;
> +  return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI));
>  }
>
> -unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
> +unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
>    assert(WavesPerEU != 0);
>
> -  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
> -                                   getVGPRAllocGranule(Features));
> -  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
> +  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
> +                                   getVGPRAllocGranule(STI));
> +  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
>    return std::min(MaxNumVGPRs, AddressableNumVGPRs);
>  }
>
> -unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumVGPRs) {
> -  NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(Features));
> +unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs) {
> +  NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(STI));
>    // VGPRBlocks is actual number of VGPR blocks minus 1.
> -  return NumVGPRs / getVGPREncodingGranule(Features) - 1;
> +  return NumVGPRs / getVGPREncodingGranule(STI) - 1;
>  }
>
>  } // end namespace IsaInfo
>
>  void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
> -                               const FeatureBitset &Features) {
> -  IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);
> +                               const MCSubtargetInfo *STI) {
> +  IsaVersion Version = getIsaVersion(STI->getCPU());
>
>    memset(&Header, 0, sizeof(Header));
>
>    Header.amd_kernel_code_version_major = 1;
>    Header.amd_kernel_code_version_minor = 2;
>    Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
> -  Header.amd_machine_version_major = ISA.Major;
> -  Header.amd_machine_version_minor = ISA.Minor;
> -  Header.amd_machine_version_stepping = ISA.Stepping;
> +  Header.amd_machine_version_major = Version.Major;
> +  Header.amd_machine_version_minor = Version.Minor;
> +  Header.amd_machine_version_stepping = Version.Stepping;
>    Header.kernel_code_entry_byte_offset = sizeof(Header);
>    // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
>    Header.wavefront_size = 6;
> @@ -513,7 +463,7 @@ std::pair<int, int> getIntegerPairAttrib
>    return Ints;
>  }
>
> -unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
> +unsigned getVmcntBitMask(const IsaVersion &Version) {
>    unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
>    if (Version.Major < 9)
>      return VmcntLo;
> @@ -522,15 +472,15 @@ unsigned getVmcntBitMask(const IsaInfo::
>    return VmcntLo | VmcntHi;
>  }
>
> -unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
> +unsigned getExpcntBitMask(const IsaVersion &Version) {
>    return (1 << getExpcntBitWidth()) - 1;
>  }
>
> -unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
> +unsigned getLgkmcntBitMask(const IsaVersion &Version) {
>    return (1 << getLgkmcntBitWidth()) - 1;
>  }
>
> -unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
> +unsigned getWaitcntBitMask(const IsaVersion &Version) {
>    unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
>    unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
>    unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
> @@ -542,7 +492,7 @@ unsigned getWaitcntBitMask(const IsaInfo
>    return Waitcnt | VmcntHi;
>  }
>
> -unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
> +unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
>    unsigned VmcntLo =
>        unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
>    if (Version.Major < 9)
> @@ -554,22 +504,22 @@ unsigned decodeVmcnt(const IsaInfo::IsaV
>    return VmcntLo | VmcntHi;
>  }
>
> -unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
> +unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
>    return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
>  }
>
> -unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
> +unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
>    return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
>  }
>
> -void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
> +void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
>                     unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
>    Vmcnt = decodeVmcnt(Version, Waitcnt);
>    Expcnt = decodeExpcnt(Version, Waitcnt);
>    Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
>  }
>
> -unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
> +unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
>                       unsigned Vmcnt) {
>    Waitcnt =
>        packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
> @@ -580,17 +530,17 @@ unsigned encodeVmcnt(const IsaInfo::IsaV
>    return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
>  }
>
> -unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
> +unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
>                        unsigned Expcnt) {
>    return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
>  }
>
> -unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
> +unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
>                         unsigned Lgkmcnt) {
>    return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
>  }
>
> -unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
> +unsigned encodeWaitcnt(const IsaVersion &Version,
>                         unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
>    unsigned Waitcnt = getWaitcntBitMask(Version);
>    Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
>
> Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h?rev=341982&r1=341981&r2=341982&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (original)
> +++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h Tue Sep 11 11:56:51 2018
> @@ -19,6 +19,7 @@
>  #include "llvm/Support/AMDHSAKernelDescriptor.h"
>  #include "llvm/Support/Compiler.h"
>  #include "llvm/Support/ErrorHandling.h"
> +#include "llvm/Support/TargetParser.h"
>  #include <cstdint>
>  #include <string>
>  #include <utility>
> @@ -56,16 +57,6 @@ enum {
>    TRAP_NUM_SGPRS = 16
>  };
>
> -/// Instruction set architecture version.
> -struct IsaVersion {
> -  unsigned Major;
> -  unsigned Minor;
> -  unsigned Stepping;
> -};
> -
> -/// \returns Isa version for given subtarget \p Features.
> -IsaVersion getIsaVersion(const FeatureBitset &Features);
> -
>  /// Streams isa version string for given subtarget \p STI into \p Stream.
>  void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
>
> @@ -73,114 +64,114 @@ void streamIsaVersion(const MCSubtargetI
>  /// false otherwise.
>  bool hasCodeObjectV3(const MCSubtargetInfo *STI);
>
> -/// \returns Wavefront size for given subtarget \p Features.
> -unsigned getWavefrontSize(const FeatureBitset &Features);
> +/// \returns Wavefront size for given subtarget \p STI.
> +unsigned getWavefrontSize(const MCSubtargetInfo *STI);
>
> -/// \returns Local memory size in bytes for given subtarget \p Features.
> -unsigned getLocalMemorySize(const FeatureBitset &Features);
> +/// \returns Local memory size in bytes for given subtarget \p STI.
> +unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
>
>  /// \returns Number of execution units per compute unit for given subtarget \p
> -/// Features.
> -unsigned getEUsPerCU(const FeatureBitset &Features);
> +/// STI.
> +unsigned getEUsPerCU(const MCSubtargetInfo *STI);
>
>  /// \returns Maximum number of work groups per compute unit for given subtarget
> -/// \p Features and limited by given \p FlatWorkGroupSize.
> -unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
> +/// \p STI and limited by given \p FlatWorkGroupSize.
> +unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
>                                 unsigned FlatWorkGroupSize);
>
>  /// \returns Maximum number of waves per compute unit for given subtarget \p
> -/// Features without any kind of limitation.
> -unsigned getMaxWavesPerCU(const FeatureBitset &Features);
> +/// STI without any kind of limitation.
> +unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI);
>
>  /// \returns Maximum number of waves per compute unit for given subtarget \p
> -/// Features and limited by given \p FlatWorkGroupSize.
> -unsigned getMaxWavesPerCU(const FeatureBitset &Features,
> +/// STI and limited by given \p FlatWorkGroupSize.
> +unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
>                            unsigned FlatWorkGroupSize);
>
>  /// \returns Minimum number of waves per execution unit for given subtarget \p
> -/// Features.
> -unsigned getMinWavesPerEU(const FeatureBitset &Features);
> +/// STI.
> +unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
>
>  /// \returns Maximum number of waves per execution unit for given subtarget \p
> -/// Features without any kind of limitation.
> +/// STI without any kind of limitation.
>  unsigned getMaxWavesPerEU();
>
>  /// \returns Maximum number of waves per execution unit for given subtarget \p
> -/// Features and limited by given \p FlatWorkGroupSize.
> -unsigned getMaxWavesPerEU(const FeatureBitset &Features,
> +/// STI and limited by given \p FlatWorkGroupSize.
> +unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
>                            unsigned FlatWorkGroupSize);
>
> -/// \returns Minimum flat work group size for given subtarget \p Features.
> -unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features);
> +/// \returns Minimum flat work group size for given subtarget \p STI.
> +unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
>
> -/// \returns Maximum flat work group size for given subtarget \p Features.
> -unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features);
> +/// \returns Maximum flat work group size for given subtarget \p STI.
> +unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
>
> -/// \returns Number of waves per work group for given subtarget \p Features and
> +/// \returns Number of waves per work group for given subtarget \p STI and
>  /// limited by given \p FlatWorkGroupSize.
> -unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
> +unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
>                                unsigned FlatWorkGroupSize);
>
> -/// \returns SGPR allocation granularity for given subtarget \p Features.
> -unsigned getSGPRAllocGranule(const FeatureBitset &Features);
> +/// \returns SGPR allocation granularity for given subtarget \p STI.
> +unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
>
> -/// \returns SGPR encoding granularity for given subtarget \p Features.
> -unsigned getSGPREncodingGranule(const FeatureBitset &Features);
> +/// \returns SGPR encoding granularity for given subtarget \p STI.
> +unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
>
> -/// \returns Total number of SGPRs for given subtarget \p Features.
> -unsigned getTotalNumSGPRs(const FeatureBitset &Features);
> +/// \returns Total number of SGPRs for given subtarget \p STI.
> +unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
>
> -/// \returns Addressable number of SGPRs for given subtarget \p Features.
> -unsigned getAddressableNumSGPRs(const FeatureBitset &Features);
> +/// \returns Addressable number of SGPRs for given subtarget \p STI.
> +unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
>
>  /// \returns Minimum number of SGPRs that meets the given number of waves per
> -/// execution unit requirement for given subtarget \p Features.
> -unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
> +/// execution unit requirement for given subtarget \p STI.
> +unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
>
>  /// \returns Maximum number of SGPRs that meets the given number of waves per
> -/// execution unit requirement for given subtarget \p Features.
> -unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
> +/// execution unit requirement for given subtarget \p STI.
> +unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
>                          bool Addressable);
>
>  /// \returns Number of extra SGPRs implicitly required by given subtarget \p
> -/// Features when the given special registers are used.
> -unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
> +/// STI when the given special registers are used.
> +unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
>                            bool FlatScrUsed, bool XNACKUsed);
>
>  /// \returns Number of extra SGPRs implicitly required by given subtarget \p
> -/// Features when the given special registers are used. XNACK is inferred from
> -/// \p Features.
> -unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
> +/// STI when the given special registers are used. XNACK is inferred from
> +/// \p STI.
> +unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
>                            bool FlatScrUsed);
>
> -/// \returns Number of SGPR blocks needed for given subtarget \p Features when
> +/// \returns Number of SGPR blocks needed for given subtarget \p STI when
>  /// \p NumSGPRs are used. \p NumSGPRs should already include any special
>  /// register counts.
> -unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs);
> +unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
>
> -/// \returns VGPR allocation granularity for given subtarget \p Features.
> -unsigned getVGPRAllocGranule(const FeatureBitset &Features);
> +/// \returns VGPR allocation granularity for given subtarget \p STI.
> +unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI);
>
> -/// \returns VGPR encoding granularity for given subtarget \p Features.
> -unsigned getVGPREncodingGranule(const FeatureBitset &Features);
> +/// \returns VGPR encoding granularity for given subtarget \p STI.
> +unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI);
>
> -/// \returns Total number of VGPRs for given subtarget \p Features.
> -unsigned getTotalNumVGPRs(const FeatureBitset &Features);
> +/// \returns Total number of VGPRs for given subtarget \p STI.
> +unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
>
> -/// \returns Addressable number of VGPRs for given subtarget \p Features.
> -unsigned getAddressableNumVGPRs(const FeatureBitset &Features);
> +/// \returns Addressable number of VGPRs for given subtarget \p STI.
> +unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
>
>  /// \returns Minimum number of VGPRs that meets given number of waves per
> -/// execution unit requirement for given subtarget \p Features.
> -unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
> +/// execution unit requirement for given subtarget \p STI.
> +unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
>
>  /// \returns Maximum number of VGPRs that meets given number of waves per
> -/// execution unit requirement for given subtarget \p Features.
> -unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
> +/// execution unit requirement for given subtarget \p STI.
> +unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
>
> -/// \returns Number of VGPR blocks needed for given subtarget \p Features when
> +/// \returns Number of VGPR blocks needed for given subtarget \p STI when
>  /// \p NumVGPRs are used.
> -unsigned
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180912/a1e3854a/attachment-0001.html>


More information about the llvm-commits mailing list