[llvm] r341982 - AMDGPU: Move isa version and EF_AMDGPU_MACH_* determination

Ilya Biryukov via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 12 00:08:05 PDT 2018


Reverted in r342023 to unbreak our integration. Please fix the layering
violation before resubmitting.
Sorry for the inconvenience, but there were plenty of broken revisions, so
I wanted to unbreak things as soon as possible.


On Wed, Sep 12, 2018 at 12:42 AM Benjamin Kramer via llvm-commits <
llvm-commits at lists.llvm.org> wrote:

>
>
> On Tue, Sep 11, 2018 at 8:58 PM Konstantin Zhuravlyov via llvm-commits <
> llvm-commits at lists.llvm.org> wrote:
>
>> Author: kzhuravl
>> Date: Tue Sep 11 11:56:51 2018
>> New Revision: 341982
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=341982&view=rev
>> Log:
>> AMDGPU: Move isa version and EF_AMDGPU_MACH_* determination
>> into TargetParser.
>>
>> Also switch away from target features to the CPU string when
>> determining the ISA version. This fixes an issue where we
>> output the wrong ISA version in the object code when features
>> of a particular CPU are altered (e.g. gfx902 w/o xnack
>> used to result in gfx900).
>>
>> Differential Revision: https://reviews.llvm.org/D51890
>>
>>
>> Added:
>>     llvm/trunk/test/CodeGen/AMDGPU/gfx902-without-xnack.ll
>> Modified:
>>     llvm/trunk/include/llvm/Support/TargetParser.h
>>     llvm/trunk/lib/Support/TargetParser.cpp
>>     llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
>>     llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
>>     llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
>>     llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
>>     llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
>>     llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
>>     llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
>>     llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
>>     llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
>>     llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
>>     llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
>>     llvm/trunk/test/MC/AMDGPU/hsa_isa_version_attrs.s
>>
>> Modified: llvm/trunk/include/llvm/Support/TargetParser.h
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/TargetParser.h?rev=341982&r1=341981&r2=341982&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/include/llvm/Support/TargetParser.h (original)
>> +++ llvm/trunk/include/llvm/Support/TargetParser.h Tue Sep 11 11:56:51
>> 2018
>> @@ -320,6 +320,13 @@ enum GPUKind : uint32_t {
>>    GK_AMDGCN_LAST = GK_GFX906,
>>  };
>>
>> +/// Instruction set architecture version.
>> +struct IsaVersion {
>> +  unsigned Major;
>> +  unsigned Minor;
>> +  unsigned Stepping;
>> +};
>> +
>>  // This isn't comprehensive for now, just things that are needed from the
>>  // frontend driver.
>>  enum ArchFeatureKind : uint32_t {
>> @@ -335,18 +342,22 @@ enum ArchFeatureKind : uint32_t {
>>    FEATURE_FAST_DENORMAL_F32 = 1 << 5
>>  };
>>
>> -GPUKind parseArchAMDGCN(StringRef CPU);
>> -GPUKind parseArchR600(StringRef CPU);
>>  StringRef getArchNameAMDGCN(GPUKind AK);
>>  StringRef getArchNameR600(GPUKind AK);
>>  StringRef getCanonicalArchName(StringRef Arch);
>> +GPUKind parseArchAMDGCN(StringRef CPU);
>> +GPUKind parseArchR600(StringRef CPU);
>>  unsigned getArchAttrAMDGCN(GPUKind AK);
>>  unsigned getArchAttrR600(GPUKind AK);
>>
>>  void fillValidArchListAMDGCN(SmallVectorImpl<StringRef> &Values);
>>  void fillValidArchListR600(SmallVectorImpl<StringRef> &Values);
>>
>> -}
>> +StringRef getArchNameFromElfMach(unsigned ElfMach);
>> +unsigned getElfMach(StringRef GPU);
>> +IsaVersion getIsaVersion(StringRef GPU);
>> +
>> +} // namespace AMDGPU
>>
>>  } // namespace llvm
>>
>>
>> Modified: llvm/trunk/lib/Support/TargetParser.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/TargetParser.cpp?rev=341982&r1=341981&r2=341982&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Support/TargetParser.cpp (original)
>> +++ llvm/trunk/lib/Support/TargetParser.cpp Tue Sep 11 11:56:51 2018
>> @@ -17,11 +17,13 @@
>>  #include "llvm/ADT/ArrayRef.h"
>>  #include "llvm/ADT/StringSwitch.h"
>>  #include "llvm/ADT/Twine.h"
>> +#include "llvm/BinaryFormat/ELF.h"
>>
>
> This is a layering violation: lib/Support cannot depend on things in
> BinaryFormat.
>
>
>>  #include <cctype>
>>
>>  using namespace llvm;
>>  using namespace ARM;
>>  using namespace AArch64;
>> +using namespace AMDGPU;
>>
>>  namespace {
>>
>> @@ -947,6 +949,8 @@ bool llvm::AArch64::isX18ReservedByDefau
>>           TT.isOSWindows();
>>  }
>>
>> +namespace {
>> +
>>  struct GPUInfo {
>>    StringLiteral Name;
>>    StringLiteral CanonicalName;
>> @@ -954,11 +958,9 @@ struct GPUInfo {
>>    unsigned Features;
>>  };
>>
>> -using namespace AMDGPU;
>> -static constexpr GPUInfo R600GPUs[26] = {
>> -  // Name         Canonical    Kind       Features
>> -  //              Name
>> -  //
>> +constexpr GPUInfo R600GPUs[26] = {
>> +  // Name       Canonical    Kind        Features
>> +  //            Name
>>    {{"r600"},    {"r600"},    GK_R600,    FEATURE_NONE },
>>    {{"rv630"},   {"r600"},    GK_R600,    FEATURE_NONE },
>>    {{"rv635"},   {"r600"},    GK_R600,    FEATURE_NONE },
>> @@ -989,9 +991,9 @@ static constexpr GPUInfo R600GPUs[26] =
>>
>>  // This table should be sorted by the value of GPUKind
>>  // Don't bother listing the implicitly true features
>> -static constexpr GPUInfo AMDGCNGPUs[32] = {
>> -  // Name           Canonical    Kind      Features
>> -  //                Name
>> +constexpr GPUInfo AMDGCNGPUs[32] = {
>> +  // Name         Canonical    Kind        Features
>> +  //              Name
>>    {{"gfx600"},    {"gfx600"},  GK_GFX600,  FEATURE_FAST_FMA_F32},
>>    {{"tahiti"},    {"gfx600"},  GK_GFX600,  FEATURE_FAST_FMA_F32},
>>    {{"gfx601"},    {"gfx601"},  GK_GFX601,  FEATURE_NONE},
>> @@ -1026,8 +1028,7 @@ static constexpr GPUInfo AMDGCNGPUs[32]
>>    {{"gfx906"},    {"gfx906"},  GK_GFX906,
>> FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
>>  };
>>
>> -static const GPUInfo *getArchEntry(AMDGPU::GPUKind AK,
>> -                                   ArrayRef<GPUInfo> Table) {
>> +const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table)
>> {
>>    GPUInfo Search = { {""}, {""}, AK, AMDGPU::FEATURE_NONE };
>>
>>    auto I = std::lower_bound(Table.begin(), Table.end(), Search,
>> @@ -1040,6 +1041,8 @@ static const GPUInfo *getArchEntry(AMDGP
>>    return I;
>>  }
>>
>> +} // namespace
>> +
>>  StringRef llvm::AMDGPU::getArchNameAMDGCN(GPUKind AK) {
>>    if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs))
>>      return Entry->CanonicalName;
>> @@ -1092,3 +1095,118 @@ void AMDGPU::fillValidArchListR600(Small
>>    for (const auto C : R600GPUs)
>>      Values.push_back(C.Name);
>>  }
>> +
>> +StringRef AMDGPU::getArchNameFromElfMach(unsigned ElfMach) {
>> +  AMDGPU::GPUKind AK;
>> +
>> +  switch (ElfMach) {
>> +  case ELF::EF_AMDGPU_MACH_R600_R600:     AK = GK_R600;    break;
>> +  case ELF::EF_AMDGPU_MACH_R600_R630:     AK = GK_R630;    break;
>> +  case ELF::EF_AMDGPU_MACH_R600_RS880:    AK = GK_RS880;   break;
>> +  case ELF::EF_AMDGPU_MACH_R600_RV670:    AK = GK_RV670;   break;
>> +  case ELF::EF_AMDGPU_MACH_R600_RV710:    AK = GK_RV710;   break;
>> +  case ELF::EF_AMDGPU_MACH_R600_RV730:    AK = GK_RV730;   break;
>> +  case ELF::EF_AMDGPU_MACH_R600_RV770:    AK = GK_RV770;   break;
>> +  case ELF::EF_AMDGPU_MACH_R600_CEDAR:    AK = GK_CEDAR;   break;
>> +  case ELF::EF_AMDGPU_MACH_R600_CYPRESS:  AK = GK_CYPRESS; break;
>> +  case ELF::EF_AMDGPU_MACH_R600_JUNIPER:  AK = GK_JUNIPER; break;
>> +  case ELF::EF_AMDGPU_MACH_R600_REDWOOD:  AK = GK_REDWOOD; break;
>> +  case ELF::EF_AMDGPU_MACH_R600_SUMO:     AK = GK_SUMO;    break;
>> +  case ELF::EF_AMDGPU_MACH_R600_BARTS:    AK = GK_BARTS;   break;
>> +  case ELF::EF_AMDGPU_MACH_R600_CAICOS:   AK = GK_CAICOS;  break;
>> +  case ELF::EF_AMDGPU_MACH_R600_CAYMAN:   AK = GK_CAYMAN;  break;
>> +  case ELF::EF_AMDGPU_MACH_R600_TURKS:    AK = GK_TURKS;   break;
>> +  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600;  break;
>> +  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601;  break;
>> +  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700;  break;
>> +  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701;  break;
>> +  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702;  break;
>> +  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703;  break;
>> +  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704;  break;
>> +  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801;  break;
>> +  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802;  break;
>> +  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803;  break;
>> +  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810;  break;
>> +  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900;  break;
>> +  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902;  break;
>> +  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904;  break;
>> +  case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906;  break;
>> +  case ELF::EF_AMDGPU_MACH_NONE:          AK = GK_NONE;    break;
>> +  }
>> +
>> +  StringRef GPUName = getArchNameAMDGCN(AK);
>> +  if (GPUName != "")
>> +    return GPUName;
>> +  return getArchNameR600(AK);
>> +}
>> +
>> +unsigned AMDGPU::getElfMach(StringRef GPU) {
>> +  AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
>> +  if (AK == AMDGPU::GPUKind::GK_NONE)
>> +    AK = parseArchR600(GPU);
>> +
>> +  switch (AK) {
>> +  case GK_R600:    return ELF::EF_AMDGPU_MACH_R600_R600;
>> +  case GK_R630:    return ELF::EF_AMDGPU_MACH_R600_R630;
>> +  case GK_RS880:   return ELF::EF_AMDGPU_MACH_R600_RS880;
>> +  case GK_RV670:   return ELF::EF_AMDGPU_MACH_R600_RV670;
>> +  case GK_RV710:   return ELF::EF_AMDGPU_MACH_R600_RV710;
>> +  case GK_RV730:   return ELF::EF_AMDGPU_MACH_R600_RV730;
>> +  case GK_RV770:   return ELF::EF_AMDGPU_MACH_R600_RV770;
>> +  case GK_CEDAR:   return ELF::EF_AMDGPU_MACH_R600_CEDAR;
>> +  case GK_CYPRESS: return ELF::EF_AMDGPU_MACH_R600_CYPRESS;
>> +  case GK_JUNIPER: return ELF::EF_AMDGPU_MACH_R600_JUNIPER;
>> +  case GK_REDWOOD: return ELF::EF_AMDGPU_MACH_R600_REDWOOD;
>> +  case GK_SUMO:    return ELF::EF_AMDGPU_MACH_R600_SUMO;
>> +  case GK_BARTS:   return ELF::EF_AMDGPU_MACH_R600_BARTS;
>> +  case GK_CAICOS:  return ELF::EF_AMDGPU_MACH_R600_CAICOS;
>> +  case GK_CAYMAN:  return ELF::EF_AMDGPU_MACH_R600_CAYMAN;
>> +  case GK_TURKS:   return ELF::EF_AMDGPU_MACH_R600_TURKS;
>> +  case GK_GFX600:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600;
>> +  case GK_GFX601:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601;
>> +  case GK_GFX700:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700;
>> +  case GK_GFX701:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701;
>> +  case GK_GFX702:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702;
>> +  case GK_GFX703:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703;
>> +  case GK_GFX704:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704;
>> +  case GK_GFX801:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801;
>> +  case GK_GFX802:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802;
>> +  case GK_GFX803:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803;
>> +  case GK_GFX810:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810;
>> +  case GK_GFX900:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900;
>> +  case GK_GFX902:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902;
>> +  case GK_GFX904:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904;
>> +  case GK_GFX906:  return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
>> +  case GK_NONE:    return ELF::EF_AMDGPU_MACH_NONE;
>> +  }
>> +
>> +  llvm_unreachable("unknown GPU");
>> +}
>> +
>> +AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
>> +  if (GPU == "generic")
>> +    return {7, 0, 0};
>> +
>> +  AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
>> +  if (AK == AMDGPU::GPUKind::GK_NONE)
>> +    return {0, 0, 0};
>> +
>> +  switch (AK) {
>> +  case GK_GFX600: return {6, 0, 0};
>> +  case GK_GFX601: return {6, 0, 1};
>> +  case GK_GFX700: return {7, 0, 0};
>> +  case GK_GFX701: return {7, 0, 1};
>> +  case GK_GFX702: return {7, 0, 2};
>> +  case GK_GFX703: return {7, 0, 3};
>> +  case GK_GFX704: return {7, 0, 4};
>> +  case GK_GFX801: return {8, 0, 1};
>> +  case GK_GFX802: return {8, 0, 2};
>> +  case GK_GFX803: return {8, 0, 3};
>> +  case GK_GFX810: return {8, 1, 0};
>> +  case GK_GFX900: return {9, 0, 0};
>> +  case GK_GFX902: return {9, 0, 2};
>> +  case GK_GFX904: return {9, 0, 4};
>> +  case GK_GFX906: return {9, 0, 6};
>> +  default:        return {0, 0, 0};
>> +  }
>> +}
>>
>> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp?rev=341982&r1=341981&r2=341982&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (original)
>> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Tue Sep 11 11:56:51
>> 2018
>> @@ -40,6 +40,7 @@
>>  #include "llvm/MC/MCStreamer.h"
>>  #include "llvm/Support/AMDGPUMetadata.h"
>>  #include "llvm/Support/MathExtras.h"
>> +#include "llvm/Support/TargetParser.h"
>>  #include "llvm/Support/TargetRegistry.h"
>>  #include "llvm/Target/TargetLoweringObjectFile.h"
>>
>> @@ -134,9 +135,9 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFil
>>      getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1);
>>
>>    // HSA and PAL emit NT_AMDGPU_HSA_ISA for code objects v2.
>> -  IsaInfo::IsaVersion ISA =
>> IsaInfo::getIsaVersion(getSTI()->getFeatureBits());
>> +  IsaVersion Version = getIsaVersion(getSTI()->getCPU());
>>    getTargetStreamer()->EmitDirectiveHSACodeObjectISA(
>> -      ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU");
>> +      Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU");
>>  }
>>
>>  void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
>> @@ -240,7 +241,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyE
>>        *getSTI(), KernelName, getAmdhsaKernelDescriptor(*MF,
>> CurrentProgramInfo),
>>        CurrentProgramInfo.NumVGPRsForWavesPerEU,
>>        CurrentProgramInfo.NumSGPRsForWavesPerEU -
>> -          IsaInfo::getNumExtraSGPRs(getSTI()->getFeatureBits(),
>> +          IsaInfo::getNumExtraSGPRs(getSTI(),
>>                                      CurrentProgramInfo.VCCUsed,
>>                                      CurrentProgramInfo.FlatUsed),
>>        CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
>> @@ -561,7 +562,7 @@ static bool hasAnyNonFlatUseOfReg(const
>>
>>  int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs(
>>    const GCNSubtarget &ST) const {
>> -  return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(),
>> +  return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(&ST,
>>                                                       UsesVCC,
>> UsesFlatScratch);
>>  }
>>
>> @@ -758,7 +759,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo
>>
>>            // 48 SGPRs - vcc, - flat_scr, -xnack
>>            int MaxSGPRGuess =
>> -              47 - IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(), true,
>> +              47 - IsaInfo::getNumExtraSGPRs(getSTI(), true,
>>                                               ST.hasFlatAddressSpace());
>>            MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
>>            MaxVGPR = std::max(MaxVGPR, 23);
>> @@ -823,7 +824,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(
>>    // duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and
>> could be
>>    // unified.
>>    unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
>> -      STM.getFeatureBits(), ProgInfo.VCCUsed, ProgInfo.FlatUsed);
>> +      getSTI(), ProgInfo.VCCUsed, ProgInfo.FlatUsed);
>>
>>    // Check the addressable register limit before we add ExtraSGPRs.
>>    if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
>> @@ -905,9 +906,9 @@ void AMDGPUAsmPrinter::getSIProgramInfo(
>>    }
>>
>>    ProgInfo.SGPRBlocks = IsaInfo::getNumSGPRBlocks(
>> -      STM.getFeatureBits(), ProgInfo.NumSGPRsForWavesPerEU);
>> +      getSTI(), ProgInfo.NumSGPRsForWavesPerEU);
>>    ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks(
>> -      STM.getFeatureBits(), ProgInfo.NumVGPRsForWavesPerEU);
>> +      getSTI(), ProgInfo.NumVGPRsForWavesPerEU);
>>
>>    // Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
>>    // DebuggerPrivateSegmentBufferSGPR fields if
>> "amdgpu-debugger-emit-prologue"
>> @@ -1137,7 +1138,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(
>>    const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
>>    const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
>>
>> -  AMDGPU::initDefaultAMDKernelCodeT(Out, STM.getFeatureBits());
>> +  AMDGPU::initDefaultAMDKernelCodeT(Out, getSTI());
>>
>>    Out.compute_pgm_resource_registers =
>>        CurrentProgramInfo.ComputePGMRSrc1 |
>>
>> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=341982&r1=341981&r2=341982&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
>> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Tue Sep 11 11:56:51
>> 2018
>> @@ -124,10 +124,8 @@ GCNSubtarget::initializeSubtargetDepende
>>    return *this;
>>  }
>>
>> -AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT,
>> -                                             const FeatureBitset
>> &FeatureBits) :
>> +AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) :
>>    TargetTriple(TT),
>> -  SubtargetFeatureBits(FeatureBits),
>>    Has16BitInsts(false),
>>    HasMadMixInsts(false),
>>    FP32Denormals(false),
>> @@ -144,9 +142,9 @@ AMDGPUSubtarget::AMDGPUSubtarget(const T
>>    { }
>>
>>  GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
>> -                                 const GCNTargetMachine &TM) :
>> +                           const GCNTargetMachine &TM) :
>>      AMDGPUGenSubtargetInfo(TT, GPU, FS),
>> -    AMDGPUSubtarget(TT, getFeatureBits()),
>> +    AMDGPUSubtarget(TT),
>>      TargetTriple(TT),
>>      Gen(SOUTHERN_ISLANDS),
>>      IsaVersion(ISAVersion0_0_0),
>> @@ -448,7 +446,7 @@ unsigned AMDGPUSubtarget::getKernArgSegm
>>  R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef
>> FS,
>>                               const TargetMachine &TM) :
>>    R600GenSubtargetInfo(TT, GPU, FS),
>> -  AMDGPUSubtarget(TT, getFeatureBits()),
>> +  AMDGPUSubtarget(TT),
>>    InstrInfo(*this),
>>    FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(),
>> 0),
>>    FMA(false),
>>
>> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=341982&r1=341981&r2=341982&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
>> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Tue Sep 11 11:56:51
>> 2018
>> @@ -63,7 +63,6 @@ private:
>>    Triple TargetTriple;
>>
>>  protected:
>> -  const FeatureBitset &SubtargetFeatureBits;
>>    bool Has16BitInsts;
>>    bool HasMadMixInsts;
>>    bool FP32Denormals;
>> @@ -79,7 +78,7 @@ protected:
>>    unsigned WavefrontSize;
>>
>>  public:
>> -  AMDGPUSubtarget(const Triple &TT, const FeatureBitset &FeatureBits);
>> +  AMDGPUSubtarget(const Triple &TT);
>>
>>    static const AMDGPUSubtarget &get(const MachineFunction &MF);
>>    static const AMDGPUSubtarget &get(const TargetMachine &TM,
>> @@ -203,33 +202,21 @@ public:
>>
>>    /// \returns Maximum number of work groups per compute unit supported
>> by the
>>    /// subtarget and limited by given \p FlatWorkGroupSize.
>> -  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
>> -    return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(SubtargetFeatureBits,
>> -                                                  FlatWorkGroupSize);
>> -  }
>> +  virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize)
>> const = 0;
>>
>>    /// \returns Minimum flat work group size supported by the subtarget.
>> -  unsigned getMinFlatWorkGroupSize() const {
>> -    return
>> AMDGPU::IsaInfo::getMinFlatWorkGroupSize(SubtargetFeatureBits);
>> -  }
>> +  virtual unsigned getMinFlatWorkGroupSize() const = 0;
>>
>>    /// \returns Maximum flat work group size supported by the subtarget.
>> -  unsigned getMaxFlatWorkGroupSize() const {
>> -    return
>> AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(SubtargetFeatureBits);
>> -  }
>> +  virtual unsigned getMaxFlatWorkGroupSize() const = 0;
>>
>>    /// \returns Maximum number of waves per execution unit supported by
>> the
>>    /// subtarget and limited by given \p FlatWorkGroupSize.
>> -  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
>> -    return AMDGPU::IsaInfo::getMaxWavesPerEU(SubtargetFeatureBits,
>> -                                             FlatWorkGroupSize);
>> -  }
>> +  virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const  =
>> 0;
>>
>>    /// \returns Minimum number of waves per execution unit supported by
>> the
>>    /// subtarget.
>> -  unsigned getMinWavesPerEU() const {
>> -    return AMDGPU::IsaInfo::getMinWavesPerEU(SubtargetFeatureBits);
>> -  }
>> +  virtual unsigned getMinWavesPerEU() const = 0;
>>
>>    unsigned getMaxWavesPerEU() const { return 10; }
>>
>> @@ -708,20 +695,19 @@ public:
>>    /// \returns Number of execution units per compute unit supported by
>> the
>>    /// subtarget.
>>    unsigned getEUsPerCU() const {
>> -    return
>> AMDGPU::IsaInfo::getEUsPerCU(MCSubtargetInfo::getFeatureBits());
>> +    return AMDGPU::IsaInfo::getEUsPerCU(this);
>>    }
>>
>>    /// \returns Maximum number of waves per compute unit supported by the
>>    /// subtarget without any kind of limitation.
>>    unsigned getMaxWavesPerCU() const {
>> -    return
>> AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits());
>> +    return AMDGPU::IsaInfo::getMaxWavesPerCU(this);
>>    }
>>
>>    /// \returns Maximum number of waves per compute unit supported by the
>>    /// subtarget and limited by given \p FlatWorkGroupSize.
>>    unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
>> -    return
>> AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits(),
>> -                                             FlatWorkGroupSize);
>> +    return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize);
>>    }
>>
>>    /// \returns Maximum number of waves per execution unit supported by
>> the
>> @@ -733,8 +719,7 @@ public:
>>    /// \returns Number of waves per work group supported by the subtarget
>> and
>>    /// limited by given \p FlatWorkGroupSize.
>>    unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
>> -    return AMDGPU::IsaInfo::getWavesPerWorkGroup(
>> -        MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize);
>> +    return AMDGPU::IsaInfo::getWavesPerWorkGroup(this,
>> FlatWorkGroupSize);
>>    }
>>
>>    // static wrappers
>> @@ -853,39 +838,34 @@ public:
>>
>>    /// \returns SGPR allocation granularity supported by the subtarget.
>>    unsigned getSGPRAllocGranule() const {
>> -    return AMDGPU::IsaInfo::getSGPRAllocGranule(
>> -        MCSubtargetInfo::getFeatureBits());
>> +    return AMDGPU::IsaInfo::getSGPRAllocGranule(this);
>>    }
>>
>>    /// \returns SGPR encoding granularity supported by the subtarget.
>>    unsigned getSGPREncodingGranule() const {
>> -    return AMDGPU::IsaInfo::getSGPREncodingGranule(
>> -        MCSubtargetInfo::getFeatureBits());
>> +    return AMDGPU::IsaInfo::getSGPREncodingGranule(this);
>>    }
>>
>>    /// \returns Total number of SGPRs supported by the subtarget.
>>    unsigned getTotalNumSGPRs() const {
>> -    return
>> AMDGPU::IsaInfo::getTotalNumSGPRs(MCSubtargetInfo::getFeatureBits());
>> +    return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
>>    }
>>
>>    /// \returns Addressable number of SGPRs supported by the subtarget.
>>    unsigned getAddressableNumSGPRs() const {
>> -    return AMDGPU::IsaInfo::getAddressableNumSGPRs(
>> -        MCSubtargetInfo::getFeatureBits());
>> +    return AMDGPU::IsaInfo::getAddressableNumSGPRs(this);
>>    }
>>
>>    /// \returns Minimum number of SGPRs that meets the given number of
>> waves per
>>    /// execution unit requirement supported by the subtarget.
>>    unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
>> -    return
>> AMDGPU::IsaInfo::getMinNumSGPRs(MCSubtargetInfo::getFeatureBits(),
>> -                                           WavesPerEU);
>> +    return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
>>    }
>>
>>    /// \returns Maximum number of SGPRs that meets the given number of
>> waves per
>>    /// execution unit requirement supported by the subtarget.
>>    unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
>> -    return
>> AMDGPU::IsaInfo::getMaxNumSGPRs(MCSubtargetInfo::getFeatureBits(),
>> -                                           WavesPerEU, Addressable);
>> +    return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU,
>> Addressable);
>>    }
>>
>>    /// \returns Reserved number of SGPRs for given function \p MF.
>> @@ -903,39 +883,34 @@ public:
>>
>>    /// \returns VGPR allocation granularity supported by the subtarget.
>>    unsigned getVGPRAllocGranule() const {
>> -    return AMDGPU::IsaInfo::getVGPRAllocGranule(
>> -        MCSubtargetInfo::getFeatureBits());
>> +    return AMDGPU::IsaInfo::getVGPRAllocGranule(this);
>>    }
>>
>>    /// \returns VGPR encoding granularity supported by the subtarget.
>>    unsigned getVGPREncodingGranule() const {
>> -    return AMDGPU::IsaInfo::getVGPREncodingGranule(
>> -        MCSubtargetInfo::getFeatureBits());
>> +    return AMDGPU::IsaInfo::getVGPREncodingGranule(this);
>>    }
>>
>>    /// \returns Total number of VGPRs supported by the subtarget.
>>    unsigned getTotalNumVGPRs() const {
>> -    return
>> AMDGPU::IsaInfo::getTotalNumVGPRs(MCSubtargetInfo::getFeatureBits());
>> +    return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
>>    }
>>
>>    /// \returns Addressable number of VGPRs supported by the subtarget.
>>    unsigned getAddressableNumVGPRs() const {
>> -    return AMDGPU::IsaInfo::getAddressableNumVGPRs(
>> -        MCSubtargetInfo::getFeatureBits());
>> +    return AMDGPU::IsaInfo::getAddressableNumVGPRs(this);
>>    }
>>
>>    /// \returns Minimum number of VGPRs that meets given number of waves
>> per
>>    /// execution unit requirement supported by the subtarget.
>>    unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
>> -    return
>> AMDGPU::IsaInfo::getMinNumVGPRs(MCSubtargetInfo::getFeatureBits(),
>> -                                           WavesPerEU);
>> +    return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
>>    }
>>
>>    /// \returns Maximum number of VGPRs that meets given number of waves
>> per
>>    /// execution unit requirement supported by the subtarget.
>>    unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
>> -    return
>> AMDGPU::IsaInfo::getMaxNumVGPRs(MCSubtargetInfo::getFeatureBits(),
>> -                                           WavesPerEU);
>> +    return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
>>    }
>>
>>    /// \returns Maximum number of VGPRs that meets number of waves per
>> execution
>> @@ -951,6 +926,34 @@ public:
>>    void getPostRAMutations(
>>        std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
>>        const override;
>> +
>> +  /// \returns Maximum number of work groups per compute unit supported
>> by the
>> +  /// subtarget and limited by given \p FlatWorkGroupSize.
>> +  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const
>> override {
>> +    return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this,
>> FlatWorkGroupSize);
>> +  }
>> +
>> +  /// \returns Minimum flat work group size supported by the subtarget.
>> +  unsigned getMinFlatWorkGroupSize() const override {
>> +    return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
>> +  }
>> +
>> +  /// \returns Maximum flat work group size supported by the subtarget.
>> +  unsigned getMaxFlatWorkGroupSize() const override {
>> +    return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
>> +  }
>> +
>> +  /// \returns Maximum number of waves per execution unit supported by
>> the
>> +  /// subtarget and limited by given \p FlatWorkGroupSize.
>> +  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
>> +    return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
>> +  }
>> +
>> +  /// \returns Minimum number of waves per execution unit supported by
>> the
>> +  /// subtarget.
>> +  unsigned getMinWavesPerEU() const override {
>> +    return AMDGPU::IsaInfo::getMinWavesPerEU(this);
>> +  }
>>  };
>>
>>  class R600Subtarget final : public R600GenSubtargetInfo,
>> @@ -1061,6 +1064,34 @@ public:
>>    bool enableSubRegLiveness() const override {
>>      return true;
>>    }
>> +
>> +  /// \returns Maximum number of work groups per compute unit supported
>> by the
>> +  /// subtarget and limited by given \p FlatWorkGroupSize.
>> +  unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const
>> override {
>> +    return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this,
>> FlatWorkGroupSize);
>> +  }
>> +
>> +  /// \returns Minimum flat work group size supported by the subtarget.
>> +  unsigned getMinFlatWorkGroupSize() const override {
>> +    return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
>> +  }
>> +
>> +  /// \returns Maximum flat work group size supported by the subtarget.
>> +  unsigned getMaxFlatWorkGroupSize() const override {
>> +    return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
>> +  }
>> +
>> +  /// \returns Maximum number of waves per execution unit supported by
>> the
>> +  /// subtarget and limited by given \p FlatWorkGroupSize.
>> +  unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
>> +    return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
>> +  }
>> +
>> +  /// \returns Minimum number of waves per execution unit supported by
>> the
>> +  /// subtarget.
>> +  unsigned getMinWavesPerEU() const override {
>> +    return AMDGPU::IsaInfo::getMinWavesPerEU(this);
>> +  }
>>  };
>>
>>  } // end namespace llvm
>>
>> Modified: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp?rev=341982&r1=341981&r2=341982&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (original)
>> +++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp Tue Sep 11
>> 11:56:51 2018
>> @@ -49,6 +49,7 @@
>>  #include "llvm/Support/MachineValueType.h"
>>  #include "llvm/Support/MathExtras.h"
>>  #include "llvm/Support/SMLoc.h"
>> +#include "llvm/Support/TargetParser.h"
>>  #include "llvm/Support/TargetRegistry.h"
>>  #include "llvm/Support/raw_ostream.h"
>>  #include <algorithm>
>> @@ -917,8 +918,7 @@ public:
>>        // Currently there is none suitable machinery in the core llvm-mc
>> for this.
>>        // MCSymbol::isRedefinable is intended for another purpose, and
>>        // AsmParser::parseDirectiveSet() cannot be specialized for
>> specific target.
>> -      AMDGPU::IsaInfo::IsaVersion ISA =
>> -          AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
>> +      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
>>        MCContext &Ctx = getContext();
>>        if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
>> {
>>          MCSymbol *Sym =
>> @@ -1826,7 +1826,7 @@ bool AMDGPUAsmParser::updateGprCountSymb
>>                                              unsigned DwordRegIndex,
>>                                              unsigned RegWidth) {
>>    // Symbols are only defined for GCN targets
>> -  if (AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()).Major < 6)
>> +  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
>>      return true;
>>
>>    auto SymbolName = getGprCountSymbolName(RegKind);
>> @@ -2637,18 +2637,18 @@ bool AMDGPUAsmParser::calculateGPRBlocks
>>      unsigned &SGPRBlocks) {
>>    // TODO(scott.linder): These calculations are duplicated from
>>    // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
>> -  IsaInfo::IsaVersion Version = IsaInfo::getIsaVersion(Features);
>> +  IsaVersion Version = getIsaVersion(getSTI().getCPU());
>>
>>    unsigned NumVGPRs = NextFreeVGPR;
>>    unsigned NumSGPRs = NextFreeSGPR;
>> -  unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(Features);
>> +  unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI());
>>
>>    if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
>>        NumSGPRs > MaxAddressableNumSGPRs)
>>      return OutOfRangeError(SGPRRange);
>>
>>    NumSGPRs +=
>> -      IsaInfo::getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed, XNACKUsed);
>> +      IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
>>
>>    if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
>>        NumSGPRs > MaxAddressableNumSGPRs)
>> @@ -2657,8 +2657,8 @@ bool AMDGPUAsmParser::calculateGPRBlocks
>>    if (Features.test(FeatureSGPRInitBug))
>>      NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
>>
>> -  VGPRBlocks = IsaInfo::getNumVGPRBlocks(Features, NumVGPRs);
>> -  SGPRBlocks = IsaInfo::getNumSGPRBlocks(Features, NumSGPRs);
>> +  VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
>> +  SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
>>
>>    return false;
>>  }
>> @@ -2678,8 +2678,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDH
>>
>>    StringSet<> Seen;
>>
>> -  IsaInfo::IsaVersion IVersion =
>> -      IsaInfo::getIsaVersion(getSTI().getFeatureBits());
>> +  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
>>
>>    SMRange VGPRRange;
>>    uint64_t NextFreeVGPR = 0;
>> @@ -2938,8 +2937,7 @@ bool AMDGPUAsmParser::ParseDirectiveHSAC
>>    // If this directive has no arguments, then use the ISA version for the
>>    // targeted GPU.
>>    if (getLexer().is(AsmToken::EndOfStatement)) {
>> -    AMDGPU::IsaInfo::IsaVersion ISA =
>> -        AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
>> +    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
>>      getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
>>                                                        ISA.Stepping,
>>                                                        "AMD", "AMDGPU");
>> @@ -3001,7 +2999,7 @@ bool AMDGPUAsmParser::ParseAMDKernelCode
>>
>>  bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
>>    amd_kernel_code_t Header;
>> -  AMDGPU::initDefaultAMDKernelCodeT(Header, getFeatureBits());
>> +  AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
>>
>>    while (true) {
>>      // Lex EndOfStatement.  This is in a while loop, because lexing a
>> comment
>> @@ -3679,12 +3677,12 @@ void AMDGPUAsmParser::cvtExp(MCInst &Ins
>>
>>  static bool
>>  encodeCnt(
>> -  const AMDGPU::IsaInfo::IsaVersion ISA,
>> +  const AMDGPU::IsaVersion ISA,
>>    int64_t &IntVal,
>>    int64_t CntVal,
>>    bool Saturate,
>> -  unsigned (*encode)(const IsaInfo::IsaVersion &Version, unsigned, unsigned),
>> -  unsigned (*decode)(const IsaInfo::IsaVersion &Version, unsigned))
>> +  unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
>> +  unsigned (*decode)(const IsaVersion &Version, unsigned))
>>  {
>>    bool Failed = false;
>>
>> @@ -3715,8 +3713,7 @@ bool AMDGPUAsmParser::parseCnt(int64_t &
>>    if (getParser().parseAbsoluteExpression(CntVal))
>>      return true;
>>
>> -  AMDGPU::IsaInfo::IsaVersion ISA =
>> -      AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
>> +  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
>>
>>    bool Failed = true;
>>    bool Sat = CntName.endswith("_sat");
>> @@ -3751,8 +3748,7 @@ bool AMDGPUAsmParser::parseCnt(int64_t &
>>
>>  OperandMatchResultTy
>>  AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
>> -  AMDGPU::IsaInfo::IsaVersion ISA =
>> -      AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
>> +  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
>>    int64_t Waitcnt = getWaitcntBitMask(ISA);
>>    SMLoc S = Parser.getTok().getLoc();
>>
>>
>> Modified: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp?rev=341982&r1=341981&r2=341982&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
>> (original)
>> +++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp Tue
>> Sep 11 11:56:51 2018
>> @@ -1155,8 +1155,7 @@ void AMDGPUInstPrinter::printSwizzle(con
>>  void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
>>                                        const MCSubtargetInfo &STI,
>>                                        raw_ostream &O) {
>> -  AMDGPU::IsaInfo::IsaVersion ISA =
>> -      AMDGPU::IsaInfo::getIsaVersion(STI.getFeatureBits());
>> +  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI.getCPU());
>>
>>    unsigned SImm16 = MI->getOperand(OpNo).getImm();
>>    unsigned Vmcnt, Expcnt, Lgkmcnt;
>>
>> Modified:
>> llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp?rev=341982&r1=341981&r2=341982&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
>> (original)
>> +++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
>> Tue Sep 11 11:56:51 2018
>> @@ -27,6 +27,7 @@
>>  #include "llvm/MC/MCObjectFileInfo.h"
>>  #include "llvm/MC/MCSectionELF.h"
>>  #include "llvm/Support/FormattedStream.h"
>> +#include "llvm/Support/TargetParser.h"
>>
>>  namespace llvm {
>>  #include "AMDGPUPTNote.h"
>> @@ -39,84 +40,6 @@ using namespace llvm::AMDGPU;
>>  // AMDGPUTargetStreamer
>>
>>  //===----------------------------------------------------------------------===//
>>
>> -static const struct {
>> -  const char *Name;
>> -  unsigned Mach;
>> -} MachTable[] = {
>> -      // Radeon HD 2000/3000 Series (R600).
>> -      { "r600", ELF::EF_AMDGPU_MACH_R600_R600 },
>> -      { "r630", ELF::EF_AMDGPU_MACH_R600_R630 },
>> -      { "rs880", ELF::EF_AMDGPU_MACH_R600_RS880 },
>> -      { "rv670", ELF::EF_AMDGPU_MACH_R600_RV670 },
>> -      // Radeon HD 4000 Series (R700).
>> -      { "rv710", ELF::EF_AMDGPU_MACH_R600_RV710 },
>> -      { "rv730", ELF::EF_AMDGPU_MACH_R600_RV730 },
>> -      { "rv770", ELF::EF_AMDGPU_MACH_R600_RV770 },
>> -      // Radeon HD 5000 Series (Evergreen).
>> -      { "cedar", ELF::EF_AMDGPU_MACH_R600_CEDAR },
>> -      { "cypress", ELF::EF_AMDGPU_MACH_R600_CYPRESS },
>> -      { "juniper", ELF::EF_AMDGPU_MACH_R600_JUNIPER },
>> -      { "redwood", ELF::EF_AMDGPU_MACH_R600_REDWOOD },
>> -      { "sumo", ELF::EF_AMDGPU_MACH_R600_SUMO },
>> -      // Radeon HD 6000 Series (Northern Islands).
>> -      { "barts", ELF::EF_AMDGPU_MACH_R600_BARTS },
>> -      { "caicos", ELF::EF_AMDGPU_MACH_R600_CAICOS },
>> -      { "cayman", ELF::EF_AMDGPU_MACH_R600_CAYMAN },
>> -      { "turks", ELF::EF_AMDGPU_MACH_R600_TURKS },
>> -      // AMDGCN GFX6.
>> -      { "gfx600", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 },
>> -      { "tahiti", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 },
>> -      { "gfx601", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
>> -      { "hainan", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
>> -      { "oland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
>> -      { "pitcairn", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
>> -      { "verde", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
>> -      // AMDGCN GFX7.
>> -      { "gfx700", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 },
>> -      { "kaveri", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 },
>> -      { "gfx701", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 },
>> -      { "hawaii", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 },
>> -      { "gfx702", ELF::EF_AMDGPU_MACH_AMDGCN_GFX702 },
>> -      { "gfx703", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
>> -      { "kabini", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
>> -      { "mullins", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
>> -      { "gfx704", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 },
>> -      { "bonaire", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 },
>> -      // AMDGCN GFX8.
>> -      { "gfx801", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 },
>> -      { "carrizo", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 },
>> -      { "gfx802", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
>> -      { "iceland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
>> -      { "tonga", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
>> -      { "gfx803", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
>> -      { "fiji", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
>> -      { "polaris10", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
>> -      { "polaris11", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
>> -      { "gfx810", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 },
>> -      { "stoney", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 },
>> -      // AMDGCN GFX9.
>> -      { "gfx900", ELF::EF_AMDGPU_MACH_AMDGCN_GFX900 },
>> -      { "gfx902", ELF::EF_AMDGPU_MACH_AMDGCN_GFX902 },
>> -      { "gfx904", ELF::EF_AMDGPU_MACH_AMDGCN_GFX904 },
>> -      { "gfx906", ELF::EF_AMDGPU_MACH_AMDGCN_GFX906 },
>> -      // Not specified processor.
>> -      { nullptr, ELF::EF_AMDGPU_MACH_NONE }
>> -};
>> -
>> -unsigned AMDGPUTargetStreamer::getMACH(StringRef GPU) const {
>> -  auto Entry = MachTable;
>> -  for (; Entry->Name && GPU != Entry->Name; ++Entry)
>> -    ;
>> -  return Entry->Mach;
>> -}
>> -
>> -const char *AMDGPUTargetStreamer::getMachName(unsigned Mach) {
>> -  auto Entry = MachTable;
>> -  for (; Entry->Name && Mach != Entry->Mach; ++Entry)
>> -    ;
>> -  return Entry->Name;
>> -}
>> -
>>  bool AMDGPUTargetStreamer::EmitHSAMetadata(StringRef HSAMetadataString) {
>>    HSAMD::Metadata HSAMetadata;
>>    if (HSAMD::fromString(HSAMetadataString, HSAMetadata))
>> @@ -205,7 +128,7 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsa
>>      bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK) {
>>    amdhsa::kernel_descriptor_t DefaultKD =
>> getDefaultAmdhsaKernelDescriptor();
>>
>> -  IsaInfo::IsaVersion IVersion =
>> IsaInfo::getIsaVersion(STI.getFeatureBits());
>> +  IsaVersion IVersion = getIsaVersion(STI.getCPU());
>>
>>    OS << "\t.amdhsa_kernel " << KernelName << '\n';
>>
>> @@ -342,7 +265,7 @@ AMDGPUTargetELFStreamer::AMDGPUTargetELF
>>    unsigned EFlags = MCA.getELFHeaderEFlags();
>>
>>    EFlags &= ~ELF::EF_AMDGPU_MACH;
>> -  EFlags |= getMACH(STI.getCPU());
>> +  EFlags |= getElfMach(STI.getCPU());
>>
>>    EFlags &= ~ELF::EF_AMDGPU_XNACK;
>>    if (AMDGPU::hasXNACK(STI))
>>
>> Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h?rev=341982&r1=341981&r2=341982&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
>> (original)
>> +++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h Tue
>> Sep 11 11:56:51 2018
>> @@ -31,13 +31,7 @@ class AMDGPUTargetStreamer : public MCTa
>>  protected:
>>    MCContext &getContext() const { return Streamer.getContext(); }
>>
>> -  /// \returns Equivalent EF_AMDGPU_MACH_* value for given \p GPU name.
>> -  unsigned getMACH(StringRef GPU) const;
>> -
>>  public:
>> -  /// \returns Equivalent GPU name for an EF_AMDGPU_MACH_* value.
>> -  static const char *getMachName(unsigned Mach);
>> -
>>    AMDGPUTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
>>
>>    virtual void EmitDirectiveAMDGCNTarget(StringRef Target) = 0;
>>
>> Modified: llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp?rev=341982&r1=341981&r2=341982&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp (original)
>> +++ llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp Tue Sep 11 11:56:51
>> 2018
>> @@ -369,7 +369,7 @@ private:
>>    const SIRegisterInfo *TRI = nullptr;
>>    const MachineRegisterInfo *MRI = nullptr;
>>    const MachineLoopInfo *MLI = nullptr;
>> -  AMDGPU::IsaInfo::IsaVersion IV;
>> +  AMDGPU::IsaVersion IV;
>>
>>    DenseSet<MachineBasicBlock *> BlockVisitedSet;
>>    DenseSet<MachineInstr *> TrackedWaitcntSet;
>> @@ -1841,7 +1841,7 @@ bool SIInsertWaitcnts::runOnMachineFunct
>>    TRI = &TII->getRegisterInfo();
>>    MRI = &MF.getRegInfo();
>>    MLI = &getAnalysis<MachineLoopInfo>();
>> -  IV = AMDGPU::IsaInfo::getIsaVersion(ST->getFeatureBits());
>> +  IV = AMDGPU::getIsaVersion(ST->getCPU());
>>    const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
>>
>>    ForceEmitZeroWaitcnts = ForceEmitZeroFlag;
>>
>> Modified: llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp?rev=341982&r1=341981&r2=341982&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp (original)
>> +++ llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp Tue Sep 11
>> 11:56:51 2018
>> @@ -253,7 +253,7 @@ protected:
>>    /// Instruction info.
>>    const SIInstrInfo *TII = nullptr;
>>
>> -  IsaInfo::IsaVersion IV;
>> +  IsaVersion IV;
>>
>>    SICacheControl(const GCNSubtarget &ST);
>>
>> @@ -605,7 +605,7 @@ Optional<SIMemOpInfo> SIMemOpAccess::get
>>
>>  SICacheControl::SICacheControl(const GCNSubtarget &ST) {
>>    TII = ST.getInstrInfo();
>> -  IV = IsaInfo::getIsaVersion(ST.getFeatureBits());
>> +  IV = getIsaVersion(ST.getCPU());
>>  }
>>
>>  /* static */
>>
>> Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp?rev=341982&r1=341981&r2=341982&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (original)
>> +++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Tue Sep 11
>> 11:56:51 2018
>> @@ -137,68 +137,18 @@ int getMCOpcode(uint16_t Opcode, unsigne
>>
>>  namespace IsaInfo {
>>
>> -IsaVersion getIsaVersion(const FeatureBitset &Features) {
>> -  // GCN GFX6 (Southern Islands (SI)).
>> -  if (Features.test(FeatureISAVersion6_0_0))
>> -    return {6, 0, 0};
>> -  if (Features.test(FeatureISAVersion6_0_1))
>> -    return {6, 0, 1};
>> -
>> -  // GCN GFX7 (Sea Islands (CI)).
>> -  if (Features.test(FeatureISAVersion7_0_0))
>> -    return {7, 0, 0};
>> -  if (Features.test(FeatureISAVersion7_0_1))
>> -    return {7, 0, 1};
>> -  if (Features.test(FeatureISAVersion7_0_2))
>> -    return {7, 0, 2};
>> -  if (Features.test(FeatureISAVersion7_0_3))
>> -    return {7, 0, 3};
>> -  if (Features.test(FeatureISAVersion7_0_4))
>> -    return {7, 0, 4};
>> -  if (Features.test(FeatureSeaIslands))
>> -    return {7, 0, 0};
>> -
>> -  // GCN GFX8 (Volcanic Islands (VI)).
>> -  if (Features.test(FeatureISAVersion8_0_1))
>> -    return {8, 0, 1};
>> -  if (Features.test(FeatureISAVersion8_0_2))
>> -    return {8, 0, 2};
>> -  if (Features.test(FeatureISAVersion8_0_3))
>> -    return {8, 0, 3};
>> -  if (Features.test(FeatureISAVersion8_1_0))
>> -    return {8, 1, 0};
>> -  if (Features.test(FeatureVolcanicIslands))
>> -    return {8, 0, 0};
>> -
>> -  // GCN GFX9.
>> -  if (Features.test(FeatureISAVersion9_0_0))
>> -    return {9, 0, 0};
>> -  if (Features.test(FeatureISAVersion9_0_2))
>> -    return {9, 0, 2};
>> -  if (Features.test(FeatureISAVersion9_0_4))
>> -    return {9, 0, 4};
>> -  if (Features.test(FeatureISAVersion9_0_6))
>> -    return {9, 0, 6};
>> -  if (Features.test(FeatureGFX9))
>> -    return {9, 0, 0};
>> -
>> -  if (Features.test(FeatureSouthernIslands))
>> -    return {0, 0, 0};
>> -  return {7, 0, 0};
>> -}
>> -
>>  void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
>>    auto TargetTriple = STI->getTargetTriple();
>> -  auto ISAVersion = IsaInfo::getIsaVersion(STI->getFeatureBits());
>> +  auto Version = getIsaVersion(STI->getCPU());
>>
>>    Stream << TargetTriple.getArchName() << '-'
>>           << TargetTriple.getVendorName() << '-'
>>           << TargetTriple.getOSName() << '-'
>>           << TargetTriple.getEnvironmentName() << '-'
>>           << "gfx"
>> -         << ISAVersion.Major
>> -         << ISAVersion.Minor
>> -         << ISAVersion.Stepping;
>> +         << Version.Major
>> +         << Version.Minor
>> +         << Version.Stepping;
>>
>>    if (hasXNACK(*STI))
>>      Stream << "+xnack";
>> @@ -210,49 +160,49 @@ bool hasCodeObjectV3(const MCSubtargetIn
>>    return STI->getFeatureBits().test(FeatureCodeObjectV3);
>>  }
>>
>> -unsigned getWavefrontSize(const FeatureBitset &Features) {
>> -  if (Features.test(FeatureWavefrontSize16))
>> +unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
>> +  if (STI->getFeatureBits().test(FeatureWavefrontSize16))
>>      return 16;
>> -  if (Features.test(FeatureWavefrontSize32))
>> +  if (STI->getFeatureBits().test(FeatureWavefrontSize32))
>>      return 32;
>>
>>    return 64;
>>  }
>>
>> -unsigned getLocalMemorySize(const FeatureBitset &Features) {
>> -  if (Features.test(FeatureLocalMemorySize32768))
>> +unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
>> +  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
>>      return 32768;
>> -  if (Features.test(FeatureLocalMemorySize65536))
>> +  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
>>      return 65536;
>>
>>    return 0;
>>  }
>>
>> -unsigned getEUsPerCU(const FeatureBitset &Features) {
>> +unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
>>    return 4;
>>  }
>>
>> -unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
>> +unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
>>                                 unsigned FlatWorkGroupSize) {
>> -  if (!Features.test(FeatureGCN))
>> +  if (!STI->getFeatureBits().test(FeatureGCN))
>>      return 8;
>> -  unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
>> +  unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
>>    if (N == 1)
>>      return 40;
>>    N = 40 / N;
>>    return std::min(N, 16u);
>>  }
>>
>> -unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
>> -  return getMaxWavesPerEU() * getEUsPerCU(Features);
>> +unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) {
>> +  return getMaxWavesPerEU() * getEUsPerCU(STI);
>>  }
>>
>> -unsigned getMaxWavesPerCU(const FeatureBitset &Features,
>> +unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
>>                            unsigned FlatWorkGroupSize) {
>> -  return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
>> +  return getWavesPerWorkGroup(STI, FlatWorkGroupSize);
>>  }
>>
>> -unsigned getMinWavesPerEU(const FeatureBitset &Features) {
>> +unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
>>    return 1;
>>  }
>>
>> @@ -261,89 +211,89 @@ unsigned getMaxWavesPerEU() {
>>    return 10;
>>  }
>>
>> -unsigned getMaxWavesPerEU(const FeatureBitset &Features,
>> +unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
>>                            unsigned FlatWorkGroupSize) {
>> -  return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
>> -                 getEUsPerCU(Features)) / getEUsPerCU(Features);
>> +  return alignTo(getMaxWavesPerCU(STI, FlatWorkGroupSize),
>> +                 getEUsPerCU(STI)) / getEUsPerCU(STI);
>>  }
>>
>> -unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
>> +unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
>>    return 1;
>>  }
>>
>> -unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
>> +unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
>>    return 2048;
>>  }
>>
>> -unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
>> +unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
>>                                unsigned FlatWorkGroupSize) {
>> -  return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
>> -                 getWavefrontSize(Features);
>> +  return alignTo(FlatWorkGroupSize, getWavefrontSize(STI)) /
>> +                 getWavefrontSize(STI);
>>  }
>>
>> -unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
>> -  IsaVersion Version = getIsaVersion(Features);
>> +unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
>> +  IsaVersion Version = getIsaVersion(STI->getCPU());
>>    if (Version.Major >= 8)
>>      return 16;
>>    return 8;
>>  }
>>
>> -unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
>> +unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
>>    return 8;
>>  }
>>
>> -unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
>> -  IsaVersion Version = getIsaVersion(Features);
>> +unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
>> +  IsaVersion Version = getIsaVersion(STI->getCPU());
>>    if (Version.Major >= 8)
>>      return 800;
>>    return 512;
>>  }
>>
>> -unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
>> -  if (Features.test(FeatureSGPRInitBug))
>> +unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
>> +  if (STI->getFeatureBits().test(FeatureSGPRInitBug))
>>      return FIXED_NUM_SGPRS_FOR_INIT_BUG;
>>
>> -  IsaVersion Version = getIsaVersion(Features);
>> +  IsaVersion Version = getIsaVersion(STI->getCPU());
>>    if (Version.Major >= 8)
>>      return 102;
>>    return 104;
>>  }
>>
>> -unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
>> +unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
>>    assert(WavesPerEU != 0);
>>
>>    if (WavesPerEU >= getMaxWavesPerEU())
>>      return 0;
>>
>> -  unsigned MinNumSGPRs = getTotalNumSGPRs(Features) / (WavesPerEU + 1);
>> -  if (Features.test(FeatureTrapHandler))
>> +  unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
>> +  if (STI->getFeatureBits().test(FeatureTrapHandler))
>>      MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
>> -  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(Features)) + 1;
>> -  return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
>> +  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
>> +  return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
>>  }
>>
>> -unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned
>> WavesPerEU,
>> +unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
>>                          bool Addressable) {
>>    assert(WavesPerEU != 0);
>>
>> -  IsaVersion Version = getIsaVersion(Features);
>> -  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
>> +  IsaVersion Version = getIsaVersion(STI->getCPU());
>> +  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
>>    if (Version.Major >= 8 && !Addressable)
>>      AddressableNumSGPRs = 112;
>> -  unsigned MaxNumSGPRs = getTotalNumSGPRs(Features) / WavesPerEU;
>> -  if (Features.test(FeatureTrapHandler))
>> +  unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
>> +  if (STI->getFeatureBits().test(FeatureTrapHandler))
>>      MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
>> -  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(Features));
>> +  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
>>    return std::min(MaxNumSGPRs, AddressableNumSGPRs);
>>  }
>>
>> -unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
>> +unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
>>                            bool FlatScrUsed, bool XNACKUsed) {
>>    unsigned ExtraSGPRs = 0;
>>    if (VCCUsed)
>>      ExtraSGPRs = 2;
>>
>> -  IsaVersion Version = getIsaVersion(Features);
>> +  IsaVersion Version = getIsaVersion(STI->getCPU());
>>    if (Version.Major < 8) {
>>      if (FlatScrUsed)
>>        ExtraSGPRs = 4;
>> @@ -358,74 +308,74 @@ unsigned getNumExtraSGPRs(const FeatureB
>>    return ExtraSGPRs;
>>  }
>>
>> -unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
>> +unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
>>                            bool FlatScrUsed) {
>> -  return getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed,
>> -                          Features[AMDGPU::FeatureXNACK]);
>> +  return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
>> +                          STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
>>  }
>>
>> -unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs) {
>> -  NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(Features));
>> +unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
>> +  NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
>>    // SGPRBlocks is actual number of SGPR blocks minus 1.
>> -  return NumSGPRs / getSGPREncodingGranule(Features) - 1;
>> +  return NumSGPRs / getSGPREncodingGranule(STI) - 1;
>>  }
>>
>> -unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
>> +unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI) {
>>    return 4;
>>  }
>>
>> -unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
>> -  return getVGPRAllocGranule(Features);
>> +unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI) {
>> +  return getVGPRAllocGranule(STI);
>>  }
>>
>> -unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
>> +unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
>>    return 256;
>>  }
>>
>> -unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
>> -  return getTotalNumVGPRs(Features);
>> +unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
>> +  return getTotalNumVGPRs(STI);
>>  }
>>
>> -unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned
>> WavesPerEU) {
>> +unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
>> {
>>    assert(WavesPerEU != 0);
>>
>>    if (WavesPerEU >= getMaxWavesPerEU())
>>      return 0;
>>    unsigned MinNumVGPRs =
>> -      alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
>> -                getVGPRAllocGranule(Features)) + 1;
>> -  return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
>> +      alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1),
>> +                getVGPRAllocGranule(STI)) + 1;
>> +  return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI));
>>  }
>>
>> -unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned
>> WavesPerEU) {
>> +unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU)
>> {
>>    assert(WavesPerEU != 0);
>>
>> -  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) /
>> WavesPerEU,
>> -                                   getVGPRAllocGranule(Features));
>> -  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
>> +  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
>> +                                   getVGPRAllocGranule(STI));
>> +  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
>>    return std::min(MaxNumVGPRs, AddressableNumVGPRs);
>>  }
>>
>> -unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumVGPRs) {
>> -  NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(Features));
>> +unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs) {
>> +  NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(STI));
>>    // VGPRBlocks is actual number of VGPR blocks minus 1.
>> -  return NumVGPRs / getVGPREncodingGranule(Features) - 1;
>> +  return NumVGPRs / getVGPREncodingGranule(STI) - 1;
>>  }
>>
>>  } // end namespace IsaInfo
>>
>>  void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
>> -                               const FeatureBitset &Features) {
>> -  IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);
>> +                               const MCSubtargetInfo *STI) {
>> +  IsaVersion Version = getIsaVersion(STI->getCPU());
>>
>>    memset(&Header, 0, sizeof(Header));
>>
>>    Header.amd_kernel_code_version_major = 1;
>>    Header.amd_kernel_code_version_minor = 2;
>>    Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
>> -  Header.amd_machine_version_major = ISA.Major;
>> -  Header.amd_machine_version_minor = ISA.Minor;
>> -  Header.amd_machine_version_stepping = ISA.Stepping;
>> +  Header.amd_machine_version_major = Version.Major;
>> +  Header.amd_machine_version_minor = Version.Minor;
>> +  Header.amd_machine_version_stepping = Version.Stepping;
>>    Header.kernel_code_entry_byte_offset = sizeof(Header);
>>    // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
>>    Header.wavefront_size = 6;
>> @@ -513,7 +463,7 @@ std::pair<int, int> getIntegerPairAttrib
>>    return Ints;
>>  }
>>
>> -unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
>> +unsigned getVmcntBitMask(const IsaVersion &Version) {
>>    unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
>>    if (Version.Major < 9)
>>      return VmcntLo;
>> @@ -522,15 +472,15 @@ unsigned getVmcntBitMask(const IsaInfo::
>>    return VmcntLo | VmcntHi;
>>  }
>>
>> -unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
>> +unsigned getExpcntBitMask(const IsaVersion &Version) {
>>    return (1 << getExpcntBitWidth()) - 1;
>>  }
>>
>> -unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
>> +unsigned getLgkmcntBitMask(const IsaVersion &Version) {
>>    return (1 << getLgkmcntBitWidth()) - 1;
>>  }
>>
>> -unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
>> +unsigned getWaitcntBitMask(const IsaVersion &Version) {
>>    unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(),
>> getVmcntBitWidthLo());
>>    unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
>>    unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(),
>> getLgkmcntBitWidth());
>> @@ -542,7 +492,7 @@ unsigned getWaitcntBitMask(const IsaInfo
>>    return Waitcnt | VmcntHi;
>>  }
>>
>> -unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned
>> Waitcnt) {
>> +unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
>>    unsigned VmcntLo =
>>        unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
>>    if (Version.Major < 9)
>> @@ -554,22 +504,22 @@ unsigned decodeVmcnt(const IsaInfo::IsaV
>>    return VmcntLo | VmcntHi;
>>  }
>>
>> -unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned
>> Waitcnt) {
>> +unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
>>    return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
>>  }
>>
>> -unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned
>> Waitcnt) {
>> +unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
>>    return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
>>  }
>>
>> -void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
>> +void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
>>                     unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
>>    Vmcnt = decodeVmcnt(Version, Waitcnt);
>>    Expcnt = decodeExpcnt(Version, Waitcnt);
>>    Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
>>  }
>>
>> -unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned
>> Waitcnt,
>> +unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
>>                       unsigned Vmcnt) {
>>    Waitcnt =
>>        packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(),
>> getVmcntBitWidthLo());
>> @@ -580,17 +530,17 @@ unsigned encodeVmcnt(const IsaInfo::IsaV
>>    return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(),
>> getVmcntBitWidthHi());
>>  }
>>
>> -unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned
>> Waitcnt,
>> +unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
>>                        unsigned Expcnt) {
>>    return packBits(Expcnt, Waitcnt, getExpcntBitShift(),
>> getExpcntBitWidth());
>>  }
>>
>> -unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned
>> Waitcnt,
>> +unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
>>                         unsigned Lgkmcnt) {
>>    return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(),
>> getLgkmcntBitWidth());
>>  }
>>
>> -unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
>> +unsigned encodeWaitcnt(const IsaVersion &Version,
>>                         unsigned Vmcnt, unsigned Expcnt, unsigned
>> Lgkmcnt) {
>>    unsigned Waitcnt = getWaitcntBitMask(Version);
>>    Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
>>
>> Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h?rev=341982&r1=341981&r2=341982&view=diff
>>
>> ==============================================================================
>> --- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (original)
>> +++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h Tue Sep 11
>> 11:56:51 2018
>> @@ -19,6 +19,7 @@
>>  #include "llvm/Support/AMDHSAKernelDescriptor.h"
>>  #include "llvm/Support/Compiler.h"
>>  #include "llvm/Support/ErrorHandling.h"
>> +#include "llvm/Support/TargetParser.h"
>>  #include <cstdint>
>>  #include <string>
>>  #include <utility>
>> @@ -56,16 +57,6 @@ enum {
>>    TRAP_NUM_SGPRS = 16
>>  };
>>
>> -/// Instruction set architecture version.
>> -struct IsaVersion {
>> -  unsigned Major;
>> -  unsigned Minor;
>> -  unsigned Stepping;
>> -};
>> -
>> -/// \returns Isa version for given subtarget \p Features.
>> -IsaVersion getIsaVersion(const FeatureBitset &Features);
>> -
>>  /// Streams isa version string for given subtarget \p STI into \p Stream.
>>  void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
>>
>> @@ -73,114 +64,114 @@ void streamIsaVersion(const MCSubtargetI
>>  /// false otherwise.
>>  bool hasCodeObjectV3(const MCSubtargetInfo *STI);
>>
>> -/// \returns Wavefront size for given subtarget \p Features.
>> -unsigned getWavefrontSize(const FeatureBitset &Features);
>> +/// \returns Wavefront size for given subtarget \p STI.
>> +unsigned getWavefrontSize(const MCSubtargetInfo *STI);
>>
>> -/// \returns Local memory size in bytes for given subtarget \p Features.
>> -unsigned getLocalMemorySize(const FeatureBitset &Features);
>> +/// \returns Local memory size in bytes for given subtarget \p STI.
>> +unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
>>
>>  /// \returns Number of execution units per compute unit for given
>> subtarget \p
>> -/// Features.
>> -unsigned getEUsPerCU(const FeatureBitset &Features);
>> +/// STI.
>> +unsigned getEUsPerCU(const MCSubtargetInfo *STI);
>>
>>  /// \returns Maximum number of work groups per compute unit for given
>> subtarget
>> -/// \p Features and limited by given \p FlatWorkGroupSize.
>> -unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
>> +/// \p STI and limited by given \p FlatWorkGroupSize.
>> +unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
>>                                 unsigned FlatWorkGroupSize);
>>
>>  /// \returns Maximum number of waves per compute unit for given
>> subtarget \p
>> -/// Features without any kind of limitation.
>> -unsigned getMaxWavesPerCU(const FeatureBitset &Features);
>> +/// STI without any kind of limitation.
>> +unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI);
>>
>>  /// \returns Maximum number of waves per compute unit for given
>> subtarget \p
>> -/// Features and limited by given \p FlatWorkGroupSize.
>> -unsigned getMaxWavesPerCU(const FeatureBitset &Features,
>> +/// STI and limited by given \p FlatWorkGroupSize.
>> +unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
>>                            unsigned FlatWorkGroupSize);
>>
>>  /// \returns Minimum number of waves per execution unit for given
>> subtarget \p
>> -/// Features.
>> -unsigned getMinWavesPerEU(const FeatureBitset &Features);
>> +/// STI.
>> +unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
>>
>>  /// \returns Maximum number of waves per execution unit for given
>> subtarget \p
>> -/// Features without any kind of limitation.
>> +/// STI without any kind of limitation.
>>  unsigned getMaxWavesPerEU();
>>
>>  /// \returns Maximum number of waves per execution unit for given
>> subtarget \p
>> -/// Features and limited by given \p FlatWorkGroupSize.
>> -unsigned getMaxWavesPerEU(const FeatureBitset &Features,
>> +/// STI and limited by given \p FlatWorkGroupSize.
>> +unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
>>                            unsigned FlatWorkGroupSize);
>>
>> -/// \returns Minimum flat work group size for given subtarget \p
>> Features.
>> -unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features);
>> +/// \returns Minimum flat work group size for given subtarget \p STI.
>> +unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
>>
>> -/// \returns Maximum flat work group size for given subtarget \p
>> Features.
>> -unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features);
>> +/// \returns Maximum flat work group size for given subtarget \p STI.
>> +unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
>>
>> -/// \returns Number of waves per work group for given subtarget \p
>> Features and
>> +/// \returns Number of waves per work group for given subtarget \p STI
>> and
>>  /// limited by given \p FlatWorkGroupSize.
>> -unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
>> +unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
>>                                unsigned FlatWorkGroupSize);
>>
>> -/// \returns SGPR allocation granularity for given subtarget \p Features.
>> -unsigned getSGPRAllocGranule(const FeatureBitset &Features);
>> +/// \returns SGPR allocation granularity for given subtarget \p STI.
>> +unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
>>
>> -/// \returns SGPR encoding granularity for given subtarget \p Features.
>> -unsigned getSGPREncodingGranule(const FeatureBitset &Features);
>> +/// \returns SGPR encoding granularity for given subtarget \p STI.
>> +unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
>>
>> -/// \returns Total number of SGPRs for given subtarget \p Features.
>> -unsigned getTotalNumSGPRs(const FeatureBitset &Features);
>> +/// \returns Total number of SGPRs for given subtarget \p STI.
>> +unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
>>
>> -/// \returns Addressable number of SGPRs for given subtarget \p Features.
>> -unsigned getAddressableNumSGPRs(const FeatureBitset &Features);
>> +/// \returns Addressable number of SGPRs for given subtarget \p STI.
>> +unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
>>
>>  /// \returns Minimum number of SGPRs that meets the given number of
>> waves per
>> -/// execution unit requirement for given subtarget \p Features.
>> -unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned
>> WavesPerEU);
>> +/// execution unit requirement for given subtarget \p STI.
>> +unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
>>
>>  /// \returns Maximum number of SGPRs that meets the given number of
>> waves per
>> -/// execution unit requirement for given subtarget \p Features.
>> -unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned
>> WavesPerEU,
>> +/// execution unit requirement for given subtarget \p STI.
>> +unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
>>                          bool Addressable);
>>
>>  /// \returns Number of extra SGPRs implicitly required by given
>> subtarget \p
>> -/// Features when the given special registers are used.
>> -unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
>> +/// STI when the given special registers are used.
>> +unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
>>                            bool FlatScrUsed, bool XNACKUsed);
>>
>>  /// \returns Number of extra SGPRs implicitly required by given
>> subtarget \p
>> -/// Features when the given special registers are used. XNACK is
>> inferred from
>> -/// \p Features.
>> -unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
>> +/// STI when the given special registers are used. XNACK is inferred from
>> +/// \p STI.
>> +unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
>>                            bool FlatScrUsed);
>>
>> -/// \returns Number of SGPR blocks needed for given subtarget \p
>> Features when
>> +/// \returns Number of SGPR blocks needed for given subtarget \p STI when
>>  /// \p NumSGPRs are used. \p NumSGPRs should already include any special
>>  /// register counts.
>> -unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned
>> NumSGPRs);
>> +unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
>>
>> -/// \returns VGPR allocation granularity for given subtarget \p Features.
>> -unsigned getVGPRAllocGranule(const FeatureBitset &Features);
>> +/// \returns VGPR allocation granularity for given subtarget \p STI.
>> +unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI);
>>
>> -/// \returns VGPR encoding granularity for given subtarget \p Features.
>> -unsigned getVGPREncodingGranule(const FeatureBitset &Features);
>> +/// \returns VGPR encoding granularity for given subtarget \p STI.
>> +unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI);
>>
>> -/// \returns Total number of VGPRs for given subtarget \p Features.
>> -unsigned getTotalNumVGPRs(const FeatureBitset &Features);
>> +/// \returns Total number of VGPRs for given subtarget \p STI.
>> +unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
>>
>> -/// \returns Addressable number of VGPRs for given subtarget \p Features.
>> -unsigned getAddressableNumVGPRs(const FeatureBitset &Features);
>> +/// \returns Addressable number of VGPRs for given subtarget \p STI.
>> +unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
>>
>>  /// \returns Minimum number of VGPRs that meets given number of waves per
>> -/// execution unit requirement for given subtarget \p Features.
>> -unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned
>> WavesPerEU);
>> +/// execution unit requirement for given subtarget \p STI.
>> +unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
>>
>>  /// \returns Maximum number of VGPRs that meets given number of waves per
>> -/// execution unit requirement for given subtarget \p Features.
>> -unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned
>> WavesPerEU);
>> +/// execution unit requirement for given subtarget \p STI.
>> +unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
>>
>> -/// \returns Number of VGPR blocks needed for given subtarget \p
>> Features when
>> +/// \returns Number of VGPR blocks needed for given subtarget \p STI when
>>  /// \p NumVGPRs are used.
>> -unsigned
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>


-- 
Regards,
Ilya Biryukov
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180912/08659476/attachment-0001.html>


More information about the llvm-commits mailing list