[llvm] r294449 - [AMDGPU] Add target information that is required by tools to metadata

Wed Feb 8 06:05:23 PST 2017

Author: kzhuravl
Date: Wed Feb  8 08:05:23 2017
New Revision: 294449

URL: http://llvm.org/viewvc/llvm-project?rev=294449&view=rev
Log:
[AMDGPU] Add target information that is required by tools to metadata

Differential Revision: https://reviews.llvm.org/D28760#fb670e28

Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h
    llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
    llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
    llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
    llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp
    llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h
    llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
    llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
    llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp
    llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
    llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
    llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll
    llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll
    llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll
    llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp?rev=294449&r1=294448&r2=294449&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Wed Feb  8 08:05:23 2017
@@ -109,12 +109,13 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFil
   TS->EmitDirectiveHSACodeObjectVersion(2, 1);
 
   const MCSubtargetInfo *STI = TM.getMCSubtargetInfo();
-  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits());
+  AMDGPU::IsaInfo::IsaVersion ISA =
+      AMDGPU::IsaInfo::getIsaVersion(STI->getFeatureBits());
   TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping,
                                     "AMD", "AMDGPU");
 
   // Emit runtime metadata.
-  TS->EmitRuntimeMetadata(M);
+  TS->EmitRuntimeMetadata(STI->getFeatureBits(), M);
 }
 
 bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
@@ -485,7 +486,8 @@ void AMDGPUAsmPrinter::getSIProgramInfo(
       DiagnosticInfoResourceLimit Diag(*MF.getFunction(),
                                        "addressable scalar registers",
                                        MaxSGPR + 1, DS_Error,
-                                       DK_ResourceLimit, MaxAddressableNumSGPRs);
+                                       DK_ResourceLimit,
+                                       MaxAddressableNumSGPRs);
       Ctx.diagnose(Diag);
       MaxSGPR = MaxAddressableNumSGPRs - 1;
     }
@@ -509,25 +511,27 @@ void AMDGPUAsmPrinter::getSIProgramInfo(
 
   if (STM.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ||
       STM.hasSGPRInitBug()) {
-    unsigned MaxNumSGPRs = STM.getAddressableNumSGPRs();
-    if (ProgInfo.NumSGPR > MaxNumSGPRs) {
-      // This can happen due to a compiler bug or when using inline asm to use the
-      // registers which are usually reserved for vcc etc.
-
+    unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
+    if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) {
+      // This can happen due to a compiler bug or when using inline asm to use
+      // the registers which are usually reserved for vcc etc.
       LLVMContext &Ctx = MF.getFunction()->getContext();
       DiagnosticInfoResourceLimit Diag(*MF.getFunction(),
                                        "scalar registers",
                                        ProgInfo.NumSGPR, DS_Error,
-                                       DK_ResourceLimit, MaxNumSGPRs);
+                                       DK_ResourceLimit,
+                                       MaxAddressableNumSGPRs);
       Ctx.diagnose(Diag);
-      ProgInfo.NumSGPR = MaxNumSGPRs;
-      ProgInfo.NumSGPRsForWavesPerEU = MaxNumSGPRs;
+      ProgInfo.NumSGPR = MaxAddressableNumSGPRs;
+      ProgInfo.NumSGPRsForWavesPerEU = MaxAddressableNumSGPRs;
     }
   }
 
   if (STM.hasSGPRInitBug()) {
-    ProgInfo.NumSGPR = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
-    ProgInfo.NumSGPRsForWavesPerEU = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
+    ProgInfo.NumSGPR =
+        AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
+    ProgInfo.NumSGPRsForWavesPerEU =
+        AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
   }
 
   if (MFI->NumUserSGPRs > STM.getMaxNumUserSGPRs()) {
@@ -554,9 +558,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(
                                 STM.getVGPREncodingGranule());
   ProgInfo.VGPRBlocks = ProgInfo.VGPRBlocks / STM.getVGPREncodingGranule() - 1;
 
-  // Record first reserved register and reserved register count fields, and
-  // update max register counts if "amdgpu-debugger-reserve-regs" attribute was
-  // requested.
+  // Record first reserved VGPR and number of reserved VGPRs.
   ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? MaxVGPR + 1 : 0;
   ProgInfo.ReservedVGPRCount = STM.getReservedNumVGPRs(MF);
 

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h?rev=294449&r1=294448&r2=294449&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h Wed Feb  8 08:05:23 2017
@@ -41,37 +41,90 @@ namespace RuntimeMD {
 
   // Version and revision of runtime metadata
   const unsigned char MDVersion   = 2;
-  const unsigned char MDRevision  = 0;
+  const unsigned char MDRevision  = 1;
 
   // Name of keys for runtime metadata.
   namespace KeyName {
 
-    const char MDVersion[]                = "amd.MDVersion";            // Runtime metadata version
-    const char Language[]                 = "amd.Language";             // Language
-    const char LanguageVersion[]          = "amd.LanguageVersion";      // Language version
-    const char Kernels[]                  = "amd.Kernels";              // Kernels
-    const char KernelName[]               = "amd.KernelName";           // Kernel name
-    const char Args[]                     = "amd.Args";                 // Kernel arguments
-    const char ArgSize[]                  = "amd.ArgSize";              // Kernel arg size
-    const char ArgAlign[]                 = "amd.ArgAlign";             // Kernel arg alignment
-    const char ArgTypeName[]              = "amd.ArgTypeName";          // Kernel type name
-    const char ArgName[]                  = "amd.ArgName";              // Kernel name
-    const char ArgKind[]                  = "amd.ArgKind";              // Kernel argument kind
-    const char ArgValueType[]             = "amd.ArgValueType";         // Kernel argument value type
-    const char ArgAddrQual[]              = "amd.ArgAddrQual";          // Kernel argument address qualifier
-    const char ArgAccQual[]               = "amd.ArgAccQual";           // Kernel argument access qualifier
-    const char ArgIsConst[]               = "amd.ArgIsConst";           // Kernel argument is const qualified
-    const char ArgIsRestrict[]            = "amd.ArgIsRestrict";        // Kernel argument is restrict qualified
-    const char ArgIsVolatile[]            = "amd.ArgIsVolatile";        // Kernel argument is volatile qualified
-    const char ArgIsPipe[]                = "amd.ArgIsPipe";            // Kernel argument is pipe qualified
-    const char ReqdWorkGroupSize[]        = "amd.ReqdWorkGroupSize";    // Required work group size
-    const char WorkGroupSizeHint[]        = "amd.WorkGroupSizeHint";    // Work group size hint
-    const char VecTypeHint[]              = "amd.VecTypeHint";          // Vector type hint
-    const char KernelIndex[]              = "amd.KernelIndex";          // Kernel index for device enqueue
-    const char NoPartialWorkGroups[]      = "amd.NoPartialWorkGroups";  // No partial work groups
-    const char PrintfInfo[]               = "amd.PrintfInfo";           // Prinf function call information
-    const char ArgActualAcc[]             = "amd.ArgActualAcc";         // The actual kernel argument access qualifier
-    const char ArgPointeeAlign[]          = "amd.ArgPointeeAlign";      // Alignment of pointee type
+    // Runtime metadata version
+    const char MDVersion[] = "amd.MDVersion";
+
+    // Instruction set architecture information
+    const char IsaInfo[] = "amd.IsaInfo";
+    // Wavefront size
+    const char IsaInfoWavefrontSize[] = "amd.IsaInfoWavefrontSize";
+    // Local memory size in bytes
+    const char IsaInfoLocalMemorySize[] = "amd.IsaInfoLocalMemorySize";
+    // Number of execution units per compute unit
+    const char IsaInfoEUsPerCU[] = "amd.IsaInfoEUsPerCU";
+    // Maximum number of waves per execution unit
+    const char IsaInfoMaxWavesPerEU[] = "amd.IsaInfoMaxWavesPerEU";
+    // Maximum flat work group size
+    const char IsaInfoMaxFlatWorkGroupSize[] = "amd.IsaInfoMaxFlatWorkGroupSize";
+    // SGPR allocation granularity
+    const char IsaInfoSGPRAllocGranule[] = "amd.IsaInfoSGPRAllocGranule";
+    // Total number of SGPRs
+    const char IsaInfoTotalNumSGPRs[] = "amd.IsaInfoTotalNumSGPRs";
+    // Addressable number of SGPRs
+    const char IsaInfoAddressableNumSGPRs[] = "amd.IsaInfoAddressableNumSGPRs";
+    // VGPR allocation granularity
+    const char IsaInfoVGPRAllocGranule[] = "amd.IsaInfoVGPRAllocGranule";
+    // Total number of VGPRs
+    const char IsaInfoTotalNumVGPRs[] = "amd.IsaInfoTotalNumVGPRs";
+    // Addressable number of VGPRs
+    const char IsaInfoAddressableNumVGPRs[] = "amd.IsaInfoAddressableNumVGPRs";
+
+    // Language
+    const char Language[] = "amd.Language";
+    // Language version
+    const char LanguageVersion[] = "amd.LanguageVersion";
+
+    // Kernels
+    const char Kernels[] = "amd.Kernels";
+    // Kernel name
+    const char KernelName[] = "amd.KernelName";
+    // Kernel arguments
+    const char Args[] = "amd.Args";
+    // Kernel argument size in bytes
+    const char ArgSize[] = "amd.ArgSize";
+    // Kernel argument alignment
+    const char ArgAlign[] = "amd.ArgAlign";
+    // Kernel argument type name
+    const char ArgTypeName[] = "amd.ArgTypeName";
+    // Kernel argument name
+    const char ArgName[] = "amd.ArgName";
+    // Kernel argument kind
+    const char ArgKind[] = "amd.ArgKind";
+    // Kernel argument value type
+    const char ArgValueType[] = "amd.ArgValueType";
+    // Kernel argument address qualifier
+    const char ArgAddrQual[] = "amd.ArgAddrQual";
+    // Kernel argument access qualifier
+    const char ArgAccQual[] = "amd.ArgAccQual";
+    // Kernel argument is const qualified
+    const char ArgIsConst[] = "amd.ArgIsConst";
+    // Kernel argument is restrict qualified
+    const char ArgIsRestrict[] = "amd.ArgIsRestrict";
+    // Kernel argument is volatile qualified
+    const char ArgIsVolatile[] = "amd.ArgIsVolatile";
+    // Kernel argument is pipe qualified
+    const char ArgIsPipe[] = "amd.ArgIsPipe";
+    // Required work group size
+    const char ReqdWorkGroupSize[] = "amd.ReqdWorkGroupSize";
+    // Work group size hint
+    const char WorkGroupSizeHint[] = "amd.WorkGroupSizeHint";
+    // Vector type hint
+    const char VecTypeHint[] = "amd.VecTypeHint";
+    // Kernel index for device enqueue
+    const char KernelIndex[] = "amd.KernelIndex";
+    // No partial work groups
+    const char NoPartialWorkGroups[] = "amd.NoPartialWorkGroups";
+    // Printf function call information
+    const char PrintfInfo[] = "amd.PrintfInfo";
+    // The actual kernel argument access qualifier
+    const char ArgActualAcc[] = "amd.ArgActualAcc";
+    // Alignment of pointee type
+    const char ArgPointeeAlign[] = "amd.ArgPointeeAlign";
 
   } // end namespace KeyName
 
@@ -175,11 +228,45 @@ namespace RuntimeMD {
 
   } // end namespace Kernel
 
+  namespace IsaInfo {
+
+    /// \brief In-memory representation of instruction set architecture
+    /// information.
+    struct Metadata {
+      /// \brief Wavefront size.
+      unsigned WavefrontSize = 0;
+      /// \brief Local memory size in bytes.
+      unsigned LocalMemorySize = 0;
+      /// \brief Number of execution units per compute unit.
+      unsigned EUsPerCU = 0;
+      /// \brief Maximum number of waves per execution unit.
+      unsigned MaxWavesPerEU = 0;
+      /// \brief Maximum flat work group size.
+      unsigned MaxFlatWorkGroupSize = 0;
+      /// \brief SGPR allocation granularity.
+      unsigned SGPRAllocGranule = 0;
+      /// \brief Total number of SGPRs.
+      unsigned TotalNumSGPRs = 0;
+      /// \brief Addressable number of SGPRs.
+      unsigned AddressableNumSGPRs = 0;
+      /// \brief VGPR allocation granularity.
+      unsigned VGPRAllocGranule = 0;
+      /// \brief Total number of VGPRs.
+      unsigned TotalNumVGPRs = 0;
+      /// \brief Addressable number of VGPRs.
+      unsigned AddressableNumVGPRs = 0;
+
+      Metadata() = default;
+    };
+
+  } // end namespace IsaInfo
+
   namespace Program {
 
     // In-memory representation of program information.
     struct Metadata {
       std::vector<uint8_t> MDVersionSeq;
+      IsaInfo::Metadata IsaInfo;
       std::vector<std::string> PrintfInfo;
       std::vector<Kernel::Metadata> Kernels;
 

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=294449&r1=294448&r2=294449&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Wed Feb  8 08:05:23 2017
@@ -323,53 +323,6 @@ unsigned SISubtarget::getOccupancyWithNu
   return 1;
 }
 
-unsigned SISubtarget::getMinNumSGPRs(unsigned WavesPerEU) const {
-  if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
-    switch (WavesPerEU) {
-      case 0:  return 0;
-      case 10: return 0;
-      case 9:  return 0;
-      case 8:  return 81;
-      default: return 97;
-    }
-  } else {
-    switch (WavesPerEU) {
-      case 0:  return 0;
-      case 10: return 0;
-      case 9:  return 49;
-      case 8:  return 57;
-      case 7:  return 65;
-      case 6:  return 73;
-      case 5:  return 81;
-      default: return 97;
-    }
-  }
-}
-
-unsigned SISubtarget::getMaxNumSGPRs(unsigned WavesPerEU,
-                                     bool Addressable) const {
-  if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
-    switch (WavesPerEU) {
-      case 0:  return 80;
-      case 10: return 80;
-      case 9:  return 80;
-      case 8:  return 96;
-      default: return Addressable ? getAddressableNumSGPRs() : 112;
-    }
-  } else {
-    switch (WavesPerEU) {
-      case 0:  return 48;
-      case 10: return 48;
-      case 9:  return 56;
-      case 8:  return 64;
-      case 7:  return 72;
-      case 6:  return 80;
-      case 5:  return 96;
-      default: return getAddressableNumSGPRs();
-    }
-  }
-}
-
 unsigned SISubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
   if (MFI.hasFlatScratchInit()) {
@@ -428,44 +381,12 @@ unsigned SISubtarget::getMaxNumSGPRs(con
   }
 
   if (hasSGPRInitBug())
-    MaxNumSGPRs = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
+    MaxNumSGPRs = AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
 
   return std::min(MaxNumSGPRs - getReservedNumSGPRs(MF),
                   MaxAddressableNumSGPRs);
 }
 
-unsigned SISubtarget::getMinNumVGPRs(unsigned WavesPerEU) const {
-  switch (WavesPerEU) {
-    case 0:  return 0;
-    case 10: return 0;
-    case 9:  return 25;
-    case 8:  return 29;
-    case 7:  return 33;
-    case 6:  return 37;
-    case 5:  return 41;
-    case 4:  return 49;
-    case 3:  return 65;
-    case 2:  return 85;
-    default: return 129;
-  }
-}
-
-unsigned SISubtarget::getMaxNumVGPRs(unsigned WavesPerEU) const {
-  switch (WavesPerEU) {
-    case 0:  return 24;
-    case 10: return 24;
-    case 9:  return 28;
-    case 8:  return 32;
-    case 7:  return 36;
-    case 6:  return 40;
-    case 5:  return 48;
-    case 4:  return 64;
-    case 3:  return 84;
-    case 2:  return 128;
-    default: return getTotalNumVGPRs();
-  }
-}
-
 unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
   const Function &F = *MF.getFunction();
   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=294449&r1=294448&r2=294449&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Wed Feb  8 08:05:23 2017
@@ -365,72 +365,71 @@ public:
     return true;
   }
 
+  void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b;}
+  bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal;}
+
   /// \returns Number of execution units per compute unit supported by the
   /// subtarget.
   unsigned getEUsPerCU() const {
-    return 4;
+    return AMDGPU::IsaInfo::getEUsPerCU(getFeatureBits());
   }
 
   /// \returns Maximum number of work groups per compute unit supported by the
-  /// subtarget and limited by given flat work group size.
+  /// subtarget and limited by given \p FlatWorkGroupSize.
   unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
-    if (getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
-      return 8;
-    return getWavesPerWorkGroup(FlatWorkGroupSize) == 1 ? 40 : 16;
+    return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(getFeatureBits(),
+                                                  FlatWorkGroupSize);
   }
 
   /// \returns Maximum number of waves per compute unit supported by the
   /// subtarget without any kind of limitation.
   unsigned getMaxWavesPerCU() const {
-    return getMaxWavesPerEU() * getEUsPerCU();
+    return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits());
   }
 
   /// \returns Maximum number of waves per compute unit supported by the
-  /// subtarget and limited by given flat work group size.
+  /// subtarget and limited by given \p FlatWorkGroupSize.
   unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
-    return getWavesPerWorkGroup(FlatWorkGroupSize);
+    return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits(),
+                                             FlatWorkGroupSize);
   }
 
   /// \returns Minimum number of waves per execution unit supported by the
   /// subtarget.
   unsigned getMinWavesPerEU() const {
-    return 1;
+    return AMDGPU::IsaInfo::getMinWavesPerEU(getFeatureBits());
   }
 
   /// \returns Maximum number of waves per execution unit supported by the
   /// subtarget without any kind of limitation.
   unsigned getMaxWavesPerEU() const {
-    if (getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
-      return 8;
-    // FIXME: Need to take scratch memory into account.
-    return 10;
+    return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits());
   }
 
   /// \returns Maximum number of waves per execution unit supported by the
-  /// subtarget and limited by given flat work group size.
+  /// subtarget and limited by given \p FlatWorkGroupSize.
   unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
-    return alignTo(getMaxWavesPerCU(FlatWorkGroupSize), getEUsPerCU()) /
-      getEUsPerCU();
+    return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits(),
+                                             FlatWorkGroupSize);
   }
 
   /// \returns Minimum flat work group size supported by the subtarget.
   unsigned getMinFlatWorkGroupSize() const {
-    return 1;
+    return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(getFeatureBits());
   }
 
   /// \returns Maximum flat work group size supported by the subtarget.
   unsigned getMaxFlatWorkGroupSize() const {
-    return 2048;
+    return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(getFeatureBits());
   }
 
-  /// \returns Number of waves per work group given the flat work group size.
+  /// \returns Number of waves per work group supported by the subtarget and
+  /// limited by given \p FlatWorkGroupSize.
   unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
-    return alignTo(FlatWorkGroupSize, getWavefrontSize()) / getWavefrontSize();
+    return AMDGPU::IsaInfo::getWavesPerWorkGroup(getFeatureBits(),
+                                                 FlatWorkGroupSize);
   }
 
-  void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b;}
-  bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal;}
-
   /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
   /// for function \p F, or minimum/maximum flat work group sizes explicitly
   /// requested using "amdgpu-flat-work-group-size" attribute attached to
@@ -492,13 +491,6 @@ public:
 };
 
 class SISubtarget final : public AMDGPUSubtarget {
-public:
-  enum {
-    // The closed Vulkan driver sets 96, which limits the wave count to 8 but
-    // doesn't spill SGPRs as much as when 80 is set.
-    FIXED_SGPR_COUNT_FOR_INIT_BUG = 96
-  };
-
 private:
   SIInstrInfo InstrInfo;
   SIFrameLowering FrameLowering;
@@ -644,39 +636,36 @@ public:
 
   /// \returns SGPR allocation granularity supported by the subtarget.
   unsigned getSGPRAllocGranule() const {
-    if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
-      return 16;
-    return 8;
+    return AMDGPU::IsaInfo::getSGPRAllocGranule(getFeatureBits());
   }
 
   /// \returns SGPR encoding granularity supported by the subtarget.
   unsigned getSGPREncodingGranule() const {
-    return 8;
+    return AMDGPU::IsaInfo::getSGPREncodingGranule(getFeatureBits());
   }
 
   /// \returns Total number of SGPRs supported by the subtarget.
   unsigned getTotalNumSGPRs() const {
-    if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
-      return 800;
-    return 512;
+    return AMDGPU::IsaInfo::getTotalNumSGPRs(getFeatureBits());
   }
 
   /// \returns Addressable number of SGPRs supported by the subtarget.
   unsigned getAddressableNumSGPRs() const {
-    if (hasSGPRInitBug())
-      return SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
-    if (getGeneration() >= VOLCANIC_ISLANDS)
-      return 102;
-    return 104;
+    return AMDGPU::IsaInfo::getAddressableNumSGPRs(getFeatureBits());
   }
 
   /// \returns Minimum number of SGPRs that meets the given number of waves per
   /// execution unit requirement supported by the subtarget.
-  unsigned getMinNumSGPRs(unsigned WavesPerEU) const;
+  unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
+    return AMDGPU::IsaInfo::getMinNumSGPRs(getFeatureBits(), WavesPerEU);
+  }
 
   /// \returns Maximum number of SGPRs that meets the given number of waves per
   /// execution unit requirement supported by the subtarget.
-  unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const;
+  unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
+    return AMDGPU::IsaInfo::getMaxNumSGPRs(getFeatureBits(), WavesPerEU,
+                                           Addressable);
+  }
 
   /// \returns Reserved number of SGPRs for given function \p MF.
   unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
@@ -693,31 +682,35 @@ public:
 
   /// \returns VGPR allocation granularity supported by the subtarget.
   unsigned getVGPRAllocGranule() const {
-    return 4;
+    return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits());;
   }
 
   /// \returns VGPR encoding granularity supported by the subtarget.
   unsigned getVGPREncodingGranule() const {
-    return getVGPRAllocGranule();
+    return AMDGPU::IsaInfo::getVGPREncodingGranule(getFeatureBits());
   }
 
   /// \returns Total number of VGPRs supported by the subtarget.
   unsigned getTotalNumVGPRs() const {
-    return 256;
+    return AMDGPU::IsaInfo::getTotalNumVGPRs(getFeatureBits());
   }
 
   /// \returns Addressable number of VGPRs supported by the subtarget.
   unsigned getAddressableNumVGPRs() const {
-    return getTotalNumVGPRs();
+    return AMDGPU::IsaInfo::getAddressableNumVGPRs(getFeatureBits());
   }
 
   /// \returns Minimum number of VGPRs that meets given number of waves per
   /// execution unit requirement supported by the subtarget.
-  unsigned getMinNumVGPRs(unsigned WavesPerEU) const;
+  unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
+    return AMDGPU::IsaInfo::getMinNumVGPRs(getFeatureBits(), WavesPerEU);
+  }
 
   /// \returns Maximum number of VGPRs that meets given number of waves per
   /// execution unit requirement supported by the subtarget.
-  unsigned getMaxNumVGPRs(unsigned WavesPerEU) const;
+  unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
+    return AMDGPU::IsaInfo::getMaxNumVGPRs(getFeatureBits(), WavesPerEU);
+  }
 
   /// \returns Reserved number of VGPRs for given function \p MF.
   unsigned getReservedNumVGPRs(const MachineFunction &MF) const {

Modified: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp?rev=294449&r1=294448&r2=294449&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp Wed Feb  8 08:05:23 2017
@@ -801,14 +801,16 @@ public:
       // Currently there is none suitable machinery in the core llvm-mc for this.
       // MCSymbol::isRedefinable is intended for another purpose, and
       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
-      AMDGPU::IsaVersion Isa = AMDGPU::getIsaVersion(getSTI().getFeatureBits());
+      AMDGPU::IsaInfo::IsaVersion ISA =
+          AMDGPU::IsaInfo::getIsaVersion(getSTI().getFeatureBits());
       MCContext &Ctx = getContext();
-      MCSymbol *Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
-      Sym->setVariableValue(MCConstantExpr::create(Isa.Major, Ctx));
+      MCSymbol *Sym =
+          Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
+      Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
       Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
-      Sym->setVariableValue(MCConstantExpr::create(Isa.Minor, Ctx));
+      Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
       Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
-      Sym->setVariableValue(MCConstantExpr::create(Isa.Stepping, Ctx));
+      Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
     }
     KernelScope.initialize(getContext());
   }
@@ -1867,9 +1869,10 @@ bool AMDGPUAsmParser::ParseDirectiveHSAC
   // If this directive has no arguments, then use the ISA version for the
   // targeted GPU.
   if (getLexer().is(AsmToken::EndOfStatement)) {
-    AMDGPU::IsaVersion Isa = AMDGPU::getIsaVersion(getSTI().getFeatureBits());
-    getTargetStreamer().EmitDirectiveHSACodeObjectISA(Isa.Major, Isa.Minor,
-                                                      Isa.Stepping,
+    AMDGPU::IsaInfo::IsaVersion ISA =
+        AMDGPU::IsaInfo::getIsaVersion(getSTI().getFeatureBits());
+    getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
+                                                      ISA.Stepping,
                                                       "AMD", "AMDGPU");
     return false;
   }
@@ -2455,13 +2458,14 @@ bool AMDGPUAsmParser::parseCnt(int64_t &
   if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma))
     Parser.Lex();
 
-  IsaVersion IV = getIsaVersion(getSTI().getFeatureBits());
+  AMDGPU::IsaInfo::IsaVersion ISA =
+      AMDGPU::IsaInfo::getIsaVersion(getSTI().getFeatureBits());
   if (CntName == "vmcnt")
-    IntVal = encodeVmcnt(IV, IntVal, CntVal);
+    IntVal = encodeVmcnt(ISA, IntVal, CntVal);
   else if (CntName == "expcnt")
-    IntVal = encodeExpcnt(IV, IntVal, CntVal);
+    IntVal = encodeExpcnt(ISA, IntVal, CntVal);
   else if (CntName == "lgkmcnt")
-    IntVal = encodeLgkmcnt(IV, IntVal, CntVal);
+    IntVal = encodeLgkmcnt(ISA, IntVal, CntVal);
   else
     return true;
 
@@ -2470,8 +2474,9 @@ bool AMDGPUAsmParser::parseCnt(int64_t &
 
 OperandMatchResultTy
 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
-  IsaVersion IV = getIsaVersion(getSTI().getFeatureBits());
-  int64_t Waitcnt = getWaitcntBitMask(IV);
+  AMDGPU::IsaInfo::IsaVersion ISA =
+      AMDGPU::IsaInfo::getIsaVersion(getSTI().getFeatureBits());
+  int64_t Waitcnt = getWaitcntBitMask(ISA);
   SMLoc S = Parser.getTok().getLoc();
 
   switch(getLexer().getKind()) {

Modified: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp?rev=294449&r1=294448&r2=294449&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp Wed Feb  8 08:05:23 2017
@@ -1057,27 +1057,28 @@ void AMDGPUInstPrinter::printSendMsg(con
 void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
                                       const MCSubtargetInfo &STI,
                                       raw_ostream &O) {
-  IsaVersion IV = getIsaVersion(STI.getFeatureBits());
+  AMDGPU::IsaInfo::IsaVersion ISA =
+      AMDGPU::IsaInfo::getIsaVersion(STI.getFeatureBits());
 
   unsigned SImm16 = MI->getOperand(OpNo).getImm();
   unsigned Vmcnt, Expcnt, Lgkmcnt;
-  decodeWaitcnt(IV, SImm16, Vmcnt, Expcnt, Lgkmcnt);
+  decodeWaitcnt(ISA, SImm16, Vmcnt, Expcnt, Lgkmcnt);
 
   bool NeedSpace = false;
 
-  if (Vmcnt != getVmcntBitMask(IV)) {
+  if (Vmcnt != getVmcntBitMask(ISA)) {
     O << "vmcnt(" << Vmcnt << ')';
     NeedSpace = true;
   }
 
-  if (Expcnt != getExpcntBitMask(IV)) {
+  if (Expcnt != getExpcntBitMask(ISA)) {
     if (NeedSpace)
       O << ' ';
     O << "expcnt(" << Expcnt << ')';
     NeedSpace = true;
   }
 
-  if (Lgkmcnt != getLgkmcntBitMask(IV)) {
+  if (Lgkmcnt != getLgkmcntBitMask(ISA)) {
     if (NeedSpace)
       O << ' ';
     O << "lgkmcnt(" << Lgkmcnt << ')';

Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp?rev=294449&r1=294448&r2=294449&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp Wed Feb  8 08:05:23 2017
@@ -16,6 +16,7 @@
 #include "AMDGPU.h"
 #include "AMDGPURuntimeMetadata.h"
 #include "MCTargetDesc/AMDGPURuntimeMD.h"
+#include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
@@ -92,9 +93,30 @@ template <> struct MappingTraits<Kernel:
   static const bool flow = true;
 };
 
+template <> struct MappingTraits<IsaInfo::Metadata> {
+  static void mapping(IO &YamlIO, IsaInfo::Metadata &I) {
+    YamlIO.mapRequired(KeyName::IsaInfoWavefrontSize, I.WavefrontSize);
+    YamlIO.mapRequired(KeyName::IsaInfoLocalMemorySize, I.LocalMemorySize);
+    YamlIO.mapRequired(KeyName::IsaInfoEUsPerCU, I.EUsPerCU);
+    YamlIO.mapRequired(KeyName::IsaInfoMaxWavesPerEU, I.MaxWavesPerEU);
+    YamlIO.mapRequired(KeyName::IsaInfoMaxFlatWorkGroupSize,
+        I.MaxFlatWorkGroupSize);
+    YamlIO.mapRequired(KeyName::IsaInfoSGPRAllocGranule, I.SGPRAllocGranule);
+    YamlIO.mapRequired(KeyName::IsaInfoTotalNumSGPRs, I.TotalNumSGPRs);
+    YamlIO.mapRequired(KeyName::IsaInfoAddressableNumSGPRs,
+        I.AddressableNumSGPRs);
+    YamlIO.mapRequired(KeyName::IsaInfoVGPRAllocGranule, I.VGPRAllocGranule);
+    YamlIO.mapRequired(KeyName::IsaInfoTotalNumVGPRs, I.TotalNumVGPRs);
+    YamlIO.mapRequired(KeyName::IsaInfoAddressableNumVGPRs,
+        I.AddressableNumVGPRs);
+  }
+  static const bool flow = true;
+};
+
 template <> struct MappingTraits<Program::Metadata> {
   static void mapping(IO &YamlIO, Program::Metadata &Prog) {
     YamlIO.mapRequired(KeyName::MDVersion, Prog.MDVersionSeq);
+    YamlIO.mapRequired(KeyName::IsaInfo, Prog.IsaInfo);
     YamlIO.mapOptional(KeyName::PrintfInfo, Prog.PrintfInfo);
     YamlIO.mapOptional(KeyName::Kernels, Prog.Kernels);
   }
@@ -383,10 +405,27 @@ static void checkRuntimeMDYAMLString(con
   }
 }
 
-std::string llvm::getRuntimeMDYAMLString(Module &M) {
+std::string llvm::getRuntimeMDYAMLString(const FeatureBitset &Features,
+                                         const Module &M) {
   Program::Metadata Prog;
   Prog.MDVersionSeq.push_back(MDVersion);
   Prog.MDVersionSeq.push_back(MDRevision);
+  Prog.IsaInfo.WavefrontSize = AMDGPU::IsaInfo::getWavefrontSize(Features);
+  Prog.IsaInfo.LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(Features);
+  Prog.IsaInfo.EUsPerCU = AMDGPU::IsaInfo::getEUsPerCU(Features);
+  Prog.IsaInfo.MaxWavesPerEU = AMDGPU::IsaInfo::getMaxWavesPerEU(Features);
+  Prog.IsaInfo.MaxFlatWorkGroupSize =
+      AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(Features);
+  Prog.IsaInfo.SGPRAllocGranule =
+      AMDGPU::IsaInfo::getSGPRAllocGranule(Features);
+  Prog.IsaInfo.TotalNumSGPRs = AMDGPU::IsaInfo::getTotalNumSGPRs(Features);
+  Prog.IsaInfo.AddressableNumSGPRs =
+      AMDGPU::IsaInfo::getAddressableNumSGPRs(Features);
+  Prog.IsaInfo.VGPRAllocGranule =
+      AMDGPU::IsaInfo::getVGPRAllocGranule(Features);
+  Prog.IsaInfo.TotalNumVGPRs = AMDGPU::IsaInfo::getTotalNumVGPRs(Features);
+  Prog.IsaInfo.AddressableNumVGPRs =
+      AMDGPU::IsaInfo::getAddressableNumVGPRs(Features);
 
   // Set PrintfInfo.
   if (auto MD = M.getNamedMetadata("llvm.printf.fmts")) {

Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h?rev=294449&r1=294448&r2=294449&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h Wed Feb  8 08:05:23 2017
@@ -17,10 +17,12 @@
 #include <string>
 
 namespace llvm {
+class FeatureBitset;
 class Module;
 
 // Get runtime metadata as YAML string.
-std::string getRuntimeMDYAMLString(Module &M);
+std::string getRuntimeMDYAMLString(const FeatureBitset &Features,
+                                   const Module &M);
 
 }
 #endif

Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp?rev=294449&r1=294448&r2=294449&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp Wed Feb  8 08:05:23 2017
@@ -93,9 +93,10 @@ void AMDGPUTargetAsmStreamer::EmitAMDGPU
   OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n';
 }
 
-void AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(Module &M) {
+void AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(const FeatureBitset &Features,
+                                                  const Module &M) {
   OS << "\t.amdgpu_runtime_metadata\n";
-  OS << getRuntimeMDYAMLString(M);
+  OS << getRuntimeMDYAMLString(Features, M);
   OS << "\n\t.end_amdgpu_runtime_metadata\n";
 }
 
@@ -236,6 +237,7 @@ void AMDGPUTargetELFStreamer::EmitRuntim
   );
 }
 
-void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(Module &M) {
-  EmitRuntimeMetadata(getRuntimeMDYAMLString(M));
+void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(const FeatureBitset &Features,
+                                                  const Module &M) {
+  EmitRuntimeMetadata(getRuntimeMDYAMLString(Features, M));
 }

Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h?rev=294449&r1=294448&r2=294449&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h Wed Feb  8 08:05:23 2017
@@ -17,6 +17,7 @@ namespace llvm {
 #include "AMDGPUPTNote.h"
 
 class DataLayout;
+class FeatureBitset;
 class Function;
 class MCELFStreamer;
 class MCSymbol;
@@ -46,7 +47,8 @@ public:
 
   virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0;
 
-  virtual void EmitRuntimeMetadata(Module &M) = 0;
+  virtual void EmitRuntimeMetadata(const FeatureBitset &Features,
+                                   const Module &M) = 0;
 
   virtual void EmitRuntimeMetadata(StringRef Metadata) = 0;
 };
@@ -70,7 +72,8 @@ public:
 
   void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
 
-  void EmitRuntimeMetadata(Module &M) override;
+  void EmitRuntimeMetadata(const FeatureBitset &Features,
+                           const Module &M) override;
 
   void EmitRuntimeMetadata(StringRef Metadata) override;
 };
@@ -101,7 +104,8 @@ public:
 
   void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
 
-  void EmitRuntimeMetadata(Module &M) override;
+  void EmitRuntimeMetadata(const FeatureBitset &Features,
+                           const Module &M) override;
 
   void EmitRuntimeMetadata(StringRef Metadata) override;
 };

Modified: llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp?rev=294449&r1=294448&r2=294449&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp Wed Feb  8 08:05:23 2017
@@ -47,7 +47,6 @@
 #define DEBUG_TYPE "si-insert-waits"
 
 using namespace llvm;
-using namespace llvm::AMDGPU;
 
 namespace {
 
@@ -76,7 +75,7 @@ private:
   const SIInstrInfo *TII = nullptr;
   const SIRegisterInfo *TRI = nullptr;
   const MachineRegisterInfo *MRI;
-  IsaVersion IV;
+  AMDGPU::IsaInfo::IsaVersion ISA;
 
   /// \brief Constant zero value
   static const Counters ZeroCounts;
@@ -427,10 +426,10 @@ bool SIInsertWaits::insertWait(MachineBa
 
   // Build the wait instruction
   BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
-    .addImm(encodeWaitcnt(IV,
-                          Counts.Named.VM,
-                          Counts.Named.EXP,
-                          Counts.Named.LGKM));
+    .addImm(AMDGPU::encodeWaitcnt(ISA,
+                                  Counts.Named.VM,
+                                  Counts.Named.EXP,
+                                  Counts.Named.LGKM));
 
   LastOpcodeType = OTHER;
   LastInstWritesM0 = false;
@@ -458,9 +457,9 @@ void SIInsertWaits::handleExistingWait(M
   unsigned Imm = I->getOperand(0).getImm();
   Counters Counts, WaitOn;
 
-  Counts.Named.VM = decodeVmcnt(IV, Imm);
-  Counts.Named.EXP = decodeExpcnt(IV, Imm);
-  Counts.Named.LGKM = decodeLgkmcnt(IV, Imm);
+  Counts.Named.VM = AMDGPU::decodeVmcnt(ISA, Imm);
+  Counts.Named.EXP = AMDGPU::decodeExpcnt(ISA, Imm);
+  Counts.Named.LGKM = AMDGPU::decodeLgkmcnt(ISA, Imm);
 
   for (unsigned i = 0; i < 3; ++i) {
     if (Counts.Array[i] <= LastIssued.Array[i])
@@ -534,12 +533,12 @@ bool SIInsertWaits::runOnMachineFunction
   TII = ST->getInstrInfo();
   TRI = &TII->getRegisterInfo();
   MRI = &MF.getRegInfo();
-  IV = getIsaVersion(ST->getFeatureBits());
+  ISA = AMDGPU::IsaInfo::getIsaVersion(ST->getFeatureBits());
   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
 
-  HardwareLimits.Named.VM = getVmcntBitMask(IV);
-  HardwareLimits.Named.EXP = getExpcntBitMask(IV);
-  HardwareLimits.Named.LGKM = getLgkmcntBitMask(IV);
+  HardwareLimits.Named.VM = AMDGPU::getVmcntBitMask(ISA);
+  HardwareLimits.Named.EXP = AMDGPU::getExpcntBitMask(ISA);
+  HardwareLimits.Named.LGKM = AMDGPU::getLgkmcntBitMask(ISA);
 
   WaitedOn = ZeroCounts;
   DelayedWaitOn = ZeroCounts;

Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp?rev=294449&r1=294448&r2=294449&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Wed Feb  8 08:05:23 2017
@@ -76,47 +76,240 @@ unsigned getLgkmcntBitShift() { return 8
 /// \returns Lgkmcnt bit width.
 unsigned getLgkmcntBitWidth() { return 4; }
 
-} // anonymous namespace
+} // namespace anonymous
 
 namespace llvm {
 namespace AMDGPU {
 
-IsaVersion getIsaVersion(const FeatureBitset &Features) {
+namespace IsaInfo {
 
+IsaVersion getIsaVersion(const FeatureBitset &Features) {
+  // CI.
   if (Features.test(FeatureISAVersion7_0_0))
     return {7, 0, 0};
-
   if (Features.test(FeatureISAVersion7_0_1))
     return {7, 0, 1};
-
   if (Features.test(FeatureISAVersion7_0_2))
     return {7, 0, 2};
 
+  // VI.
   if (Features.test(FeatureISAVersion8_0_0))
     return {8, 0, 0};
-
   if (Features.test(FeatureISAVersion8_0_1))
     return {8, 0, 1};
-
   if (Features.test(FeatureISAVersion8_0_2))
     return {8, 0, 2};
-
   if (Features.test(FeatureISAVersion8_0_3))
     return {8, 0, 3};
-
   if (Features.test(FeatureISAVersion8_0_4))
     return {8, 0, 4};
-
   if (Features.test(FeatureISAVersion8_1_0))
     return {8, 1, 0};
 
-  return {0, 0, 0};
+  if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
+    return {0, 0, 0};
+  return {7, 0, 0};
+}
+
+unsigned getWavefrontSize(const FeatureBitset &Features) {
+  if (Features.test(FeatureWavefrontSize16))
+    return 16;
+  if (Features.test(FeatureWavefrontSize32))
+    return 32;
+
+  return 64;
+}
+
+unsigned getLocalMemorySize(const FeatureBitset &Features) {
+  if (Features.test(FeatureLocalMemorySize32768))
+    return 32768;
+  if (Features.test(FeatureLocalMemorySize65536))
+    return 65536;
+
+  return 0;
+}
+
+unsigned getEUsPerCU(const FeatureBitset &Features) {
+  return 4;
+}
+
+unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
+                               unsigned FlatWorkGroupSize) {
+  if (!Features.test(FeatureGCN))
+    return 8;
+  return getWavesPerWorkGroup(Features, FlatWorkGroupSize) == 1 ? 40 : 16;
+}
+
+unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
+  return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
+}
+
+unsigned getMaxWavesPerCU(const FeatureBitset &Features,
+                          unsigned FlatWorkGroupSize) {
+  return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
+}
+
+unsigned getMinWavesPerEU(const FeatureBitset &Features) {
+  return 1;
+}
+
+unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
+  if (!Features.test(FeatureGCN))
+    return 8;
+  // FIXME: Need to take scratch memory into account.
+  return 10;
+}
+
+unsigned getMaxWavesPerEU(const FeatureBitset &Features,
+                          unsigned FlatWorkGroupSize) {
+  return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
+                 getEUsPerCU(Features)) / getEUsPerCU(Features);
+}
+
+unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
+  return 1;
+}
+
+unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
+  return 2048;
+}
+
+unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
+                              unsigned FlatWorkGroupSize) {
+  return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
+                 getWavefrontSize(Features);
+}
+
+unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
+  IsaVersion Version = getIsaVersion(Features);
+  if (Version.Major >= 8)
+    return 16;
+  return 8;
+}
+
+unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
+  return 8;
+}
+
+unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
+  IsaVersion Version = getIsaVersion(Features);
+  if (Version.Major >= 8)
+    return 800;
+  return 512;
 }
 
+unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
+  if (Features.test(FeatureSGPRInitBug))
+    return FIXED_NUM_SGPRS_FOR_INIT_BUG;
+
+  IsaVersion Version = getIsaVersion(Features);
+  if (Version.Major >= 8)
+    return 102;
+  return 104;
+}
+
+unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
+  IsaVersion Version = getIsaVersion(Features);
+  if (Version.Major >= 8) {
+    switch (WavesPerEU) {
+      case 0:  return 0;
+      case 10: return 0;
+      case 9:  return 0;
+      case 8:  return 81;
+      default: return 97;
+    }
+  } else {
+    switch (WavesPerEU) {
+      case 0:  return 0;
+      case 10: return 0;
+      case 9:  return 49;
+      case 8:  return 57;
+      case 7:  return 65;
+      case 6:  return 73;
+      case 5:  return 81;
+      default: return 97;
+    }
+  }
+}
+
+unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
+                        bool Addressable) {
+  IsaVersion Version = getIsaVersion(Features);
+  if (Version.Major >= 8) {
+    switch (WavesPerEU) {
+      case 0:  return 80;
+      case 10: return 80;
+      case 9:  return 80;
+      case 8:  return 96;
+      default: return Addressable ? getAddressableNumSGPRs(Features) : 112;
+    }
+  } else {
+    switch (WavesPerEU) {
+      case 0:  return 48;
+      case 10: return 48;
+      case 9:  return 56;
+      case 8:  return 64;
+      case 7:  return 72;
+      case 6:  return 80;
+      case 5:  return 96;
+      default: return getAddressableNumSGPRs(Features);
+    }
+  }
+}
+
+unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
+  return 4;
+}
+
+unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
+  return getVGPRAllocGranule(Features);
+}
+
+unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
+  return 256;
+}
+
+unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
+  return getTotalNumVGPRs(Features);
+}
+
+unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
+  switch (WavesPerEU) {
+    case 0:  return 0;
+    case 10: return 0;
+    case 9:  return 25;
+    case 8:  return 29;
+    case 7:  return 33;
+    case 6:  return 37;
+    case 5:  return 41;
+    case 4:  return 49;
+    case 3:  return 65;
+    case 2:  return 85;
+    default: return 129;
+  }
+}
+
+unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
+  switch (WavesPerEU) {
+    case 0:  return 24;
+    case 10: return 24;
+    case 9:  return 28;
+    case 8:  return 32;
+    case 7:  return 36;
+    case 6:  return 40;
+    case 5:  return 48;
+    case 4:  return 64;
+    case 3:  return 84;
+    case 2:  return 128;
+    default: return getTotalNumVGPRs(Features);
+  }
+}
+
+} // namespace IsaInfo
+
 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                                const FeatureBitset &Features) {
-
-  IsaVersion ISA = getIsaVersion(Features);
+  IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);
 
   memset(&Header, 0, sizeof(Header));
 
@@ -224,57 +417,60 @@ std::pair<int, int> getIntegerPairAttrib
   return Ints;
 }
 
-unsigned getWaitcntBitMask(IsaVersion Version) {
-  unsigned Vmcnt = getBitMask(getVmcntBitShift(), getVmcntBitWidth());
-  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
-  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
-  return Vmcnt | Expcnt | Lgkmcnt;
-}
-
-unsigned getVmcntBitMask(IsaVersion Version) {
+unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
   return (1 << getVmcntBitWidth()) - 1;
 }
 
-unsigned getExpcntBitMask(IsaVersion Version) {
+unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
   return (1 << getExpcntBitWidth()) - 1;
 }
 
-unsigned getLgkmcntBitMask(IsaVersion Version) {
+unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
   return (1 << getLgkmcntBitWidth()) - 1;
 }
 
-unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt) {
+unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
+  unsigned Vmcnt = getBitMask(getVmcntBitShift(), getVmcntBitWidth());
+  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
+  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
+  return Vmcnt | Expcnt | Lgkmcnt;
+}
+
+unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
   return unpackBits(Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
 }
 
-unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt) {
+unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
   return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
 }
 
-unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt) {
+unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
   return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
 }
 
-void decodeWaitcnt(IsaVersion Version, unsigned Waitcnt,
+void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                    unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
   Vmcnt = decodeVmcnt(Version, Waitcnt);
   Expcnt = decodeExpcnt(Version, Waitcnt);
   Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
 }
 
-unsigned encodeVmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Vmcnt) {
+unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+                     unsigned Vmcnt) {
   return packBits(Vmcnt, Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
 }
 
-unsigned encodeExpcnt(IsaVersion Version, unsigned Waitcnt, unsigned Expcnt) {
+unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+                      unsigned Expcnt) {
   return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
 }
 
-unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt) {
+unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+                       unsigned Lgkmcnt) {
   return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
 }
 
-unsigned encodeWaitcnt(IsaVersion Version,
+unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
                        unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
   unsigned Waitcnt = getWaitcntBitMask(Version);
   Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);

Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h?rev=294449&r1=294448&r2=294449&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h Wed Feb  8 08:05:23 2017
@@ -34,16 +34,118 @@ class MCSubtargetInfo;
 
 namespace AMDGPU {
 
-LLVM_READONLY
-int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
+namespace IsaInfo {
 
+enum {
+  // The closed Vulkan driver sets 96, which limits the wave count to 8 but
+  // doesn't spill SGPRs as much as when 80 is set.
+  FIXED_NUM_SGPRS_FOR_INIT_BUG = 96
+};
+
+/// \brief Instruction set architecture version.
 struct IsaVersion {
   unsigned Major;
   unsigned Minor;
   unsigned Stepping;
 };
 
+/// \returns Isa version for given subtarget \p Features.
 IsaVersion getIsaVersion(const FeatureBitset &Features);
+
+/// \returns Wavefront size for given subtarget \p Features.
+unsigned getWavefrontSize(const FeatureBitset &Features);
+
+/// \returns Local memory size in bytes for given subtarget \p Features.
+unsigned getLocalMemorySize(const FeatureBitset &Features);
+
+/// \returns Number of execution units per compute unit for given subtarget \p
+/// Features.
+unsigned getEUsPerCU(const FeatureBitset &Features);
+
+/// \returns Maximum number of work groups per compute unit for given subtarget
+/// \p Features and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
+                               unsigned FlatWorkGroupSize);
+
+/// \returns Maximum number of waves per compute unit for given subtarget \p
+/// Features without any kind of limitation.
+unsigned getMaxWavesPerCU(const FeatureBitset &Features);
+
+/// \returns Maximum number of waves per compute unit for given subtarget \p
+/// Features and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWavesPerCU(const FeatureBitset &Features,
+                          unsigned FlatWorkGroupSize);
+
+/// \returns Minimum number of waves per execution unit for given subtarget \p
+/// Features.
+unsigned getMinWavesPerEU(const FeatureBitset &Features);
+
+/// \returns Maximum number of waves per execution unit for given subtarget \p
+/// Features without any kind of limitation.
+unsigned getMaxWavesPerEU(const FeatureBitset &Features);
+
+/// \returns Maximum number of waves per execution unit for given subtarget \p
+/// Features and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWavesPerEU(const FeatureBitset &Features,
+                          unsigned FlatWorkGroupSize);
+
+/// \returns Minimum flat work group size for given subtarget \p Features.
+unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features);
+
+/// \returns Maximum flat work group size for given subtarget \p Features.
+unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features);
+
+/// \returns Number of waves per work group for given subtarget \p Features and
+/// limited by given \p FlatWorkGroupSize.
+unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
+                              unsigned FlatWorkGroupSize);
+
+/// \returns SGPR allocation granularity for given subtarget \p Features.
+unsigned getSGPRAllocGranule(const FeatureBitset &Features);
+
+/// \returns SGPR encoding granularity for given subtarget \p Features.
+unsigned getSGPREncodingGranule(const FeatureBitset &Features);
+
+/// \returns Total number of SGPRs for given subtarget \p Features.
+unsigned getTotalNumSGPRs(const FeatureBitset &Features);
+
+/// \returns Addressable number of SGPRs for given subtarget \p Features.
+unsigned getAddressableNumSGPRs(const FeatureBitset &Features);
+
+/// \returns Minimum number of SGPRs that meets the given number of waves per
+/// execution unit requirement for given subtarget \p Features.
+unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
+
+/// \returns Maximum number of SGPRs that meets the given number of waves per
+/// execution unit requirement for given subtarget \p Features.
+unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
+                        bool Addressable);
+
+/// \returns VGPR allocation granularity for given subtarget \p Features.
+unsigned getVGPRAllocGranule(const FeatureBitset &Features);
+
+/// \returns VGPR encoding granularity for given subtarget \p Features.
+unsigned getVGPREncodingGranule(const FeatureBitset &Features);
+
+/// \returns Total number of VGPRs for given subtarget \p Features.
+unsigned getTotalNumVGPRs(const FeatureBitset &Features);
+
+/// \returns Addressable number of VGPRs for given subtarget \p Features.
+unsigned getAddressableNumVGPRs(const FeatureBitset &Features);
+
+/// \returns Minimum number of VGPRs that meets the given number of waves per
+/// execution unit requirement for given subtarget \p Features.
+unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
+
+/// \returns Maximum number of VGPRs that meets the given number of waves per
+/// execution unit requirement for given subtarget \p Features.
+unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
+
+} // namespace IsaInfo
+
+LLVM_READONLY
+int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
+
 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                                const FeatureBitset &Features);
 MCSection *getHSATextSection(MCContext &Ctx);
@@ -84,26 +186,26 @@ std::pair<int, int> getIntegerPairAttrib
                                             std::pair<int, int> Default,
                                             bool OnlyFirstRequired = false);
 
-/// \returns Waitcnt bit mask for given isa \p Version.
-unsigned getWaitcntBitMask(IsaVersion Version);
-
 /// \returns Vmcnt bit mask for given isa \p Version.
-unsigned getVmcntBitMask(IsaVersion Version);
+unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version);
 
 /// \returns Expcnt bit mask for given isa \p Version.
-unsigned getExpcntBitMask(IsaVersion Version);
+unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version);
 
 /// \returns Lgkmcnt bit mask for given isa \p Version.
-unsigned getLgkmcntBitMask(IsaVersion Version);
+unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version);
+
+/// \returns Waitcnt bit mask for given isa \p Version.
+unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version);
 
 /// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
-unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt);
+unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
 
 /// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
-unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt);
+unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
 
 /// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
-unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt);
+unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
 
 /// \brief Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
 /// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
@@ -113,17 +215,20 @@ unsigned decodeLgkmcnt(IsaVersion Versio
 ///     \p Vmcnt = \p Waitcnt[3:0]
 ///     \p Expcnt = \p Waitcnt[6:4]
 ///     \p Lgkmcnt = \p Waitcnt[11:8]
-void decodeWaitcnt(IsaVersion Version, unsigned Waitcnt,
+void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                    unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
 
 /// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
-unsigned encodeVmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Vmcnt);
+unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+                     unsigned Vmcnt);
 
 /// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
-unsigned encodeExpcnt(IsaVersion Version, unsigned Waitcnt, unsigned Expcnt);
+unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+                      unsigned Expcnt);
 
 /// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
-unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt);
+unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+                       unsigned Lgkmcnt);
 
 /// \brief Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
 /// \p Version.
@@ -135,7 +240,7 @@ unsigned encodeLgkmcnt(IsaVersion Versio
 ///
 /// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
 /// isa \p Version.
-unsigned encodeWaitcnt(IsaVersion Version,
+unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
                        unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
 
 unsigned getInitialPSInputAddr(const Function &F);

Modified: llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll?rev=294449&r1=294448&r2=294449&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll Wed Feb  8 08:05:23 2017
@@ -1,6 +1,6 @@
 ; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata | FileCheck %s
 ; check llc does not crash for invalid opencl version metadata
 
-; CHECK: { amd.MDVersion: [ 2, 0 ] }
+; CHECK: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 } }
 
 !opencl.ocl.version = !{}

Modified: llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll?rev=294449&r1=294448&r2=294449&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll Wed Feb  8 08:05:23 2017
@@ -1,7 +1,7 @@
 ; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata | FileCheck %s
 ; check llc does not crash for invalid opencl version metadata
 
-; CHECK: { amd.MDVersion: [ 2, 0 ] }
+; CHECK: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 } }
 
 !opencl.ocl.version = !{!0}
 !0 = !{}

Modified: llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll?rev=294449&r1=294448&r2=294449&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll Wed Feb  8 08:05:23 2017
@@ -1,7 +1,7 @@
 ; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata | FileCheck %s
 ; check llc does not crash for invalid opencl version metadata
 
-; CHECK: { amd.MDVersion: [ 2, 0 ] }
+; CHECK: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 } }
 
 !opencl.ocl.version = !{!0}
 !0 = !{i32 1}

Modified: llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll?rev=294449&r1=294448&r2=294449&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll Wed Feb  8 08:05:23 2017
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata -elf-output-style=GNU -notes | FileCheck %s --check-prefix=NOTES
+; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata -elf-output-style=GNU -notes | FileCheck %s --check-prefix=NOTES --check-prefix=SI
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata -elf-output-style=GNU -notes | FileCheck %s --check-prefix=NOTES --check-prefix=VI
 ; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -amdgpu-dump-rtmd -amdgpu-check-rtmd-parser %s -o - 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=PARSER %s
 
 %struct.A = type { i8, float }
@@ -11,9 +12,10 @@
 %opencl.clk_event_t = type opaque
 
 ; CHECK: ---
-; CHECK-NEXT: { amd.MDVersion: [ 2, 0 ], amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels: 
+; SI: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels: 
+; VI: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 16, amd.IsaInfoTotalNumSGPRs: 800, amd.IsaInfoAddressableNumSGPRs: 102, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels: 
 
-; CHECK-NEXT:   - { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
+; CHECK:   - { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args: 
 ; CHECK-NEXT:       - { amd.ArgSize: 1, amd.ArgAlign: 1, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char, amd.ArgAccQual: 0 }
 ; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
 ; CHECK-NEXT:       - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
@@ -345,7 +347,9 @@ define amdgpu_kernel void @test_pointee_
 ; NOTES-NEXT: Owner    Data size    Description
 ; NOTES-NEXT: AMD      0x00000008   Unknown note type: (0x00000001)
 ; NOTES-NEXT: AMD      0x0000001b   Unknown note type: (0x00000003)
-; NOTES-NEXT: AMD      0x00005196   Unknown note type: (0x00000008)
+
+; SI:         AMD      0x0000530d   Unknown note type: (0x00000008)
+; VI:         AMD      0x0000530e   Unknown note type: (0x00000008)
 
 !llvm.printf.fmts = !{!100, !101}