[llvm] r294449 - [AMDGPU] Add target information that is required by tools to metadata
Konstantin Zhuravlyov via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 8 06:05:23 PST 2017
Author: kzhuravl
Date: Wed Feb 8 08:05:23 2017
New Revision: 294449
URL: http://llvm.org/viewvc/llvm-project?rev=294449&view=rev
Log:
[AMDGPU] Add target information that is required by tools to metadata
Differential Revision: https://reviews.llvm.org/D28760#fb670e28
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp
llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h
llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp
llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll
llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll
llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll
llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp?rev=294449&r1=294448&r2=294449&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Wed Feb 8 08:05:23 2017
@@ -109,12 +109,13 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFil
TS->EmitDirectiveHSACodeObjectVersion(2, 1);
const MCSubtargetInfo *STI = TM.getMCSubtargetInfo();
- AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits());
+ AMDGPU::IsaInfo::IsaVersion ISA =
+ AMDGPU::IsaInfo::getIsaVersion(STI->getFeatureBits());
TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping,
"AMD", "AMDGPU");
// Emit runtime metadata.
- TS->EmitRuntimeMetadata(M);
+ TS->EmitRuntimeMetadata(STI->getFeatureBits(), M);
}
bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
@@ -485,7 +486,8 @@ void AMDGPUAsmPrinter::getSIProgramInfo(
DiagnosticInfoResourceLimit Diag(*MF.getFunction(),
"addressable scalar registers",
MaxSGPR + 1, DS_Error,
- DK_ResourceLimit, MaxAddressableNumSGPRs);
+ DK_ResourceLimit,
+ MaxAddressableNumSGPRs);
Ctx.diagnose(Diag);
MaxSGPR = MaxAddressableNumSGPRs - 1;
}
@@ -509,25 +511,27 @@ void AMDGPUAsmPrinter::getSIProgramInfo(
if (STM.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ||
STM.hasSGPRInitBug()) {
- unsigned MaxNumSGPRs = STM.getAddressableNumSGPRs();
- if (ProgInfo.NumSGPR > MaxNumSGPRs) {
- // This can happen due to a compiler bug or when using inline asm to use the
- // registers which are usually reserved for vcc etc.
-
+ unsigned MaxAddressableNumSGPRs = STM.getAddressableNumSGPRs();
+ if (ProgInfo.NumSGPR > MaxAddressableNumSGPRs) {
+ // This can happen due to a compiler bug or when using inline asm to use
+ // the registers which are usually reserved for vcc etc.
LLVMContext &Ctx = MF.getFunction()->getContext();
DiagnosticInfoResourceLimit Diag(*MF.getFunction(),
"scalar registers",
ProgInfo.NumSGPR, DS_Error,
- DK_ResourceLimit, MaxNumSGPRs);
+ DK_ResourceLimit,
+ MaxAddressableNumSGPRs);
Ctx.diagnose(Diag);
- ProgInfo.NumSGPR = MaxNumSGPRs;
- ProgInfo.NumSGPRsForWavesPerEU = MaxNumSGPRs;
+ ProgInfo.NumSGPR = MaxAddressableNumSGPRs;
+ ProgInfo.NumSGPRsForWavesPerEU = MaxAddressableNumSGPRs;
}
}
if (STM.hasSGPRInitBug()) {
- ProgInfo.NumSGPR = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
- ProgInfo.NumSGPRsForWavesPerEU = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
+ ProgInfo.NumSGPR =
+ AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
+ ProgInfo.NumSGPRsForWavesPerEU =
+ AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
}
if (MFI->NumUserSGPRs > STM.getMaxNumUserSGPRs()) {
@@ -554,9 +558,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(
STM.getVGPREncodingGranule());
ProgInfo.VGPRBlocks = ProgInfo.VGPRBlocks / STM.getVGPREncodingGranule() - 1;
- // Record first reserved register and reserved register count fields, and
- // update max register counts if "amdgpu-debugger-reserve-regs" attribute was
- // requested.
+ // Record first reserved VGPR and number of reserved VGPRs.
ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? MaxVGPR + 1 : 0;
ProgInfo.ReservedVGPRCount = STM.getReservedNumVGPRs(MF);
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h?rev=294449&r1=294448&r2=294449&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h Wed Feb 8 08:05:23 2017
@@ -41,37 +41,90 @@ namespace RuntimeMD {
// Version and revision of runtime metadata
const unsigned char MDVersion = 2;
- const unsigned char MDRevision = 0;
+ const unsigned char MDRevision = 1;
// Name of keys for runtime metadata.
namespace KeyName {
- const char MDVersion[] = "amd.MDVersion"; // Runtime metadata version
- const char Language[] = "amd.Language"; // Language
- const char LanguageVersion[] = "amd.LanguageVersion"; // Language version
- const char Kernels[] = "amd.Kernels"; // Kernels
- const char KernelName[] = "amd.KernelName"; // Kernel name
- const char Args[] = "amd.Args"; // Kernel arguments
- const char ArgSize[] = "amd.ArgSize"; // Kernel arg size
- const char ArgAlign[] = "amd.ArgAlign"; // Kernel arg alignment
- const char ArgTypeName[] = "amd.ArgTypeName"; // Kernel type name
- const char ArgName[] = "amd.ArgName"; // Kernel name
- const char ArgKind[] = "amd.ArgKind"; // Kernel argument kind
- const char ArgValueType[] = "amd.ArgValueType"; // Kernel argument value type
- const char ArgAddrQual[] = "amd.ArgAddrQual"; // Kernel argument address qualifier
- const char ArgAccQual[] = "amd.ArgAccQual"; // Kernel argument access qualifier
- const char ArgIsConst[] = "amd.ArgIsConst"; // Kernel argument is const qualified
- const char ArgIsRestrict[] = "amd.ArgIsRestrict"; // Kernel argument is restrict qualified
- const char ArgIsVolatile[] = "amd.ArgIsVolatile"; // Kernel argument is volatile qualified
- const char ArgIsPipe[] = "amd.ArgIsPipe"; // Kernel argument is pipe qualified
- const char ReqdWorkGroupSize[] = "amd.ReqdWorkGroupSize"; // Required work group size
- const char WorkGroupSizeHint[] = "amd.WorkGroupSizeHint"; // Work group size hint
- const char VecTypeHint[] = "amd.VecTypeHint"; // Vector type hint
- const char KernelIndex[] = "amd.KernelIndex"; // Kernel index for device enqueue
- const char NoPartialWorkGroups[] = "amd.NoPartialWorkGroups"; // No partial work groups
- const char PrintfInfo[] = "amd.PrintfInfo"; // Prinf function call information
- const char ArgActualAcc[] = "amd.ArgActualAcc"; // The actual kernel argument access qualifier
- const char ArgPointeeAlign[] = "amd.ArgPointeeAlign"; // Alignment of pointee type
+ // Runtime metadata version
+ const char MDVersion[] = "amd.MDVersion";
+
+ // Instruction set architecture information
+ const char IsaInfo[] = "amd.IsaInfo";
+ // Wavefront size
+ const char IsaInfoWavefrontSize[] = "amd.IsaInfoWavefrontSize";
+ // Local memory size in bytes
+ const char IsaInfoLocalMemorySize[] = "amd.IsaInfoLocalMemorySize";
+ // Number of execution units per compute unit
+ const char IsaInfoEUsPerCU[] = "amd.IsaInfoEUsPerCU";
+ // Maximum number of waves per execution unit
+ const char IsaInfoMaxWavesPerEU[] = "amd.IsaInfoMaxWavesPerEU";
+ // Maximum flat work group size
+ const char IsaInfoMaxFlatWorkGroupSize[] = "amd.IsaInfoMaxFlatWorkGroupSize";
+ // SGPR allocation granularity
+ const char IsaInfoSGPRAllocGranule[] = "amd.IsaInfoSGPRAllocGranule";
+ // Total number of SGPRs
+ const char IsaInfoTotalNumSGPRs[] = "amd.IsaInfoTotalNumSGPRs";
+ // Addressable number of SGPRs
+ const char IsaInfoAddressableNumSGPRs[] = "amd.IsaInfoAddressableNumSGPRs";
+ // VGPR allocation granularity
+ const char IsaInfoVGPRAllocGranule[] = "amd.IsaInfoVGPRAllocGranule";
+ // Total number of VGPRs
+ const char IsaInfoTotalNumVGPRs[] = "amd.IsaInfoTotalNumVGPRs";
+ // Addressable number of VGPRs
+ const char IsaInfoAddressableNumVGPRs[] = "amd.IsaInfoAddressableNumVGPRs";
+
+ // Language
+ const char Language[] = "amd.Language";
+ // Language version
+ const char LanguageVersion[] = "amd.LanguageVersion";
+
+ // Kernels
+ const char Kernels[] = "amd.Kernels";
+ // Kernel name
+ const char KernelName[] = "amd.KernelName";
+ // Kernel arguments
+ const char Args[] = "amd.Args";
+ // Kernel argument size in bytes
+ const char ArgSize[] = "amd.ArgSize";
+ // Kernel argument alignment
+ const char ArgAlign[] = "amd.ArgAlign";
+ // Kernel argument type name
+ const char ArgTypeName[] = "amd.ArgTypeName";
+ // Kernel argument name
+ const char ArgName[] = "amd.ArgName";
+ // Kernel argument kind
+ const char ArgKind[] = "amd.ArgKind";
+ // Kernel argument value type
+ const char ArgValueType[] = "amd.ArgValueType";
+ // Kernel argument address qualifier
+ const char ArgAddrQual[] = "amd.ArgAddrQual";
+ // Kernel argument access qualifier
+ const char ArgAccQual[] = "amd.ArgAccQual";
+ // Kernel argument is const qualified
+ const char ArgIsConst[] = "amd.ArgIsConst";
+ // Kernel argument is restrict qualified
+ const char ArgIsRestrict[] = "amd.ArgIsRestrict";
+ // Kernel argument is volatile qualified
+ const char ArgIsVolatile[] = "amd.ArgIsVolatile";
+ // Kernel argument is pipe qualified
+ const char ArgIsPipe[] = "amd.ArgIsPipe";
+ // Required work group size
+ const char ReqdWorkGroupSize[] = "amd.ReqdWorkGroupSize";
+ // Work group size hint
+ const char WorkGroupSizeHint[] = "amd.WorkGroupSizeHint";
+ // Vector type hint
+ const char VecTypeHint[] = "amd.VecTypeHint";
+ // Kernel index for device enqueue
+ const char KernelIndex[] = "amd.KernelIndex";
+ // No partial work groups
+ const char NoPartialWorkGroups[] = "amd.NoPartialWorkGroups";
+ // Prinf function call information
+ const char PrintfInfo[] = "amd.PrintfInfo";
+ // The actual kernel argument access qualifier
+ const char ArgActualAcc[] = "amd.ArgActualAcc";
+ // Alignment of pointee type
+ const char ArgPointeeAlign[] = "amd.ArgPointeeAlign";
} // end namespace KeyName
@@ -175,11 +228,45 @@ namespace RuntimeMD {
} // end namespace Kernel
+ namespace IsaInfo {
+
+ /// \brief In-memory representation of instruction set architecture
+ /// information.
+ struct Metadata {
+ /// \brief Wavefront size.
+ unsigned WavefrontSize = 0;
+ /// \brief Local memory size in bytes.
+ unsigned LocalMemorySize = 0;
+ /// \brief Number of execution units per compute unit.
+ unsigned EUsPerCU = 0;
+ /// \brief Maximum number of waves per execution unit.
+ unsigned MaxWavesPerEU = 0;
+ /// \brief Maximum flat work group size.
+ unsigned MaxFlatWorkGroupSize = 0;
+ /// \brief SGPR allocation granularity.
+ unsigned SGPRAllocGranule = 0;
+ /// \brief Total number of SGPRs.
+ unsigned TotalNumSGPRs = 0;
+ /// \brief Addressable number of SGPRs.
+ unsigned AddressableNumSGPRs = 0;
+ /// \brief VGPR allocation granularity.
+ unsigned VGPRAllocGranule = 0;
+ /// \brief Total number of VGPRs.
+ unsigned TotalNumVGPRs = 0;
+ /// \brief Addressable number of VGPRs.
+ unsigned AddressableNumVGPRs = 0;
+
+ Metadata() = default;
+ };
+
+ } // end namespace IsaInfo
+
namespace Program {
// In-memory representation of program information.
struct Metadata {
std::vector<uint8_t> MDVersionSeq;
+ IsaInfo::Metadata IsaInfo;
std::vector<std::string> PrintfInfo;
std::vector<Kernel::Metadata> Kernels;
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=294449&r1=294448&r2=294449&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Wed Feb 8 08:05:23 2017
@@ -323,53 +323,6 @@ unsigned SISubtarget::getOccupancyWithNu
return 1;
}
-unsigned SISubtarget::getMinNumSGPRs(unsigned WavesPerEU) const {
- if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
- switch (WavesPerEU) {
- case 0: return 0;
- case 10: return 0;
- case 9: return 0;
- case 8: return 81;
- default: return 97;
- }
- } else {
- switch (WavesPerEU) {
- case 0: return 0;
- case 10: return 0;
- case 9: return 49;
- case 8: return 57;
- case 7: return 65;
- case 6: return 73;
- case 5: return 81;
- default: return 97;
- }
- }
-}
-
-unsigned SISubtarget::getMaxNumSGPRs(unsigned WavesPerEU,
- bool Addressable) const {
- if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
- switch (WavesPerEU) {
- case 0: return 80;
- case 10: return 80;
- case 9: return 80;
- case 8: return 96;
- default: return Addressable ? getAddressableNumSGPRs() : 112;
- }
- } else {
- switch (WavesPerEU) {
- case 0: return 48;
- case 10: return 48;
- case 9: return 56;
- case 8: return 64;
- case 7: return 72;
- case 6: return 80;
- case 5: return 96;
- default: return getAddressableNumSGPRs();
- }
- }
-}
-
unsigned SISubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
if (MFI.hasFlatScratchInit()) {
@@ -428,44 +381,12 @@ unsigned SISubtarget::getMaxNumSGPRs(con
}
if (hasSGPRInitBug())
- MaxNumSGPRs = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
+ MaxNumSGPRs = AMDGPU::IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
return std::min(MaxNumSGPRs - getReservedNumSGPRs(MF),
MaxAddressableNumSGPRs);
}
-unsigned SISubtarget::getMinNumVGPRs(unsigned WavesPerEU) const {
- switch (WavesPerEU) {
- case 0: return 0;
- case 10: return 0;
- case 9: return 25;
- case 8: return 29;
- case 7: return 33;
- case 6: return 37;
- case 5: return 41;
- case 4: return 49;
- case 3: return 65;
- case 2: return 85;
- default: return 129;
- }
-}
-
-unsigned SISubtarget::getMaxNumVGPRs(unsigned WavesPerEU) const {
- switch (WavesPerEU) {
- case 0: return 24;
- case 10: return 24;
- case 9: return 28;
- case 8: return 32;
- case 7: return 36;
- case 6: return 40;
- case 5: return 48;
- case 4: return 64;
- case 3: return 84;
- case 2: return 128;
- default: return getTotalNumVGPRs();
- }
-}
-
unsigned SISubtarget::getMaxNumVGPRs(const MachineFunction &MF) const {
const Function &F = *MF.getFunction();
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=294449&r1=294448&r2=294449&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Wed Feb 8 08:05:23 2017
@@ -365,72 +365,71 @@ public:
return true;
}
+ void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b;}
+ bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal;}
+
/// \returns Number of execution units per compute unit supported by the
/// subtarget.
unsigned getEUsPerCU() const {
- return 4;
+ return AMDGPU::IsaInfo::getEUsPerCU(getFeatureBits());
}
/// \returns Maximum number of work groups per compute unit supported by the
- /// subtarget and limited by given flat work group size.
+ /// subtarget and limited by given \p FlatWorkGroupSize.
unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
- if (getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
- return 8;
- return getWavesPerWorkGroup(FlatWorkGroupSize) == 1 ? 40 : 16;
+ return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(getFeatureBits(),
+ FlatWorkGroupSize);
}
/// \returns Maximum number of waves per compute unit supported by the
/// subtarget without any kind of limitation.
unsigned getMaxWavesPerCU() const {
- return getMaxWavesPerEU() * getEUsPerCU();
+ return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits());
}
/// \returns Maximum number of waves per compute unit supported by the
- /// subtarget and limited by given flat work group size.
+ /// subtarget and limited by given \p FlatWorkGroupSize.
unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
- return getWavesPerWorkGroup(FlatWorkGroupSize);
+ return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits(),
+ FlatWorkGroupSize);
}
/// \returns Minimum number of waves per execution unit supported by the
/// subtarget.
unsigned getMinWavesPerEU() const {
- return 1;
+ return AMDGPU::IsaInfo::getMinWavesPerEU(getFeatureBits());
}
/// \returns Maximum number of waves per execution unit supported by the
/// subtarget without any kind of limitation.
unsigned getMaxWavesPerEU() const {
- if (getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
- return 8;
- // FIXME: Need to take scratch memory into account.
- return 10;
+ return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits());
}
/// \returns Maximum number of waves per execution unit supported by the
- /// subtarget and limited by given flat work group size.
+ /// subtarget and limited by given \p FlatWorkGroupSize.
unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
- return alignTo(getMaxWavesPerCU(FlatWorkGroupSize), getEUsPerCU()) /
- getEUsPerCU();
+ return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits(),
+ FlatWorkGroupSize);
}
/// \returns Minimum flat work group size supported by the subtarget.
unsigned getMinFlatWorkGroupSize() const {
- return 1;
+ return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(getFeatureBits());
}
/// \returns Maximum flat work group size supported by the subtarget.
unsigned getMaxFlatWorkGroupSize() const {
- return 2048;
+ return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(getFeatureBits());
}
- /// \returns Number of waves per work group given the flat work group size.
+ /// \returns Number of waves per work group supported by the subtarget and
+ /// limited by given \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
- return alignTo(FlatWorkGroupSize, getWavefrontSize()) / getWavefrontSize();
+ return AMDGPU::IsaInfo::getWavesPerWorkGroup(getFeatureBits(),
+ FlatWorkGroupSize);
}
- void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b;}
- bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal;}
-
/// \returns Subtarget's default pair of minimum/maximum flat work group sizes
/// for function \p F, or minimum/maximum flat work group sizes explicitly
/// requested using "amdgpu-flat-work-group-size" attribute attached to
@@ -492,13 +491,6 @@ public:
};
class SISubtarget final : public AMDGPUSubtarget {
-public:
- enum {
- // The closed Vulkan driver sets 96, which limits the wave count to 8 but
- // doesn't spill SGPRs as much as when 80 is set.
- FIXED_SGPR_COUNT_FOR_INIT_BUG = 96
- };
-
private:
SIInstrInfo InstrInfo;
SIFrameLowering FrameLowering;
@@ -644,39 +636,36 @@ public:
/// \returns SGPR allocation granularity supported by the subtarget.
unsigned getSGPRAllocGranule() const {
- if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
- return 16;
- return 8;
+ return AMDGPU::IsaInfo::getSGPRAllocGranule(getFeatureBits());
}
/// \returns SGPR encoding granularity supported by the subtarget.
unsigned getSGPREncodingGranule() const {
- return 8;
+ return AMDGPU::IsaInfo::getSGPREncodingGranule(getFeatureBits());
}
/// \returns Total number of SGPRs supported by the subtarget.
unsigned getTotalNumSGPRs() const {
- if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
- return 800;
- return 512;
+ return AMDGPU::IsaInfo::getTotalNumSGPRs(getFeatureBits());
}
/// \returns Addressable number of SGPRs supported by the subtarget.
unsigned getAddressableNumSGPRs() const {
- if (hasSGPRInitBug())
- return SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
- if (getGeneration() >= VOLCANIC_ISLANDS)
- return 102;
- return 104;
+ return AMDGPU::IsaInfo::getAddressableNumSGPRs(getFeatureBits());
}
/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement supported by the subtarget.
- unsigned getMinNumSGPRs(unsigned WavesPerEU) const;
+ unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
+ return AMDGPU::IsaInfo::getMinNumSGPRs(getFeatureBits(), WavesPerEU);
+ }
/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement supported by the subtarget.
- unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const;
+ unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
+ return AMDGPU::IsaInfo::getMaxNumSGPRs(getFeatureBits(), WavesPerEU,
+ Addressable);
+ }
/// \returns Reserved number of SGPRs for given function \p MF.
unsigned getReservedNumSGPRs(const MachineFunction &MF) const;
@@ -693,31 +682,35 @@ public:
/// \returns VGPR allocation granularity supported by the subtarget.
unsigned getVGPRAllocGranule() const {
- return 4;
+ return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits());;
}
/// \returns VGPR encoding granularity supported by the subtarget.
unsigned getVGPREncodingGranule() const {
- return getVGPRAllocGranule();
+ return AMDGPU::IsaInfo::getVGPREncodingGranule(getFeatureBits());
}
/// \returns Total number of VGPRs supported by the subtarget.
unsigned getTotalNumVGPRs() const {
- return 256;
+ return AMDGPU::IsaInfo::getTotalNumVGPRs(getFeatureBits());
}
/// \returns Addressable number of VGPRs supported by the subtarget.
unsigned getAddressableNumVGPRs() const {
- return getTotalNumVGPRs();
+ return AMDGPU::IsaInfo::getAddressableNumVGPRs(getFeatureBits());
}
/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement supported by the subtarget.
- unsigned getMinNumVGPRs(unsigned WavesPerEU) const;
+ unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
+ return AMDGPU::IsaInfo::getMinNumVGPRs(getFeatureBits(), WavesPerEU);
+ }
/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement supported by the subtarget.
- unsigned getMaxNumVGPRs(unsigned WavesPerEU) const;
+ unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
+ return AMDGPU::IsaInfo::getMaxNumVGPRs(getFeatureBits(), WavesPerEU);
+ }
/// \returns Reserved number of VGPRs for given function \p MF.
unsigned getReservedNumVGPRs(const MachineFunction &MF) const {
Modified: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp?rev=294449&r1=294448&r2=294449&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp Wed Feb 8 08:05:23 2017
@@ -801,14 +801,16 @@ public:
// Currently there is none suitable machinery in the core llvm-mc for this.
// MCSymbol::isRedefinable is intended for another purpose, and
// AsmParser::parseDirectiveSet() cannot be specialized for specific target.
- AMDGPU::IsaVersion Isa = AMDGPU::getIsaVersion(getSTI().getFeatureBits());
+ AMDGPU::IsaInfo::IsaVersion ISA =
+ AMDGPU::IsaInfo::getIsaVersion(getSTI().getFeatureBits());
MCContext &Ctx = getContext();
- MCSymbol *Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
- Sym->setVariableValue(MCConstantExpr::create(Isa.Major, Ctx));
+ MCSymbol *Sym =
+ Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
+ Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
- Sym->setVariableValue(MCConstantExpr::create(Isa.Minor, Ctx));
+ Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
- Sym->setVariableValue(MCConstantExpr::create(Isa.Stepping, Ctx));
+ Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
}
KernelScope.initialize(getContext());
}
@@ -1867,9 +1869,10 @@ bool AMDGPUAsmParser::ParseDirectiveHSAC
// If this directive has no arguments, then use the ISA version for the
// targeted GPU.
if (getLexer().is(AsmToken::EndOfStatement)) {
- AMDGPU::IsaVersion Isa = AMDGPU::getIsaVersion(getSTI().getFeatureBits());
- getTargetStreamer().EmitDirectiveHSACodeObjectISA(Isa.Major, Isa.Minor,
- Isa.Stepping,
+ AMDGPU::IsaInfo::IsaVersion ISA =
+ AMDGPU::IsaInfo::getIsaVersion(getSTI().getFeatureBits());
+ getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
+ ISA.Stepping,
"AMD", "AMDGPU");
return false;
}
@@ -2455,13 +2458,14 @@ bool AMDGPUAsmParser::parseCnt(int64_t &
if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma))
Parser.Lex();
- IsaVersion IV = getIsaVersion(getSTI().getFeatureBits());
+ AMDGPU::IsaInfo::IsaVersion ISA =
+ AMDGPU::IsaInfo::getIsaVersion(getSTI().getFeatureBits());
if (CntName == "vmcnt")
- IntVal = encodeVmcnt(IV, IntVal, CntVal);
+ IntVal = encodeVmcnt(ISA, IntVal, CntVal);
else if (CntName == "expcnt")
- IntVal = encodeExpcnt(IV, IntVal, CntVal);
+ IntVal = encodeExpcnt(ISA, IntVal, CntVal);
else if (CntName == "lgkmcnt")
- IntVal = encodeLgkmcnt(IV, IntVal, CntVal);
+ IntVal = encodeLgkmcnt(ISA, IntVal, CntVal);
else
return true;
@@ -2470,8 +2474,9 @@ bool AMDGPUAsmParser::parseCnt(int64_t &
OperandMatchResultTy
AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
- IsaVersion IV = getIsaVersion(getSTI().getFeatureBits());
- int64_t Waitcnt = getWaitcntBitMask(IV);
+ AMDGPU::IsaInfo::IsaVersion ISA =
+ AMDGPU::IsaInfo::getIsaVersion(getSTI().getFeatureBits());
+ int64_t Waitcnt = getWaitcntBitMask(ISA);
SMLoc S = Parser.getTok().getLoc();
switch(getLexer().getKind()) {
Modified: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp?rev=294449&r1=294448&r2=294449&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp Wed Feb 8 08:05:23 2017
@@ -1057,27 +1057,28 @@ void AMDGPUInstPrinter::printSendMsg(con
void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- IsaVersion IV = getIsaVersion(STI.getFeatureBits());
+ AMDGPU::IsaInfo::IsaVersion ISA =
+ AMDGPU::IsaInfo::getIsaVersion(STI.getFeatureBits());
unsigned SImm16 = MI->getOperand(OpNo).getImm();
unsigned Vmcnt, Expcnt, Lgkmcnt;
- decodeWaitcnt(IV, SImm16, Vmcnt, Expcnt, Lgkmcnt);
+ decodeWaitcnt(ISA, SImm16, Vmcnt, Expcnt, Lgkmcnt);
bool NeedSpace = false;
- if (Vmcnt != getVmcntBitMask(IV)) {
+ if (Vmcnt != getVmcntBitMask(ISA)) {
O << "vmcnt(" << Vmcnt << ')';
NeedSpace = true;
}
- if (Expcnt != getExpcntBitMask(IV)) {
+ if (Expcnt != getExpcntBitMask(ISA)) {
if (NeedSpace)
O << ' ';
O << "expcnt(" << Expcnt << ')';
NeedSpace = true;
}
- if (Lgkmcnt != getLgkmcntBitMask(IV)) {
+ if (Lgkmcnt != getLgkmcntBitMask(ISA)) {
if (NeedSpace)
O << ' ';
O << "lgkmcnt(" << Lgkmcnt << ')';
Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp?rev=294449&r1=294448&r2=294449&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp Wed Feb 8 08:05:23 2017
@@ -16,6 +16,7 @@
#include "AMDGPU.h"
#include "AMDGPURuntimeMetadata.h"
#include "MCTargetDesc/AMDGPURuntimeMD.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
@@ -92,9 +93,30 @@ template <> struct MappingTraits<Kernel:
static const bool flow = true;
};
+template <> struct MappingTraits<IsaInfo::Metadata> {
+ static void mapping(IO &YamlIO, IsaInfo::Metadata &I) {
+ YamlIO.mapRequired(KeyName::IsaInfoWavefrontSize, I.WavefrontSize);
+ YamlIO.mapRequired(KeyName::IsaInfoLocalMemorySize, I.LocalMemorySize);
+ YamlIO.mapRequired(KeyName::IsaInfoEUsPerCU, I.EUsPerCU);
+ YamlIO.mapRequired(KeyName::IsaInfoMaxWavesPerEU, I.MaxWavesPerEU);
+ YamlIO.mapRequired(KeyName::IsaInfoMaxFlatWorkGroupSize,
+ I.MaxFlatWorkGroupSize);
+ YamlIO.mapRequired(KeyName::IsaInfoSGPRAllocGranule, I.SGPRAllocGranule);
+ YamlIO.mapRequired(KeyName::IsaInfoTotalNumSGPRs, I.TotalNumSGPRs);
+ YamlIO.mapRequired(KeyName::IsaInfoAddressableNumSGPRs,
+ I.AddressableNumSGPRs);
+ YamlIO.mapRequired(KeyName::IsaInfoVGPRAllocGranule, I.VGPRAllocGranule);
+ YamlIO.mapRequired(KeyName::IsaInfoTotalNumVGPRs, I.TotalNumVGPRs);
+ YamlIO.mapRequired(KeyName::IsaInfoAddressableNumVGPRs,
+ I.AddressableNumVGPRs);
+ }
+ static const bool flow = true;
+};
+
template <> struct MappingTraits<Program::Metadata> {
static void mapping(IO &YamlIO, Program::Metadata &Prog) {
YamlIO.mapRequired(KeyName::MDVersion, Prog.MDVersionSeq);
+ YamlIO.mapRequired(KeyName::IsaInfo, Prog.IsaInfo);
YamlIO.mapOptional(KeyName::PrintfInfo, Prog.PrintfInfo);
YamlIO.mapOptional(KeyName::Kernels, Prog.Kernels);
}
@@ -383,10 +405,27 @@ static void checkRuntimeMDYAMLString(con
}
}
-std::string llvm::getRuntimeMDYAMLString(Module &M) {
+std::string llvm::getRuntimeMDYAMLString(const FeatureBitset &Features,
+ const Module &M) {
Program::Metadata Prog;
Prog.MDVersionSeq.push_back(MDVersion);
Prog.MDVersionSeq.push_back(MDRevision);
+ Prog.IsaInfo.WavefrontSize = AMDGPU::IsaInfo::getWavefrontSize(Features);
+ Prog.IsaInfo.LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(Features);
+ Prog.IsaInfo.EUsPerCU = AMDGPU::IsaInfo::getEUsPerCU(Features);
+ Prog.IsaInfo.MaxWavesPerEU = AMDGPU::IsaInfo::getMaxWavesPerEU(Features);
+ Prog.IsaInfo.MaxFlatWorkGroupSize =
+ AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(Features);
+ Prog.IsaInfo.SGPRAllocGranule =
+ AMDGPU::IsaInfo::getSGPRAllocGranule(Features);
+ Prog.IsaInfo.TotalNumSGPRs = AMDGPU::IsaInfo::getTotalNumSGPRs(Features);
+ Prog.IsaInfo.AddressableNumSGPRs =
+ AMDGPU::IsaInfo::getAddressableNumSGPRs(Features);
+ Prog.IsaInfo.VGPRAllocGranule =
+ AMDGPU::IsaInfo::getVGPRAllocGranule(Features);
+ Prog.IsaInfo.TotalNumVGPRs = AMDGPU::IsaInfo::getTotalNumVGPRs(Features);
+ Prog.IsaInfo.AddressableNumVGPRs =
+ AMDGPU::IsaInfo::getAddressableNumVGPRs(Features);
// Set PrintfInfo.
if (auto MD = M.getNamedMetadata("llvm.printf.fmts")) {
Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h?rev=294449&r1=294448&r2=294449&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h Wed Feb 8 08:05:23 2017
@@ -17,10 +17,12 @@
#include <string>
namespace llvm {
+class FeatureBitset;
class Module;
// Get runtime metadata as YAML string.
-std::string getRuntimeMDYAMLString(Module &M);
+std::string getRuntimeMDYAMLString(const FeatureBitset &Features,
+ const Module &M);
}
#endif
Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp?rev=294449&r1=294448&r2=294449&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp Wed Feb 8 08:05:23 2017
@@ -93,9 +93,10 @@ void AMDGPUTargetAsmStreamer::EmitAMDGPU
OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n';
}
-void AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(Module &M) {
+void AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(const FeatureBitset &Features,
+ const Module &M) {
OS << "\t.amdgpu_runtime_metadata\n";
- OS << getRuntimeMDYAMLString(M);
+ OS << getRuntimeMDYAMLString(Features, M);
OS << "\n\t.end_amdgpu_runtime_metadata\n";
}
@@ -236,6 +237,7 @@ void AMDGPUTargetELFStreamer::EmitRuntim
);
}
-void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(Module &M) {
- EmitRuntimeMetadata(getRuntimeMDYAMLString(M));
+void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(const FeatureBitset &Features,
+ const Module &M) {
+ EmitRuntimeMetadata(getRuntimeMDYAMLString(Features, M));
}
Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h?rev=294449&r1=294448&r2=294449&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h Wed Feb 8 08:05:23 2017
@@ -17,6 +17,7 @@ namespace llvm {
#include "AMDGPUPTNote.h"
class DataLayout;
+class FeatureBitset;
class Function;
class MCELFStreamer;
class MCSymbol;
@@ -46,7 +47,8 @@ public:
virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0;
- virtual void EmitRuntimeMetadata(Module &M) = 0;
+ virtual void EmitRuntimeMetadata(const FeatureBitset &Features,
+ const Module &M) = 0;
virtual void EmitRuntimeMetadata(StringRef Metadata) = 0;
};
@@ -70,7 +72,8 @@ public:
void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
- void EmitRuntimeMetadata(Module &M) override;
+ void EmitRuntimeMetadata(const FeatureBitset &Features,
+ const Module &M) override;
void EmitRuntimeMetadata(StringRef Metadata) override;
};
@@ -101,7 +104,8 @@ public:
void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
- void EmitRuntimeMetadata(Module &M) override;
+ void EmitRuntimeMetadata(const FeatureBitset &Features,
+ const Module &M) override;
void EmitRuntimeMetadata(StringRef Metadata) override;
};
Modified: llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp?rev=294449&r1=294448&r2=294449&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInsertWaits.cpp Wed Feb 8 08:05:23 2017
@@ -47,7 +47,6 @@
#define DEBUG_TYPE "si-insert-waits"
using namespace llvm;
-using namespace llvm::AMDGPU;
namespace {
@@ -76,7 +75,7 @@ private:
const SIInstrInfo *TII = nullptr;
const SIRegisterInfo *TRI = nullptr;
const MachineRegisterInfo *MRI;
- IsaVersion IV;
+ AMDGPU::IsaInfo::IsaVersion ISA;
/// \brief Constant zero value
static const Counters ZeroCounts;
@@ -427,10 +426,10 @@ bool SIInsertWaits::insertWait(MachineBa
// Build the wait instruction
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
- .addImm(encodeWaitcnt(IV,
- Counts.Named.VM,
- Counts.Named.EXP,
- Counts.Named.LGKM));
+ .addImm(AMDGPU::encodeWaitcnt(ISA,
+ Counts.Named.VM,
+ Counts.Named.EXP,
+ Counts.Named.LGKM));
LastOpcodeType = OTHER;
LastInstWritesM0 = false;
@@ -458,9 +457,9 @@ void SIInsertWaits::handleExistingWait(M
unsigned Imm = I->getOperand(0).getImm();
Counters Counts, WaitOn;
- Counts.Named.VM = decodeVmcnt(IV, Imm);
- Counts.Named.EXP = decodeExpcnt(IV, Imm);
- Counts.Named.LGKM = decodeLgkmcnt(IV, Imm);
+ Counts.Named.VM = AMDGPU::decodeVmcnt(ISA, Imm);
+ Counts.Named.EXP = AMDGPU::decodeExpcnt(ISA, Imm);
+ Counts.Named.LGKM = AMDGPU::decodeLgkmcnt(ISA, Imm);
for (unsigned i = 0; i < 3; ++i) {
if (Counts.Array[i] <= LastIssued.Array[i])
@@ -534,12 +533,12 @@ bool SIInsertWaits::runOnMachineFunction
TII = ST->getInstrInfo();
TRI = &TII->getRegisterInfo();
MRI = &MF.getRegInfo();
- IV = getIsaVersion(ST->getFeatureBits());
+ ISA = AMDGPU::IsaInfo::getIsaVersion(ST->getFeatureBits());
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- HardwareLimits.Named.VM = getVmcntBitMask(IV);
- HardwareLimits.Named.EXP = getExpcntBitMask(IV);
- HardwareLimits.Named.LGKM = getLgkmcntBitMask(IV);
+ HardwareLimits.Named.VM = AMDGPU::getVmcntBitMask(ISA);
+ HardwareLimits.Named.EXP = AMDGPU::getExpcntBitMask(ISA);
+ HardwareLimits.Named.LGKM = AMDGPU::getLgkmcntBitMask(ISA);
WaitedOn = ZeroCounts;
DelayedWaitOn = ZeroCounts;
Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp?rev=294449&r1=294448&r2=294449&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Wed Feb 8 08:05:23 2017
@@ -76,47 +76,240 @@ unsigned getLgkmcntBitShift() { return 8
/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }
-} // anonymous namespace
+} // namespace anonymous
namespace llvm {
namespace AMDGPU {
-IsaVersion getIsaVersion(const FeatureBitset &Features) {
+namespace IsaInfo {
+IsaVersion getIsaVersion(const FeatureBitset &Features) {
+ // CI.
if (Features.test(FeatureISAVersion7_0_0))
return {7, 0, 0};
-
if (Features.test(FeatureISAVersion7_0_1))
return {7, 0, 1};
-
if (Features.test(FeatureISAVersion7_0_2))
return {7, 0, 2};
+ // VI.
if (Features.test(FeatureISAVersion8_0_0))
return {8, 0, 0};
-
if (Features.test(FeatureISAVersion8_0_1))
return {8, 0, 1};
-
if (Features.test(FeatureISAVersion8_0_2))
return {8, 0, 2};
-
if (Features.test(FeatureISAVersion8_0_3))
return {8, 0, 3};
-
if (Features.test(FeatureISAVersion8_0_4))
return {8, 0, 4};
-
if (Features.test(FeatureISAVersion8_1_0))
return {8, 1, 0};
- return {0, 0, 0};
+ if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
+ return {0, 0, 0};
+ return {7, 0, 0};
+}
+
+unsigned getWavefrontSize(const FeatureBitset &Features) {
+ if (Features.test(FeatureWavefrontSize16))
+ return 16;
+ if (Features.test(FeatureWavefrontSize32))
+ return 32;
+
+ return 64;
+}
+
+unsigned getLocalMemorySize(const FeatureBitset &Features) {
+ if (Features.test(FeatureLocalMemorySize32768))
+ return 32768;
+ if (Features.test(FeatureLocalMemorySize65536))
+ return 65536;
+
+ return 0;
+}
+
+unsigned getEUsPerCU(const FeatureBitset &Features) {
+ return 4;
+}
+
+unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
+ unsigned FlatWorkGroupSize) {
+ if (!Features.test(FeatureGCN))
+ return 8;
+ return getWavesPerWorkGroup(Features, FlatWorkGroupSize) == 1 ? 40 : 16;
+}
+
+unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
+ return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
+}
+
+unsigned getMaxWavesPerCU(const FeatureBitset &Features,
+ unsigned FlatWorkGroupSize) {
+ return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
+}
+
+unsigned getMinWavesPerEU(const FeatureBitset &Features) {
+ return 1;
+}
+
+unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
+ if (!Features.test(FeatureGCN))
+ return 8;
+ // FIXME: Need to take scratch memory into account.
+ return 10;
+}
+
+unsigned getMaxWavesPerEU(const FeatureBitset &Features,
+ unsigned FlatWorkGroupSize) {
+ return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
+ getEUsPerCU(Features)) / getEUsPerCU(Features);
+}
+
+unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
+ return 1;
+}
+
+unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
+ return 2048;
+}
+
+unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
+ unsigned FlatWorkGroupSize) {
+ return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
+ getWavefrontSize(Features);
+}
+
+unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
+ IsaVersion Version = getIsaVersion(Features);
+ if (Version.Major >= 8)
+ return 16;
+ return 8;
+}
+
+unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
+ return 8;
+}
+
+unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
+ IsaVersion Version = getIsaVersion(Features);
+ if (Version.Major >= 8)
+ return 800;
+ return 512;
}
+unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
+ if (Features.test(FeatureSGPRInitBug))
+ return FIXED_NUM_SGPRS_FOR_INIT_BUG;
+
+ IsaVersion Version = getIsaVersion(Features);
+ if (Version.Major >= 8)
+ return 102;
+ return 104;
+}
+
+unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
+ IsaVersion Version = getIsaVersion(Features);
+ if (Version.Major >= 8) {
+ switch (WavesPerEU) {
+ case 0: return 0;
+ case 10: return 0;
+ case 9: return 0;
+ case 8: return 81;
+ default: return 97;
+ }
+ } else {
+ switch (WavesPerEU) {
+ case 0: return 0;
+ case 10: return 0;
+ case 9: return 49;
+ case 8: return 57;
+ case 7: return 65;
+ case 6: return 73;
+ case 5: return 81;
+ default: return 97;
+ }
+ }
+}
+
+unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
+ bool Addressable) {
+ IsaVersion Version = getIsaVersion(Features);
+ if (Version.Major >= 8) {
+ switch (WavesPerEU) {
+ case 0: return 80;
+ case 10: return 80;
+ case 9: return 80;
+ case 8: return 96;
+ default: return Addressable ? getAddressableNumSGPRs(Features) : 112;
+ }
+ } else {
+ switch (WavesPerEU) {
+ case 0: return 48;
+ case 10: return 48;
+ case 9: return 56;
+ case 8: return 64;
+ case 7: return 72;
+ case 6: return 80;
+ case 5: return 96;
+ default: return getAddressableNumSGPRs(Features);
+ }
+ }
+}
+
+unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
+ return 4;
+}
+
+unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
+ return getVGPRAllocGranule(Features);
+}
+
+unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
+ return 256;
+}
+
+unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
+ return getTotalNumVGPRs(Features);
+}
+
+unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
+ switch (WavesPerEU) {
+ case 0: return 0;
+ case 10: return 0;
+ case 9: return 25;
+ case 8: return 29;
+ case 7: return 33;
+ case 6: return 37;
+ case 5: return 41;
+ case 4: return 49;
+ case 3: return 65;
+ case 2: return 85;
+ default: return 129;
+ }
+}
+
+unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
+ switch (WavesPerEU) {
+ case 0: return 24;
+ case 10: return 24;
+ case 9: return 28;
+ case 8: return 32;
+ case 7: return 36;
+ case 6: return 40;
+ case 5: return 48;
+ case 4: return 64;
+ case 3: return 84;
+ case 2: return 128;
+ default: return getTotalNumVGPRs(Features);
+ }
+}
+
+} // namespace IsaInfo
+
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
const FeatureBitset &Features) {
-
- IsaVersion ISA = getIsaVersion(Features);
+ IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);
memset(&Header, 0, sizeof(Header));
@@ -224,57 +417,60 @@ std::pair<int, int> getIntegerPairAttrib
return Ints;
}
-unsigned getWaitcntBitMask(IsaVersion Version) {
- unsigned Vmcnt = getBitMask(getVmcntBitShift(), getVmcntBitWidth());
- unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
- unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
- return Vmcnt | Expcnt | Lgkmcnt;
-}
-
-unsigned getVmcntBitMask(IsaVersion Version) {
+unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
return (1 << getVmcntBitWidth()) - 1;
}
-unsigned getExpcntBitMask(IsaVersion Version) {
+unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
return (1 << getExpcntBitWidth()) - 1;
}
-unsigned getLgkmcntBitMask(IsaVersion Version) {
+unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
return (1 << getLgkmcntBitWidth()) - 1;
}
-unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt) {
+unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
+ unsigned Vmcnt = getBitMask(getVmcntBitShift(), getVmcntBitWidth());
+ unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
+ unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
+ return Vmcnt | Expcnt | Lgkmcnt;
+}
+
+unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
return unpackBits(Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
}
-unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt) {
+unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}
-unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt) {
+unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}
-void decodeWaitcnt(IsaVersion Version, unsigned Waitcnt,
+void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
Vmcnt = decodeVmcnt(Version, Waitcnt);
Expcnt = decodeExpcnt(Version, Waitcnt);
Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}
-unsigned encodeVmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Vmcnt) {
+unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+ unsigned Vmcnt) {
return packBits(Vmcnt, Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
}
-unsigned encodeExpcnt(IsaVersion Version, unsigned Waitcnt, unsigned Expcnt) {
+unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+ unsigned Expcnt) {
return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}
-unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt) {
+unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+ unsigned Lgkmcnt) {
return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}
-unsigned encodeWaitcnt(IsaVersion Version,
+unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
unsigned Waitcnt = getWaitcntBitMask(Version);
Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h?rev=294449&r1=294448&r2=294449&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h Wed Feb 8 08:05:23 2017
@@ -34,16 +34,118 @@ class MCSubtargetInfo;
namespace AMDGPU {
-LLVM_READONLY
-int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
+namespace IsaInfo {
+enum {
+ // The closed Vulkan driver sets 96, which limits the wave count to 8 but
+ // doesn't spill SGPRs as much as when 80 is set.
+ FIXED_NUM_SGPRS_FOR_INIT_BUG = 96
+};
+
+/// \brief Instruction set architecture version.
struct IsaVersion {
unsigned Major;
unsigned Minor;
unsigned Stepping;
};
+/// \returns Isa version for given subtarget \p Features.
IsaVersion getIsaVersion(const FeatureBitset &Features);
+
+/// \returns Wavefront size for given subtarget \p Features.
+unsigned getWavefrontSize(const FeatureBitset &Features);
+
+/// \returns Local memory size in bytes for given subtarget \p Features.
+unsigned getLocalMemorySize(const FeatureBitset &Features);
+
+/// \returns Number of execution units per compute unit for given subtarget \p
+/// Features.
+unsigned getEUsPerCU(const FeatureBitset &Features);
+
+/// \returns Maximum number of work groups per compute unit for given subtarget
+/// \p Features and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
+ unsigned FlatWorkGroupSize);
+
+/// \returns Maximum number of waves per compute unit for given subtarget \p
+/// Features without any kind of limitation.
+unsigned getMaxWavesPerCU(const FeatureBitset &Features);
+
+/// \returns Maximum number of waves per compute unit for given subtarget \p
+/// Features and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWavesPerCU(const FeatureBitset &Features,
+ unsigned FlatWorkGroupSize);
+
+/// \returns Minimum number of waves per execution unit for given subtarget \p
+/// Features.
+unsigned getMinWavesPerEU(const FeatureBitset &Features);
+
+/// \returns Maximum number of waves per execution unit for given subtarget \p
+/// Features without any kind of limitation.
+unsigned getMaxWavesPerEU(const FeatureBitset &Features);
+
+/// \returns Maximum number of waves per execution unit for given subtarget \p
+/// Features and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWavesPerEU(const FeatureBitset &Features,
+ unsigned FlatWorkGroupSize);
+
+/// \returns Minimum flat work group size for given subtarget \p Features.
+unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features);
+
+/// \returns Maximum flat work group size for given subtarget \p Features.
+unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features);
+
+/// \returns Number of waves per work group for given subtarget \p Features and
+/// limited by given \p FlatWorkGroupSize.
+unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
+ unsigned FlatWorkGroupSize);
+
+/// \returns SGPR allocation granularity for given subtarget \p Features.
+unsigned getSGPRAllocGranule(const FeatureBitset &Features);
+
+/// \returns SGPR encoding granularity for given subtarget \p Features.
+unsigned getSGPREncodingGranule(const FeatureBitset &Features);
+
+/// \returns Total number of SGPRs for given subtarget \p Features.
+unsigned getTotalNumSGPRs(const FeatureBitset &Features);
+
+/// \returns Addressable number of SGPRs for given subtarget \p Features.
+unsigned getAddressableNumSGPRs(const FeatureBitset &Features);
+
+/// \returns Minimum number of SGPRs that meets the given number of waves per
+/// execution unit requirement for given subtarget \p Features.
+unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
+
+/// \returns Maximum number of SGPRs that meets the given number of waves per
+/// execution unit requirement for given subtarget \p Features.
+unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
+ bool Addressable);
+
+/// \returns VGPR allocation granularity for given subtarget \p Features.
+unsigned getVGPRAllocGranule(const FeatureBitset &Features);
+
+/// \returns VGPR encoding granularity for given subtarget \p Features.
+unsigned getVGPREncodingGranule(const FeatureBitset &Features);
+
+/// \returns Total number of VGPRs for given subtarget \p Features.
+unsigned getTotalNumVGPRs(const FeatureBitset &Features);
+
+/// \returns Addressable number of VGPRs for given subtarget \p Features.
+unsigned getAddressableNumVGPRs(const FeatureBitset &Features);
+
+/// \returns Minimum number of VGPRs that meets given number of waves per
+/// execution unit requirement for given subtarget \p Features.
+unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
+
+/// \returns Maximum number of VGPRs that meets given number of waves per
+/// execution unit requirement for given subtarget \p Features.
+unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
+
+} // namespace IsaInfo
+
+LLVM_READONLY
+int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
+
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
const FeatureBitset &Features);
MCSection *getHSATextSection(MCContext &Ctx);
@@ -84,26 +186,26 @@ std::pair<int, int> getIntegerPairAttrib
std::pair<int, int> Default,
bool OnlyFirstRequired = false);
-/// \returns Waitcnt bit mask for given isa \p Version.
-unsigned getWaitcntBitMask(IsaVersion Version);
-
/// \returns Vmcnt bit mask for given isa \p Version.
-unsigned getVmcntBitMask(IsaVersion Version);
+unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version);
/// \returns Expcnt bit mask for given isa \p Version.
-unsigned getExpcntBitMask(IsaVersion Version);
+unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version);
/// \returns Lgkmcnt bit mask for given isa \p Version.
-unsigned getLgkmcntBitMask(IsaVersion Version);
+unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version);
+
+/// \returns Waitcnt bit mask for given isa \p Version.
+unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version);
/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
-unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt);
+unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
-unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt);
+unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
-unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt);
+unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
/// \brief Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
@@ -113,17 +215,20 @@ unsigned decodeLgkmcnt(IsaVersion Versio
/// \p Vmcnt = \p Waitcnt[3:0]
/// \p Expcnt = \p Waitcnt[6:4]
/// \p Lgkmcnt = \p Waitcnt[11:8]
-void decodeWaitcnt(IsaVersion Version, unsigned Waitcnt,
+void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
-unsigned encodeVmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Vmcnt);
+unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+ unsigned Vmcnt);
/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
-unsigned encodeExpcnt(IsaVersion Version, unsigned Waitcnt, unsigned Expcnt);
+unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+ unsigned Expcnt);
/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
-unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt);
+unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+ unsigned Lgkmcnt);
/// \brief Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
/// \p Version.
@@ -135,7 +240,7 @@ unsigned encodeLgkmcnt(IsaVersion Versio
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
-unsigned encodeWaitcnt(IsaVersion Version,
+unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
unsigned getInitialPSInputAddr(const Function &F);
Modified: llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll?rev=294449&r1=294448&r2=294449&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata1.ll Wed Feb 8 08:05:23 2017
@@ -1,6 +1,6 @@
; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata | FileCheck %s
; check llc does not crash for invalid opencl version metadata
-; CHECK: { amd.MDVersion: [ 2, 0 ] }
+; CHECK: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 } }
!opencl.ocl.version = !{}
Modified: llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll?rev=294449&r1=294448&r2=294449&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata2.ll Wed Feb 8 08:05:23 2017
@@ -1,7 +1,7 @@
; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata | FileCheck %s
; check llc does not crash for invalid opencl version metadata
-; CHECK: { amd.MDVersion: [ 2, 0 ] }
+; CHECK: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 } }
!opencl.ocl.version = !{!0}
!0 = !{}
Modified: llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll?rev=294449&r1=294448&r2=294449&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/invalid-opencl-version-metadata3.ll Wed Feb 8 08:05:23 2017
@@ -1,7 +1,7 @@
; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata | FileCheck %s
; check llc does not crash for invalid opencl version metadata
-; CHECK: { amd.MDVersion: [ 2, 0 ] }
+; CHECK: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 } }
!opencl.ocl.version = !{!0}
!0 = !{i32 1}
Modified: llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll?rev=294449&r1=294448&r2=294449&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/runtime-metadata.ll Wed Feb 8 08:05:23 2017
@@ -1,4 +1,5 @@
-; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata -elf-output-style=GNU -notes | FileCheck %s --check-prefix=NOTES
+; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata -elf-output-style=GNU -notes | FileCheck %s --check-prefix=NOTES --check-prefix=SI
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -filetype=obj -o - < %s | llvm-readobj -amdgpu-runtime-metadata -elf-output-style=GNU -notes | FileCheck %s --check-prefix=NOTES --check-prefix=VI
; RUN: llc -mtriple=amdgcn--amdhsa -filetype=obj -amdgpu-dump-rtmd -amdgpu-check-rtmd-parser %s -o - 2>&1 | FileCheck --check-prefix=CHECK --check-prefix=PARSER %s
%struct.A = type { i8, float }
@@ -11,9 +12,10 @@
%opencl.clk_event_t = type opaque
; CHECK: ---
-; CHECK-NEXT: { amd.MDVersion: [ 2, 0 ], amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels:
+; SI: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 8, amd.IsaInfoTotalNumSGPRs: 512, amd.IsaInfoAddressableNumSGPRs: 104, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels:
+; VI: { amd.MDVersion: [ 2, 1 ], amd.IsaInfo: { amd.IsaInfoWavefrontSize: 64, amd.IsaInfoLocalMemorySize: 65536, amd.IsaInfoEUsPerCU: 4, amd.IsaInfoMaxWavesPerEU: 10, amd.IsaInfoMaxFlatWorkGroupSize: 2048, amd.IsaInfoSGPRAllocGranule: 16, amd.IsaInfoTotalNumSGPRs: 800, amd.IsaInfoAddressableNumSGPRs: 102, amd.IsaInfoVGPRAllocGranule: 4, amd.IsaInfoTotalNumVGPRs: 256, amd.IsaInfoAddressableNumVGPRs: 256 }, amd.PrintfInfo: [ '1:1:4:%d\n', '2:1:8:%g\n' ], amd.Kernels:
-; CHECK-NEXT: - { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
+; CHECK: - { amd.KernelName: test_char, amd.Language: OpenCL C, amd.LanguageVersion: [ 2, 0 ], amd.Args:
; CHECK-NEXT: - { amd.ArgSize: 1, amd.ArgAlign: 1, amd.ArgKind: 0, amd.ArgValueType: 1, amd.ArgTypeName: char, amd.ArgAccQual: 0 }
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 7, amd.ArgValueType: 9 }
; CHECK-NEXT: - { amd.ArgSize: 8, amd.ArgAlign: 8, amd.ArgKind: 8, amd.ArgValueType: 9 }
@@ -345,7 +347,9 @@ define amdgpu_kernel void @test_pointee_
; NOTES-NEXT: Owner Data size Description
; NOTES-NEXT: AMD 0x00000008 Unknown note type: (0x00000001)
; NOTES-NEXT: AMD 0x0000001b Unknown note type: (0x00000003)
-; NOTES-NEXT: AMD 0x00005196 Unknown note type: (0x00000008)
+
+; SI: AMD 0x0000530d Unknown note type: (0x00000008)
+; VI: AMD 0x0000530e Unknown note type: (0x00000008)
!llvm.printf.fmts = !{!100, !101}
More information about the llvm-commits
mailing list