[llvm] r342069 - AMDGPU: Re-apply r341982 after fixing the layering issue
Konstantin Zhuravlyov via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 12 11:50:47 PDT 2018
Author: kzhuravl
Date: Wed Sep 12 11:50:47 2018
New Revision: 342069
URL: http://llvm.org/viewvc/llvm-project?rev=342069&view=rev
Log:
AMDGPU: Re-apply r341982 after fixing the layering issue
Move ISA version determination into TargetParser.
Also switch away from target features to the CPU string when
determining the ISA version. This fixes an issue where the wrong
ISA version was output in the object code when the features
of a particular CPU were altered (e.g. gfx902 without xnack
used to result in gfx900).
Added:
llvm/trunk/test/CodeGen/AMDGPU/gfx902-without-xnack.ll
Modified:
llvm/trunk/include/llvm/Support/TargetParser.h
llvm/trunk/lib/Support/TargetParser.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
llvm/trunk/test/MC/AMDGPU/hsa_isa_version_attrs.s
Modified: llvm/trunk/include/llvm/Support/TargetParser.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/TargetParser.h?rev=342069&r1=342068&r2=342069&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Support/TargetParser.h (original)
+++ llvm/trunk/include/llvm/Support/TargetParser.h Wed Sep 12 11:50:47 2018
@@ -320,6 +320,13 @@ enum GPUKind : uint32_t {
GK_AMDGCN_LAST = GK_GFX906,
};
+/// Instruction set architecture version.
+struct IsaVersion {
+ unsigned Major;
+ unsigned Minor;
+ unsigned Stepping;
+};
+
// This isn't comprehensive for now, just things that are needed from the
// frontend driver.
enum ArchFeatureKind : uint32_t {
@@ -335,18 +342,20 @@ enum ArchFeatureKind : uint32_t {
FEATURE_FAST_DENORMAL_F32 = 1 << 5
};
-GPUKind parseArchAMDGCN(StringRef CPU);
-GPUKind parseArchR600(StringRef CPU);
StringRef getArchNameAMDGCN(GPUKind AK);
StringRef getArchNameR600(GPUKind AK);
StringRef getCanonicalArchName(StringRef Arch);
+GPUKind parseArchAMDGCN(StringRef CPU);
+GPUKind parseArchR600(StringRef CPU);
unsigned getArchAttrAMDGCN(GPUKind AK);
unsigned getArchAttrR600(GPUKind AK);
void fillValidArchListAMDGCN(SmallVectorImpl<StringRef> &Values);
void fillValidArchListR600(SmallVectorImpl<StringRef> &Values);
-}
+IsaVersion getIsaVersion(StringRef GPU);
+
+} // namespace AMDGPU
} // namespace llvm
Modified: llvm/trunk/lib/Support/TargetParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/TargetParser.cpp?rev=342069&r1=342068&r2=342069&view=diff
==============================================================================
--- llvm/trunk/lib/Support/TargetParser.cpp (original)
+++ llvm/trunk/lib/Support/TargetParser.cpp Wed Sep 12 11:50:47 2018
@@ -22,6 +22,7 @@
using namespace llvm;
using namespace ARM;
using namespace AArch64;
+using namespace AMDGPU;
namespace {
@@ -947,6 +948,8 @@ bool llvm::AArch64::isX18ReservedByDefau
TT.isOSWindows();
}
+namespace {
+
struct GPUInfo {
StringLiteral Name;
StringLiteral CanonicalName;
@@ -954,11 +957,9 @@ struct GPUInfo {
unsigned Features;
};
-using namespace AMDGPU;
-static constexpr GPUInfo R600GPUs[26] = {
- // Name Canonical Kind Features
- // Name
- //
+constexpr GPUInfo R600GPUs[26] = {
+ // Name Canonical Kind Features
+ // Name
{{"r600"}, {"r600"}, GK_R600, FEATURE_NONE },
{{"rv630"}, {"r600"}, GK_R600, FEATURE_NONE },
{{"rv635"}, {"r600"}, GK_R600, FEATURE_NONE },
@@ -989,9 +990,9 @@ static constexpr GPUInfo R600GPUs[26] =
// This table should be sorted by the value of GPUKind
// Don't bother listing the implicitly true features
-static constexpr GPUInfo AMDGCNGPUs[32] = {
- // Name Canonical Kind Features
- // Name
+constexpr GPUInfo AMDGCNGPUs[32] = {
+ // Name Canonical Kind Features
+ // Name
{{"gfx600"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
{{"tahiti"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
{{"gfx601"}, {"gfx601"}, GK_GFX601, FEATURE_NONE},
@@ -1026,8 +1027,7 @@ static constexpr GPUInfo AMDGCNGPUs[32]
{{"gfx906"}, {"gfx906"}, GK_GFX906, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
};
-static const GPUInfo *getArchEntry(AMDGPU::GPUKind AK,
- ArrayRef<GPUInfo> Table) {
+const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) {
GPUInfo Search = { {""}, {""}, AK, AMDGPU::FEATURE_NONE };
auto I = std::lower_bound(Table.begin(), Table.end(), Search,
@@ -1040,6 +1040,8 @@ static const GPUInfo *getArchEntry(AMDGP
return I;
}
+} // namespace
+
StringRef llvm::AMDGPU::getArchNameAMDGCN(GPUKind AK) {
if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs))
return Entry->CanonicalName;
@@ -1092,3 +1094,31 @@ void AMDGPU::fillValidArchListR600(Small
for (const auto C : R600GPUs)
Values.push_back(C.Name);
}
+
+AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
+ if (GPU == "generic")
+ return {7, 0, 0};
+
+ AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
+ if (AK == AMDGPU::GPUKind::GK_NONE)
+ return {0, 0, 0};
+
+ switch (AK) {
+ case GK_GFX600: return {6, 0, 0};
+ case GK_GFX601: return {6, 0, 1};
+ case GK_GFX700: return {7, 0, 0};
+ case GK_GFX701: return {7, 0, 1};
+ case GK_GFX702: return {7, 0, 2};
+ case GK_GFX703: return {7, 0, 3};
+ case GK_GFX704: return {7, 0, 4};
+ case GK_GFX801: return {8, 0, 1};
+ case GK_GFX802: return {8, 0, 2};
+ case GK_GFX803: return {8, 0, 3};
+ case GK_GFX810: return {8, 1, 0};
+ case GK_GFX900: return {9, 0, 0};
+ case GK_GFX902: return {9, 0, 2};
+ case GK_GFX904: return {9, 0, 4};
+ case GK_GFX906: return {9, 0, 6};
+ default: return {0, 0, 0};
+ }
+}
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp?rev=342069&r1=342068&r2=342069&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Wed Sep 12 11:50:47 2018
@@ -40,6 +40,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
@@ -134,9 +135,9 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFil
getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1);
// HSA and PAL emit NT_AMDGPU_HSA_ISA for code objects v2.
- IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(getSTI()->getFeatureBits());
+ IsaVersion Version = getIsaVersion(getSTI()->getCPU());
getTargetStreamer()->EmitDirectiveHSACodeObjectISA(
- ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU");
+ Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU");
}
void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
@@ -240,7 +241,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyE
*getSTI(), KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
CurrentProgramInfo.NumVGPRsForWavesPerEU,
CurrentProgramInfo.NumSGPRsForWavesPerEU -
- IsaInfo::getNumExtraSGPRs(getSTI()->getFeatureBits(),
+ IsaInfo::getNumExtraSGPRs(getSTI(),
CurrentProgramInfo.VCCUsed,
CurrentProgramInfo.FlatUsed),
CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
@@ -561,7 +562,7 @@ static bool hasAnyNonFlatUseOfReg(const
int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs(
const GCNSubtarget &ST) const {
- return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(),
+ return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(&ST,
UsesVCC, UsesFlatScratch);
}
@@ -758,7 +759,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo
// 48 SGPRs - vcc, - flat_scr, -xnack
int MaxSGPRGuess =
- 47 - IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(), true,
+ 47 - IsaInfo::getNumExtraSGPRs(getSTI(), true,
ST.hasFlatAddressSpace());
MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
MaxVGPR = std::max(MaxVGPR, 23);
@@ -823,7 +824,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(
// duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be
// unified.
unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
- STM.getFeatureBits(), ProgInfo.VCCUsed, ProgInfo.FlatUsed);
+ getSTI(), ProgInfo.VCCUsed, ProgInfo.FlatUsed);
// Check the addressable register limit before we add ExtraSGPRs.
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
@@ -905,9 +906,9 @@ void AMDGPUAsmPrinter::getSIProgramInfo(
}
ProgInfo.SGPRBlocks = IsaInfo::getNumSGPRBlocks(
- STM.getFeatureBits(), ProgInfo.NumSGPRsForWavesPerEU);
+ getSTI(), ProgInfo.NumSGPRsForWavesPerEU);
ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks(
- STM.getFeatureBits(), ProgInfo.NumVGPRsForWavesPerEU);
+ getSTI(), ProgInfo.NumVGPRsForWavesPerEU);
// Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
// DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
@@ -1137,7 +1138,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
- AMDGPU::initDefaultAMDKernelCodeT(Out, STM.getFeatureBits());
+ AMDGPU::initDefaultAMDKernelCodeT(Out, getSTI());
Out.compute_pgm_resource_registers =
CurrentProgramInfo.ComputePGMRSrc1 |
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=342069&r1=342068&r2=342069&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Wed Sep 12 11:50:47 2018
@@ -124,10 +124,8 @@ GCNSubtarget::initializeSubtargetDepende
return *this;
}
-AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT,
- const FeatureBitset &FeatureBits) :
+AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) :
TargetTriple(TT),
- SubtargetFeatureBits(FeatureBits),
Has16BitInsts(false),
HasMadMixInsts(false),
FP32Denormals(false),
@@ -144,9 +142,9 @@ AMDGPUSubtarget::AMDGPUSubtarget(const T
{ }
GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
- const GCNTargetMachine &TM) :
+ const GCNTargetMachine &TM) :
AMDGPUGenSubtargetInfo(TT, GPU, FS),
- AMDGPUSubtarget(TT, getFeatureBits()),
+ AMDGPUSubtarget(TT),
TargetTriple(TT),
Gen(SOUTHERN_ISLANDS),
IsaVersion(ISAVersion0_0_0),
@@ -448,7 +446,7 @@ unsigned AMDGPUSubtarget::getKernArgSegm
R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
const TargetMachine &TM) :
R600GenSubtargetInfo(TT, GPU, FS),
- AMDGPUSubtarget(TT, getFeatureBits()),
+ AMDGPUSubtarget(TT),
InstrInfo(*this),
FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
FMA(false),
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=342069&r1=342068&r2=342069&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Wed Sep 12 11:50:47 2018
@@ -63,7 +63,6 @@ private:
Triple TargetTriple;
protected:
- const FeatureBitset &SubtargetFeatureBits;
bool Has16BitInsts;
bool HasMadMixInsts;
bool FP32Denormals;
@@ -79,7 +78,7 @@ protected:
unsigned WavefrontSize;
public:
- AMDGPUSubtarget(const Triple &TT, const FeatureBitset &FeatureBits);
+ AMDGPUSubtarget(const Triple &TT);
static const AMDGPUSubtarget &get(const MachineFunction &MF);
static const AMDGPUSubtarget &get(const TargetMachine &TM,
@@ -203,33 +202,21 @@ public:
/// \returns Maximum number of work groups per compute unit supported by the
/// subtarget and limited by given \p FlatWorkGroupSize.
- unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
- return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(SubtargetFeatureBits,
- FlatWorkGroupSize);
- }
+ virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
/// \returns Minimum flat work group size supported by the subtarget.
- unsigned getMinFlatWorkGroupSize() const {
- return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(SubtargetFeatureBits);
- }
+ virtual unsigned getMinFlatWorkGroupSize() const = 0;
/// \returns Maximum flat work group size supported by the subtarget.
- unsigned getMaxFlatWorkGroupSize() const {
- return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(SubtargetFeatureBits);
- }
+ virtual unsigned getMaxFlatWorkGroupSize() const = 0;
/// \returns Maximum number of waves per execution unit supported by the
/// subtarget and limited by given \p FlatWorkGroupSize.
- unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
- return AMDGPU::IsaInfo::getMaxWavesPerEU(SubtargetFeatureBits,
- FlatWorkGroupSize);
- }
+ virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const = 0;
/// \returns Minimum number of waves per execution unit supported by the
/// subtarget.
- unsigned getMinWavesPerEU() const {
- return AMDGPU::IsaInfo::getMinWavesPerEU(SubtargetFeatureBits);
- }
+ virtual unsigned getMinWavesPerEU() const = 0;
unsigned getMaxWavesPerEU() const { return 10; }
@@ -708,20 +695,19 @@ public:
/// \returns Number of execution units per compute unit supported by the
/// subtarget.
unsigned getEUsPerCU() const {
- return AMDGPU::IsaInfo::getEUsPerCU(MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getEUsPerCU(this);
}
/// \returns Maximum number of waves per compute unit supported by the
/// subtarget without any kind of limitation.
unsigned getMaxWavesPerCU() const {
- return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getMaxWavesPerCU(this);
}
/// \returns Maximum number of waves per compute unit supported by the
/// subtarget and limited by given \p FlatWorkGroupSize.
unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
- return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits(),
- FlatWorkGroupSize);
+ return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize);
}
/// \returns Maximum number of waves per execution unit supported by the
@@ -733,8 +719,7 @@ public:
/// \returns Number of waves per work group supported by the subtarget and
/// limited by given \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
- return AMDGPU::IsaInfo::getWavesPerWorkGroup(
- MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize);
+ return AMDGPU::IsaInfo::getWavesPerWorkGroup(this, FlatWorkGroupSize);
}
// static wrappers
@@ -853,39 +838,34 @@ public:
/// \returns SGPR allocation granularity supported by the subtarget.
unsigned getSGPRAllocGranule() const {
- return AMDGPU::IsaInfo::getSGPRAllocGranule(
- MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getSGPRAllocGranule(this);
}
/// \returns SGPR encoding granularity supported by the subtarget.
unsigned getSGPREncodingGranule() const {
- return AMDGPU::IsaInfo::getSGPREncodingGranule(
- MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getSGPREncodingGranule(this);
}
/// \returns Total number of SGPRs supported by the subtarget.
unsigned getTotalNumSGPRs() const {
- return AMDGPU::IsaInfo::getTotalNumSGPRs(MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
}
/// \returns Addressable number of SGPRs supported by the subtarget.
unsigned getAddressableNumSGPRs() const {
- return AMDGPU::IsaInfo::getAddressableNumSGPRs(
- MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getAddressableNumSGPRs(this);
}
/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
- return AMDGPU::IsaInfo::getMinNumSGPRs(MCSubtargetInfo::getFeatureBits(),
- WavesPerEU);
+ return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
}
/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
- return AMDGPU::IsaInfo::getMaxNumSGPRs(MCSubtargetInfo::getFeatureBits(),
- WavesPerEU, Addressable);
+ return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
}
/// \returns Reserved number of SGPRs for given function \p MF.
@@ -903,39 +883,34 @@ public:
/// \returns VGPR allocation granularity supported by the subtarget.
unsigned getVGPRAllocGranule() const {
- return AMDGPU::IsaInfo::getVGPRAllocGranule(
- MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getVGPRAllocGranule(this);
}
/// \returns VGPR encoding granularity supported by the subtarget.
unsigned getVGPREncodingGranule() const {
- return AMDGPU::IsaInfo::getVGPREncodingGranule(
- MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getVGPREncodingGranule(this);
}
/// \returns Total number of VGPRs supported by the subtarget.
unsigned getTotalNumVGPRs() const {
- return AMDGPU::IsaInfo::getTotalNumVGPRs(MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
}
/// \returns Addressable number of VGPRs supported by the subtarget.
unsigned getAddressableNumVGPRs() const {
- return AMDGPU::IsaInfo::getAddressableNumVGPRs(
- MCSubtargetInfo::getFeatureBits());
+ return AMDGPU::IsaInfo::getAddressableNumVGPRs(this);
}
/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
- return AMDGPU::IsaInfo::getMinNumVGPRs(MCSubtargetInfo::getFeatureBits(),
- WavesPerEU);
+ return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
}
/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
- return AMDGPU::IsaInfo::getMaxNumVGPRs(MCSubtargetInfo::getFeatureBits(),
- WavesPerEU);
+ return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
}
/// \returns Maximum number of VGPRs that meets number of waves per execution
@@ -951,6 +926,34 @@ public:
void getPostRAMutations(
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
const override;
+
+ /// \returns Maximum number of work groups per compute unit supported by the
+ /// subtarget and limited by given \p FlatWorkGroupSize.
+ unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
+ return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
+ }
+
+ /// \returns Minimum flat work group size supported by the subtarget.
+ unsigned getMinFlatWorkGroupSize() const override {
+ return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
+ }
+
+ /// \returns Maximum flat work group size supported by the subtarget.
+ unsigned getMaxFlatWorkGroupSize() const override {
+ return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
+ }
+
+ /// \returns Maximum number of waves per execution unit supported by the
+ /// subtarget and limited by given \p FlatWorkGroupSize.
+ unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
+ return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
+ }
+
+ /// \returns Minimum number of waves per execution unit supported by the
+ /// subtarget.
+ unsigned getMinWavesPerEU() const override {
+ return AMDGPU::IsaInfo::getMinWavesPerEU(this);
+ }
};
class R600Subtarget final : public R600GenSubtargetInfo,
@@ -1061,6 +1064,34 @@ public:
bool enableSubRegLiveness() const override {
return true;
}
+
+ /// \returns Maximum number of work groups per compute unit supported by the
+ /// subtarget and limited by given \p FlatWorkGroupSize.
+ unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
+ return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
+ }
+
+ /// \returns Minimum flat work group size supported by the subtarget.
+ unsigned getMinFlatWorkGroupSize() const override {
+ return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
+ }
+
+ /// \returns Maximum flat work group size supported by the subtarget.
+ unsigned getMaxFlatWorkGroupSize() const override {
+ return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
+ }
+
+ /// \returns Maximum number of waves per execution unit supported by the
+ /// subtarget and limited by given \p FlatWorkGroupSize.
+ unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
+ return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
+ }
+
+ /// \returns Minimum number of waves per execution unit supported by the
+ /// subtarget.
+ unsigned getMinWavesPerEU() const override {
+ return AMDGPU::IsaInfo::getMinWavesPerEU(this);
+ }
};
} // end namespace llvm
Modified: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp?rev=342069&r1=342068&r2=342069&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp Wed Sep 12 11:50:47 2018
@@ -49,6 +49,7 @@
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -917,8 +918,7 @@ public:
// Currently there is none suitable machinery in the core llvm-mc for this.
// MCSymbol::isRedefinable is intended for another purpose, and
// AsmParser::parseDirectiveSet() cannot be specialized for specific target.
- AMDGPU::IsaInfo::IsaVersion ISA =
- AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
+ AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
MCContext &Ctx = getContext();
if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
MCSymbol *Sym =
@@ -1826,7 +1826,7 @@ bool AMDGPUAsmParser::updateGprCountSymb
unsigned DwordRegIndex,
unsigned RegWidth) {
// Symbols are only defined for GCN targets
- if (AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()).Major < 6)
+ if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
return true;
auto SymbolName = getGprCountSymbolName(RegKind);
@@ -2637,18 +2637,18 @@ bool AMDGPUAsmParser::calculateGPRBlocks
unsigned &SGPRBlocks) {
// TODO(scott.linder): These calculations are duplicated from
// AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
- IsaInfo::IsaVersion Version = IsaInfo::getIsaVersion(Features);
+ IsaVersion Version = getIsaVersion(getSTI().getCPU());
unsigned NumVGPRs = NextFreeVGPR;
unsigned NumSGPRs = NextFreeSGPR;
- unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(Features);
+ unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI());
if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
NumSGPRs > MaxAddressableNumSGPRs)
return OutOfRangeError(SGPRRange);
NumSGPRs +=
- IsaInfo::getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed, XNACKUsed);
+ IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
NumSGPRs > MaxAddressableNumSGPRs)
@@ -2657,8 +2657,8 @@ bool AMDGPUAsmParser::calculateGPRBlocks
if (Features.test(FeatureSGPRInitBug))
NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
- VGPRBlocks = IsaInfo::getNumVGPRBlocks(Features, NumVGPRs);
- SGPRBlocks = IsaInfo::getNumSGPRBlocks(Features, NumSGPRs);
+ VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
+ SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
return false;
}
@@ -2678,8 +2678,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDH
StringSet<> Seen;
- IsaInfo::IsaVersion IVersion =
- IsaInfo::getIsaVersion(getSTI().getFeatureBits());
+ IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
SMRange VGPRRange;
uint64_t NextFreeVGPR = 0;
@@ -2938,8 +2937,7 @@ bool AMDGPUAsmParser::ParseDirectiveHSAC
// If this directive has no arguments, then use the ISA version for the
// targeted GPU.
if (getLexer().is(AsmToken::EndOfStatement)) {
- AMDGPU::IsaInfo::IsaVersion ISA =
- AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
+ AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
ISA.Stepping,
"AMD", "AMDGPU");
@@ -3001,7 +2999,7 @@ bool AMDGPUAsmParser::ParseAMDKernelCode
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
amd_kernel_code_t Header;
- AMDGPU::initDefaultAMDKernelCodeT(Header, getFeatureBits());
+ AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
while (true) {
// Lex EndOfStatement. This is in a while loop, because lexing a comment
@@ -3679,12 +3677,12 @@ void AMDGPUAsmParser::cvtExp(MCInst &Ins
static bool
encodeCnt(
- const AMDGPU::IsaInfo::IsaVersion ISA,
+ const AMDGPU::IsaVersion ISA,
int64_t &IntVal,
int64_t CntVal,
bool Saturate,
- unsigned (*encode)(const IsaInfo::IsaVersion &Version, unsigned, unsigned),
- unsigned (*decode)(const IsaInfo::IsaVersion &Version, unsigned))
+ unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
+ unsigned (*decode)(const IsaVersion &Version, unsigned))
{
bool Failed = false;
@@ -3715,8 +3713,7 @@ bool AMDGPUAsmParser::parseCnt(int64_t &
if (getParser().parseAbsoluteExpression(CntVal))
return true;
- AMDGPU::IsaInfo::IsaVersion ISA =
- AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
+ AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
bool Failed = true;
bool Sat = CntName.endswith("_sat");
@@ -3751,8 +3748,7 @@ bool AMDGPUAsmParser::parseCnt(int64_t &
OperandMatchResultTy
AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
- AMDGPU::IsaInfo::IsaVersion ISA =
- AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
+ AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
int64_t Waitcnt = getWaitcntBitMask(ISA);
SMLoc S = Parser.getTok().getLoc();
Modified: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp?rev=342069&r1=342068&r2=342069&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp Wed Sep 12 11:50:47 2018
@@ -1155,8 +1155,7 @@ void AMDGPUInstPrinter::printSwizzle(con
void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- AMDGPU::IsaInfo::IsaVersion ISA =
- AMDGPU::IsaInfo::getIsaVersion(STI.getFeatureBits());
+ AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI.getCPU());
unsigned SImm16 = MI->getOperand(OpNo).getImm();
unsigned Vmcnt, Expcnt, Lgkmcnt;
Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp?rev=342069&r1=342068&r2=342069&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp Wed Sep 12 11:50:47 2018
@@ -27,6 +27,7 @@
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetParser.h"
namespace llvm {
#include "AMDGPUPTNote.h"
@@ -39,84 +40,6 @@ using namespace llvm::AMDGPU;
// AMDGPUTargetStreamer
//===----------------------------------------------------------------------===//
-static const struct {
- const char *Name;
- unsigned Mach;
-} MachTable[] = {
- // Radeon HD 2000/3000 Series (R600).
- { "r600", ELF::EF_AMDGPU_MACH_R600_R600 },
- { "r630", ELF::EF_AMDGPU_MACH_R600_R630 },
- { "rs880", ELF::EF_AMDGPU_MACH_R600_RS880 },
- { "rv670", ELF::EF_AMDGPU_MACH_R600_RV670 },
- // Radeon HD 4000 Series (R700).
- { "rv710", ELF::EF_AMDGPU_MACH_R600_RV710 },
- { "rv730", ELF::EF_AMDGPU_MACH_R600_RV730 },
- { "rv770", ELF::EF_AMDGPU_MACH_R600_RV770 },
- // Radeon HD 5000 Series (Evergreen).
- { "cedar", ELF::EF_AMDGPU_MACH_R600_CEDAR },
- { "cypress", ELF::EF_AMDGPU_MACH_R600_CYPRESS },
- { "juniper", ELF::EF_AMDGPU_MACH_R600_JUNIPER },
- { "redwood", ELF::EF_AMDGPU_MACH_R600_REDWOOD },
- { "sumo", ELF::EF_AMDGPU_MACH_R600_SUMO },
- // Radeon HD 6000 Series (Northern Islands).
- { "barts", ELF::EF_AMDGPU_MACH_R600_BARTS },
- { "caicos", ELF::EF_AMDGPU_MACH_R600_CAICOS },
- { "cayman", ELF::EF_AMDGPU_MACH_R600_CAYMAN },
- { "turks", ELF::EF_AMDGPU_MACH_R600_TURKS },
- // AMDGCN GFX6.
- { "gfx600", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 },
- { "tahiti", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 },
- { "gfx601", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
- { "hainan", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
- { "oland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
- { "pitcairn", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
- { "verde", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
- // AMDGCN GFX7.
- { "gfx700", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 },
- { "kaveri", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 },
- { "gfx701", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 },
- { "hawaii", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 },
- { "gfx702", ELF::EF_AMDGPU_MACH_AMDGCN_GFX702 },
- { "gfx703", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
- { "kabini", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
- { "mullins", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
- { "gfx704", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 },
- { "bonaire", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 },
- // AMDGCN GFX8.
- { "gfx801", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 },
- { "carrizo", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 },
- { "gfx802", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
- { "iceland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
- { "tonga", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
- { "gfx803", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
- { "fiji", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
- { "polaris10", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
- { "polaris11", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
- { "gfx810", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 },
- { "stoney", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 },
- // AMDGCN GFX9.
- { "gfx900", ELF::EF_AMDGPU_MACH_AMDGCN_GFX900 },
- { "gfx902", ELF::EF_AMDGPU_MACH_AMDGCN_GFX902 },
- { "gfx904", ELF::EF_AMDGPU_MACH_AMDGCN_GFX904 },
- { "gfx906", ELF::EF_AMDGPU_MACH_AMDGCN_GFX906 },
- // Not specified processor.
- { nullptr, ELF::EF_AMDGPU_MACH_NONE }
-};
-
-unsigned AMDGPUTargetStreamer::getMACH(StringRef GPU) const {
- auto Entry = MachTable;
- for (; Entry->Name && GPU != Entry->Name; ++Entry)
- ;
- return Entry->Mach;
-}
-
-const char *AMDGPUTargetStreamer::getMachName(unsigned Mach) {
- auto Entry = MachTable;
- for (; Entry->Name && Mach != Entry->Mach; ++Entry)
- ;
- return Entry->Name;
-}
-
bool AMDGPUTargetStreamer::EmitHSAMetadata(StringRef HSAMetadataString) {
HSAMD::Metadata HSAMetadata;
if (HSAMD::fromString(HSAMetadataString, HSAMetadata))
@@ -125,6 +48,93 @@ bool AMDGPUTargetStreamer::EmitHSAMetada
return EmitHSAMetadata(HSAMetadata);
}
+StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
+ AMDGPU::GPUKind AK;
+
+ switch (ElfMach) {
+ case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
+ case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
+ case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break;
+ case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break;
+ case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break;
+ case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break;
+ case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break;
+ case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break;
+ case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break;
+ case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break;
+ case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break;
+ case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break;
+ case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break;
+ case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break;
+ case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break;
+ case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break;
+ case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
+ }
+
+ StringRef GPUName = getArchNameAMDGCN(AK);
+ if (GPUName != "")
+ return GPUName;
+ return getArchNameR600(AK);
+}
+
+unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
+ AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
+ if (AK == AMDGPU::GPUKind::GK_NONE)
+ AK = parseArchR600(GPU);
+
+ switch (AK) {
+ case GK_R600: return ELF::EF_AMDGPU_MACH_R600_R600;
+ case GK_R630: return ELF::EF_AMDGPU_MACH_R600_R630;
+ case GK_RS880: return ELF::EF_AMDGPU_MACH_R600_RS880;
+ case GK_RV670: return ELF::EF_AMDGPU_MACH_R600_RV670;
+ case GK_RV710: return ELF::EF_AMDGPU_MACH_R600_RV710;
+ case GK_RV730: return ELF::EF_AMDGPU_MACH_R600_RV730;
+ case GK_RV770: return ELF::EF_AMDGPU_MACH_R600_RV770;
+ case GK_CEDAR: return ELF::EF_AMDGPU_MACH_R600_CEDAR;
+ case GK_CYPRESS: return ELF::EF_AMDGPU_MACH_R600_CYPRESS;
+ case GK_JUNIPER: return ELF::EF_AMDGPU_MACH_R600_JUNIPER;
+ case GK_REDWOOD: return ELF::EF_AMDGPU_MACH_R600_REDWOOD;
+ case GK_SUMO: return ELF::EF_AMDGPU_MACH_R600_SUMO;
+ case GK_BARTS: return ELF::EF_AMDGPU_MACH_R600_BARTS;
+ case GK_CAICOS: return ELF::EF_AMDGPU_MACH_R600_CAICOS;
+ case GK_CAYMAN: return ELF::EF_AMDGPU_MACH_R600_CAYMAN;
+ case GK_TURKS: return ELF::EF_AMDGPU_MACH_R600_TURKS;
+ case GK_GFX600: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600;
+ case GK_GFX601: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601;
+ case GK_GFX700: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700;
+ case GK_GFX701: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701;
+ case GK_GFX702: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702;
+ case GK_GFX703: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703;
+ case GK_GFX704: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704;
+ case GK_GFX801: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801;
+ case GK_GFX802: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802;
+ case GK_GFX803: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803;
+ case GK_GFX810: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810;
+ case GK_GFX900: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900;
+ case GK_GFX902: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902;
+ case GK_GFX904: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904;
+ case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
+ case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
+ }
+
+ llvm_unreachable("unknown GPU");
+}
+
//===----------------------------------------------------------------------===//
// AMDGPUTargetAsmStreamer
//===----------------------------------------------------------------------===//
@@ -205,7 +215,7 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsa
bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK) {
amdhsa::kernel_descriptor_t DefaultKD = getDefaultAmdhsaKernelDescriptor();
- IsaInfo::IsaVersion IVersion = IsaInfo::getIsaVersion(STI.getFeatureBits());
+ IsaVersion IVersion = getIsaVersion(STI.getCPU());
OS << "\t.amdhsa_kernel " << KernelName << '\n';
@@ -342,7 +352,7 @@ AMDGPUTargetELFStreamer::AMDGPUTargetELF
unsigned EFlags = MCA.getELFHeaderEFlags();
EFlags &= ~ELF::EF_AMDGPU_MACH;
- EFlags |= getMACH(STI.getCPU());
+ EFlags |= getElfMach(STI.getCPU());
EFlags &= ~ELF::EF_AMDGPU_XNACK;
if (AMDGPU::hasXNACK(STI))
Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h?rev=342069&r1=342068&r2=342069&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h Wed Sep 12 11:50:47 2018
@@ -31,13 +31,7 @@ class AMDGPUTargetStreamer : public MCTa
protected:
MCContext &getContext() const { return Streamer.getContext(); }
- /// \returns Equivalent EF_AMDGPU_MACH_* value for given \p GPU name.
- unsigned getMACH(StringRef GPU) const;
-
public:
- /// \returns Equivalent GPU name for an EF_AMDGPU_MACH_* value.
- static const char *getMachName(unsigned Mach);
-
AMDGPUTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
virtual void EmitDirectiveAMDGCNTarget(StringRef Target) = 0;
@@ -71,6 +65,9 @@ public:
const amdhsa::kernel_descriptor_t &KernelDescriptor, uint64_t NextVGPR,
uint64_t NextSGPR, bool ReserveVCC, bool ReserveFlatScr,
bool ReserveXNACK) = 0;
+
+ static StringRef getArchNameFromElfMach(unsigned ElfMach);
+ static unsigned getElfMach(StringRef GPU);
};
class AMDGPUTargetAsmStreamer final : public AMDGPUTargetStreamer {
Modified: llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp?rev=342069&r1=342068&r2=342069&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp Wed Sep 12 11:50:47 2018
@@ -369,7 +369,7 @@ private:
const SIRegisterInfo *TRI = nullptr;
const MachineRegisterInfo *MRI = nullptr;
const MachineLoopInfo *MLI = nullptr;
- AMDGPU::IsaInfo::IsaVersion IV;
+ AMDGPU::IsaVersion IV;
DenseSet<MachineBasicBlock *> BlockVisitedSet;
DenseSet<MachineInstr *> TrackedWaitcntSet;
@@ -1841,7 +1841,7 @@ bool SIInsertWaitcnts::runOnMachineFunct
TRI = &TII->getRegisterInfo();
MRI = &MF.getRegInfo();
MLI = &getAnalysis<MachineLoopInfo>();
- IV = AMDGPU::IsaInfo::getIsaVersion(ST->getFeatureBits());
+ IV = AMDGPU::getIsaVersion(ST->getCPU());
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
ForceEmitZeroWaitcnts = ForceEmitZeroFlag;
Modified: llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp?rev=342069&r1=342068&r2=342069&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp Wed Sep 12 11:50:47 2018
@@ -253,7 +253,7 @@ protected:
/// Instruction info.
const SIInstrInfo *TII = nullptr;
- IsaInfo::IsaVersion IV;
+ IsaVersion IV;
SICacheControl(const GCNSubtarget &ST);
@@ -605,7 +605,7 @@ Optional<SIMemOpInfo> SIMemOpAccess::get
SICacheControl::SICacheControl(const GCNSubtarget &ST) {
TII = ST.getInstrInfo();
- IV = IsaInfo::getIsaVersion(ST.getFeatureBits());
+ IV = getIsaVersion(ST.getCPU());
}
/* static */
Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp?rev=342069&r1=342068&r2=342069&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Wed Sep 12 11:50:47 2018
@@ -137,68 +137,18 @@ int getMCOpcode(uint16_t Opcode, unsigne
namespace IsaInfo {
-IsaVersion getIsaVersion(const FeatureBitset &Features) {
- // GCN GFX6 (Southern Islands (SI)).
- if (Features.test(FeatureISAVersion6_0_0))
- return {6, 0, 0};
- if (Features.test(FeatureISAVersion6_0_1))
- return {6, 0, 1};
-
- // GCN GFX7 (Sea Islands (CI)).
- if (Features.test(FeatureISAVersion7_0_0))
- return {7, 0, 0};
- if (Features.test(FeatureISAVersion7_0_1))
- return {7, 0, 1};
- if (Features.test(FeatureISAVersion7_0_2))
- return {7, 0, 2};
- if (Features.test(FeatureISAVersion7_0_3))
- return {7, 0, 3};
- if (Features.test(FeatureISAVersion7_0_4))
- return {7, 0, 4};
- if (Features.test(FeatureSeaIslands))
- return {7, 0, 0};
-
- // GCN GFX8 (Volcanic Islands (VI)).
- if (Features.test(FeatureISAVersion8_0_1))
- return {8, 0, 1};
- if (Features.test(FeatureISAVersion8_0_2))
- return {8, 0, 2};
- if (Features.test(FeatureISAVersion8_0_3))
- return {8, 0, 3};
- if (Features.test(FeatureISAVersion8_1_0))
- return {8, 1, 0};
- if (Features.test(FeatureVolcanicIslands))
- return {8, 0, 0};
-
- // GCN GFX9.
- if (Features.test(FeatureISAVersion9_0_0))
- return {9, 0, 0};
- if (Features.test(FeatureISAVersion9_0_2))
- return {9, 0, 2};
- if (Features.test(FeatureISAVersion9_0_4))
- return {9, 0, 4};
- if (Features.test(FeatureISAVersion9_0_6))
- return {9, 0, 6};
- if (Features.test(FeatureGFX9))
- return {9, 0, 0};
-
- if (Features.test(FeatureSouthernIslands))
- return {0, 0, 0};
- return {7, 0, 0};
-}
-
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
auto TargetTriple = STI->getTargetTriple();
- auto ISAVersion = IsaInfo::getIsaVersion(STI->getFeatureBits());
+ auto Version = getIsaVersion(STI->getCPU());
Stream << TargetTriple.getArchName() << '-'
<< TargetTriple.getVendorName() << '-'
<< TargetTriple.getOSName() << '-'
<< TargetTriple.getEnvironmentName() << '-'
<< "gfx"
- << ISAVersion.Major
- << ISAVersion.Minor
- << ISAVersion.Stepping;
+ << Version.Major
+ << Version.Minor
+ << Version.Stepping;
if (hasXNACK(*STI))
Stream << "+xnack";
@@ -210,49 +160,49 @@ bool hasCodeObjectV3(const MCSubtargetIn
return STI->getFeatureBits().test(FeatureCodeObjectV3);
}
-unsigned getWavefrontSize(const FeatureBitset &Features) {
- if (Features.test(FeatureWavefrontSize16))
+unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
+ if (STI->getFeatureBits().test(FeatureWavefrontSize16))
return 16;
- if (Features.test(FeatureWavefrontSize32))
+ if (STI->getFeatureBits().test(FeatureWavefrontSize32))
return 32;
return 64;
}
-unsigned getLocalMemorySize(const FeatureBitset &Features) {
- if (Features.test(FeatureLocalMemorySize32768))
+unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
+ if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
return 32768;
- if (Features.test(FeatureLocalMemorySize65536))
+ if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
return 65536;
return 0;
}
-unsigned getEUsPerCU(const FeatureBitset &Features) {
+unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
return 4;
}
-unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
+unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize) {
- if (!Features.test(FeatureGCN))
+ if (!STI->getFeatureBits().test(FeatureGCN))
return 8;
- unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
+ unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
if (N == 1)
return 40;
N = 40 / N;
return std::min(N, 16u);
}
-unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
- return getMaxWavesPerEU() * getEUsPerCU(Features);
+unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) {
+ return getMaxWavesPerEU() * getEUsPerCU(STI);
}
-unsigned getMaxWavesPerCU(const FeatureBitset &Features,
+unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize) {
- return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
+ return getWavesPerWorkGroup(STI, FlatWorkGroupSize);
}
-unsigned getMinWavesPerEU(const FeatureBitset &Features) {
+unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
return 1;
}
@@ -261,89 +211,89 @@ unsigned getMaxWavesPerEU() {
return 10;
}
-unsigned getMaxWavesPerEU(const FeatureBitset &Features,
+unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize) {
- return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
- getEUsPerCU(Features)) / getEUsPerCU(Features);
+ return alignTo(getMaxWavesPerCU(STI, FlatWorkGroupSize),
+ getEUsPerCU(STI)) / getEUsPerCU(STI);
}
-unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
+unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
return 1;
}
-unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
+unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
return 2048;
}
-unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
+unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize) {
- return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
- getWavefrontSize(Features);
+ return alignTo(FlatWorkGroupSize, getWavefrontSize(STI)) /
+ getWavefrontSize(STI);
}
-unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
- IsaVersion Version = getIsaVersion(Features);
+unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
+ IsaVersion Version = getIsaVersion(STI->getCPU());
if (Version.Major >= 8)
return 16;
return 8;
}
-unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
+unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
return 8;
}
-unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
- IsaVersion Version = getIsaVersion(Features);
+unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
+ IsaVersion Version = getIsaVersion(STI->getCPU());
if (Version.Major >= 8)
return 800;
return 512;
}
-unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
- if (Features.test(FeatureSGPRInitBug))
+unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
+ if (STI->getFeatureBits().test(FeatureSGPRInitBug))
return FIXED_NUM_SGPRS_FOR_INIT_BUG;
- IsaVersion Version = getIsaVersion(Features);
+ IsaVersion Version = getIsaVersion(STI->getCPU());
if (Version.Major >= 8)
return 102;
return 104;
}
-unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
+unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
assert(WavesPerEU != 0);
if (WavesPerEU >= getMaxWavesPerEU())
return 0;
- unsigned MinNumSGPRs = getTotalNumSGPRs(Features) / (WavesPerEU + 1);
- if (Features.test(FeatureTrapHandler))
+ unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
+ if (STI->getFeatureBits().test(FeatureTrapHandler))
MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
- MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(Features)) + 1;
- return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
+ MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
+ return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
}
-unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
+unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
bool Addressable) {
assert(WavesPerEU != 0);
- IsaVersion Version = getIsaVersion(Features);
- unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
+ IsaVersion Version = getIsaVersion(STI->getCPU());
+ unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
if (Version.Major >= 8 && !Addressable)
AddressableNumSGPRs = 112;
- unsigned MaxNumSGPRs = getTotalNumSGPRs(Features) / WavesPerEU;
- if (Features.test(FeatureTrapHandler))
+ unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
+ if (STI->getFeatureBits().test(FeatureTrapHandler))
MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
- MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(Features));
+ MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}
-unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
+unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
bool FlatScrUsed, bool XNACKUsed) {
unsigned ExtraSGPRs = 0;
if (VCCUsed)
ExtraSGPRs = 2;
- IsaVersion Version = getIsaVersion(Features);
+ IsaVersion Version = getIsaVersion(STI->getCPU());
if (Version.Major < 8) {
if (FlatScrUsed)
ExtraSGPRs = 4;
@@ -358,74 +308,74 @@ unsigned getNumExtraSGPRs(const FeatureB
return ExtraSGPRs;
}
-unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
+unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
bool FlatScrUsed) {
- return getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed,
- Features[AMDGPU::FeatureXNACK]);
+ return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
+ STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
}
-unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs) {
- NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(Features));
+unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
+ NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
// SGPRBlocks is actual number of SGPR blocks minus 1.
- return NumSGPRs / getSGPREncodingGranule(Features) - 1;
+ return NumSGPRs / getSGPREncodingGranule(STI) - 1;
}
-unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
+unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI) {
return 4;
}
-unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
- return getVGPRAllocGranule(Features);
+unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI) {
+ return getVGPRAllocGranule(STI);
}
-unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
+unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
return 256;
}
-unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
- return getTotalNumVGPRs(Features);
+unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
+ return getTotalNumVGPRs(STI);
}
-unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
+unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
assert(WavesPerEU != 0);
if (WavesPerEU >= getMaxWavesPerEU())
return 0;
unsigned MinNumVGPRs =
- alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
- getVGPRAllocGranule(Features)) + 1;
- return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
+ alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1),
+ getVGPRAllocGranule(STI)) + 1;
+ return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI));
}
-unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
+unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
assert(WavesPerEU != 0);
- unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
- getVGPRAllocGranule(Features));
- unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
+ unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
+ getVGPRAllocGranule(STI));
+ unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}
-unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumVGPRs) {
- NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(Features));
+unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs) {
+ NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(STI));
// VGPRBlocks is actual number of VGPR blocks minus 1.
- return NumVGPRs / getVGPREncodingGranule(Features) - 1;
+ return NumVGPRs / getVGPREncodingGranule(STI) - 1;
}
} // end namespace IsaInfo
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
- const FeatureBitset &Features) {
- IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);
+ const MCSubtargetInfo *STI) {
+ IsaVersion Version = getIsaVersion(STI->getCPU());
memset(&Header, 0, sizeof(Header));
Header.amd_kernel_code_version_major = 1;
Header.amd_kernel_code_version_minor = 2;
Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
- Header.amd_machine_version_major = ISA.Major;
- Header.amd_machine_version_minor = ISA.Minor;
- Header.amd_machine_version_stepping = ISA.Stepping;
+ Header.amd_machine_version_major = Version.Major;
+ Header.amd_machine_version_minor = Version.Minor;
+ Header.amd_machine_version_stepping = Version.Stepping;
Header.kernel_code_entry_byte_offset = sizeof(Header);
// wavefront_size is specified as a power of 2: 2^6 = 64 threads.
Header.wavefront_size = 6;
@@ -513,7 +463,7 @@ std::pair<int, int> getIntegerPairAttrib
return Ints;
}
-unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
+unsigned getVmcntBitMask(const IsaVersion &Version) {
unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
if (Version.Major < 9)
return VmcntLo;
@@ -522,15 +472,15 @@ unsigned getVmcntBitMask(const IsaInfo::
return VmcntLo | VmcntHi;
}
-unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
+unsigned getExpcntBitMask(const IsaVersion &Version) {
return (1 << getExpcntBitWidth()) - 1;
}
-unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
+unsigned getLgkmcntBitMask(const IsaVersion &Version) {
return (1 << getLgkmcntBitWidth()) - 1;
}
-unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
+unsigned getWaitcntBitMask(const IsaVersion &Version) {
unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
@@ -542,7 +492,7 @@ unsigned getWaitcntBitMask(const IsaInfo
return Waitcnt | VmcntHi;
}
-unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
+unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
unsigned VmcntLo =
unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
if (Version.Major < 9)
@@ -554,22 +504,22 @@ unsigned decodeVmcnt(const IsaInfo::IsaV
return VmcntLo | VmcntHi;
}
-unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
+unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}
-unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
+unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}
-void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
Vmcnt = decodeVmcnt(Version, Waitcnt);
Expcnt = decodeExpcnt(Version, Waitcnt);
Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}
-unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned Vmcnt) {
Waitcnt =
packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
@@ -580,17 +530,17 @@ unsigned encodeVmcnt(const IsaInfo::IsaV
return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
}
-unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned Expcnt) {
return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}
-unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned Lgkmcnt) {
return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}
-unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
+unsigned encodeWaitcnt(const IsaVersion &Version,
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
unsigned Waitcnt = getWaitcntBitMask(Version);
Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h?rev=342069&r1=342068&r2=342069&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h Wed Sep 12 11:50:47 2018
@@ -19,6 +19,7 @@
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetParser.h"
#include <cstdint>
#include <string>
#include <utility>
@@ -56,16 +57,6 @@ enum {
TRAP_NUM_SGPRS = 16
};
-/// Instruction set architecture version.
-struct IsaVersion {
- unsigned Major;
- unsigned Minor;
- unsigned Stepping;
-};
-
-/// \returns Isa version for given subtarget \p Features.
-IsaVersion getIsaVersion(const FeatureBitset &Features);
-
/// Streams isa version string for given subtarget \p STI into \p Stream.
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
@@ -73,114 +64,114 @@ void streamIsaVersion(const MCSubtargetI
/// false otherwise.
bool hasCodeObjectV3(const MCSubtargetInfo *STI);
-/// \returns Wavefront size for given subtarget \p Features.
-unsigned getWavefrontSize(const FeatureBitset &Features);
+/// \returns Wavefront size for given subtarget \p STI.
+unsigned getWavefrontSize(const MCSubtargetInfo *STI);
-/// \returns Local memory size in bytes for given subtarget \p Features.
-unsigned getLocalMemorySize(const FeatureBitset &Features);
+/// \returns Local memory size in bytes for given subtarget \p STI.
+unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
/// \returns Number of execution units per compute unit for given subtarget \p
-/// Features.
-unsigned getEUsPerCU(const FeatureBitset &Features);
+/// STI.
+unsigned getEUsPerCU(const MCSubtargetInfo *STI);
/// \returns Maximum number of work groups per compute unit for given subtarget
-/// \p Features and limited by given \p FlatWorkGroupSize.
-unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
+/// \p STI and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize);
/// \returns Maximum number of waves per compute unit for given subtarget \p
-/// Features without any kind of limitation.
-unsigned getMaxWavesPerCU(const FeatureBitset &Features);
+/// STI without any kind of limitation.
+unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI);
/// \returns Maximum number of waves per compute unit for given subtarget \p
-/// Features and limited by given \p FlatWorkGroupSize.
-unsigned getMaxWavesPerCU(const FeatureBitset &Features,
+/// STI and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize);
/// \returns Minimum number of waves per execution unit for given subtarget \p
-/// Features.
-unsigned getMinWavesPerEU(const FeatureBitset &Features);
+/// STI.
+unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
/// \returns Maximum number of waves per execution unit for given subtarget \p
-/// Features without any kind of limitation.
+/// STI without any kind of limitation.
unsigned getMaxWavesPerEU();
/// \returns Maximum number of waves per execution unit for given subtarget \p
-/// Features and limited by given \p FlatWorkGroupSize.
-unsigned getMaxWavesPerEU(const FeatureBitset &Features,
+/// STI and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize);
-/// \returns Minimum flat work group size for given subtarget \p Features.
-unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features);
+/// \returns Minimum flat work group size for given subtarget \p STI.
+unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
-/// \returns Maximum flat work group size for given subtarget \p Features.
-unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features);
+/// \returns Maximum flat work group size for given subtarget \p STI.
+unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
-/// \returns Number of waves per work group for given subtarget \p Features and
+/// \returns Number of waves per work group for given subtarget \p STI and
/// limited by given \p FlatWorkGroupSize.
-unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
+unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
unsigned FlatWorkGroupSize);
-/// \returns SGPR allocation granularity for given subtarget \p Features.
-unsigned getSGPRAllocGranule(const FeatureBitset &Features);
+/// \returns SGPR allocation granularity for given subtarget \p STI.
+unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
-/// \returns SGPR encoding granularity for given subtarget \p Features.
-unsigned getSGPREncodingGranule(const FeatureBitset &Features);
+/// \returns SGPR encoding granularity for given subtarget \p STI.
+unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
-/// \returns Total number of SGPRs for given subtarget \p Features.
-unsigned getTotalNumSGPRs(const FeatureBitset &Features);
+/// \returns Total number of SGPRs for given subtarget \p STI.
+unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
-/// \returns Addressable number of SGPRs for given subtarget \p Features.
-unsigned getAddressableNumSGPRs(const FeatureBitset &Features);
+/// \returns Addressable number of SGPRs for given subtarget \p STI.
+unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
/// \returns Minimum number of SGPRs that meets the given number of waves per
-/// execution unit requirement for given subtarget \p Features.
-unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
+/// execution unit requirement for given subtarget \p STI.
+unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
/// \returns Maximum number of SGPRs that meets the given number of waves per
-/// execution unit requirement for given subtarget \p Features.
-unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
+/// execution unit requirement for given subtarget \p STI.
+unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
bool Addressable);
/// \returns Number of extra SGPRs implicitly required by given subtarget \p
-/// Features when the given special registers are used.
-unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
+/// STI when the given special registers are used.
+unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
bool FlatScrUsed, bool XNACKUsed);
/// \returns Number of extra SGPRs implicitly required by given subtarget \p
-/// Features when the given special registers are used. XNACK is inferred from
-/// \p Features.
-unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
+/// STI when the given special registers are used. XNACK is inferred from
+/// \p STI.
+unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
bool FlatScrUsed);
-/// \returns Number of SGPR blocks needed for given subtarget \p Features when
+/// \returns Number of SGPR blocks needed for given subtarget \p STI when
/// \p NumSGPRs are used. \p NumSGPRs should already include any special
/// register counts.
-unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs);
+unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
-/// \returns VGPR allocation granularity for given subtarget \p Features.
-unsigned getVGPRAllocGranule(const FeatureBitset &Features);
+/// \returns VGPR allocation granularity for given subtarget \p STI.
+unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI);
-/// \returns VGPR encoding granularity for given subtarget \p Features.
-unsigned getVGPREncodingGranule(const FeatureBitset &Features);
+/// \returns VGPR encoding granularity for given subtarget \p STI.
+unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI);
-/// \returns Total number of VGPRs for given subtarget \p Features.
-unsigned getTotalNumVGPRs(const FeatureBitset &Features);
+/// \returns Total number of VGPRs for given subtarget \p STI.
+unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
-/// \returns Addressable number of VGPRs for given subtarget \p Features.
-unsigned getAddressableNumVGPRs(const FeatureBitset &Features);
+/// \returns Addressable number of VGPRs for given subtarget \p STI.
+unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
/// \returns Minimum number of VGPRs that meets given number of waves per
-/// execution unit requirement for given subtarget \p Features.
-unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
+/// execution unit requirement for given subtarget \p STI.
+unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
/// \returns Maximum number of VGPRs that meets given number of waves per
-/// execution unit requirement for given subtarget \p Features.
-unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
+/// execution unit requirement for given subtarget \p STI.
+unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
-/// \returns Number of VGPR blocks needed for given subtarget \p Features when
+/// \returns Number of VGPR blocks needed for given subtarget \p STI when
/// \p NumVGPRs are used.
-unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs);
+unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
} // end namespace IsaInfo
@@ -233,7 +224,7 @@ LLVM_READONLY
int getMCOpcode(uint16_t Opcode, unsigned Gen);
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
- const FeatureBitset &Features);
+ const MCSubtargetInfo *STI);
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor();
@@ -268,25 +259,25 @@ std::pair<int, int> getIntegerPairAttrib
bool OnlyFirstRequired = false);
/// \returns Vmcnt bit mask for given isa \p Version.
-unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version);
+unsigned getVmcntBitMask(const IsaVersion &Version);
/// \returns Expcnt bit mask for given isa \p Version.
-unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version);
+unsigned getExpcntBitMask(const IsaVersion &Version);
/// \returns Lgkmcnt bit mask for given isa \p Version.
-unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version);
+unsigned getLgkmcntBitMask(const IsaVersion &Version);
/// \returns Waitcnt bit mask for given isa \p Version.
-unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version);
+unsigned getWaitcntBitMask(const IsaVersion &Version);
/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
-unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
+unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
-unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
+unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
-unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
+unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
@@ -297,19 +288,19 @@ unsigned decodeLgkmcnt(const IsaInfo::Is
/// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14] (gfx9+ only)
/// \p Expcnt = \p Waitcnt[6:4]
/// \p Lgkmcnt = \p Waitcnt[11:8]
-void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
-unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned Vmcnt);
/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
-unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned Expcnt);
/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
-unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
+unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned Lgkmcnt);
/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
@@ -324,7 +315,7 @@ unsigned encodeLgkmcnt(const IsaInfo::Is
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
-unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
+unsigned encodeWaitcnt(const IsaVersion &Version,
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
unsigned getInitialPSInputAddr(const Function &F);
Added: llvm/trunk/test/CodeGen/AMDGPU/gfx902-without-xnack.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/gfx902-without-xnack.ll?rev=342069&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/gfx902-without-xnack.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/gfx902-without-xnack.ll Wed Sep 12 11:50:47 2018
@@ -0,0 +1,8 @@
+; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=-xnack < %s | FileCheck %s
+
+; CHECK: .hsa_code_object_isa 9,0,2,"AMD","AMDGPU"
+define amdgpu_kernel void @test_kernel(float addrspace(1)* %out0, double addrspace(1)* %out1) nounwind {
+ store float 0.0, float addrspace(1)* %out0
+ ret void
+}
+
Modified: llvm/trunk/test/MC/AMDGPU/hsa_isa_version_attrs.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/hsa_isa_version_attrs.s?rev=342069&r1=342068&r2=342069&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/hsa_isa_version_attrs.s (original)
+++ llvm/trunk/test/MC/AMDGPU/hsa_isa_version_attrs.s Wed Sep 12 11:50:47 2018
@@ -2,5 +2,5 @@
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -mattr=-mad-mix-insts -show-encoding %s | FileCheck --check-prefix=GFX9 %s
.hsa_code_object_isa
-// GFX8: .hsa_code_object_isa 8,0,0,"AMD","AMDGPU"
+// GFX8: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
// GFX9: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"
More information about the llvm-commits mailing list