[llvm] r342023 - Revert "AMDGPU: Move isa version and EF_AMDGPU_MACH_* determination into TargetParser."
Ilya Biryukov via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 12 00:05:31 PDT 2018
Author: ibiryukov
Date: Wed Sep 12 00:05:30 2018
New Revision: 342023
URL: http://llvm.org/viewvc/llvm-project?rev=342023&view=rev
Log:
Revert "AMDGPU: Move isa version and EF_AMDGPU_MACH_* determination into TargetParser."
This reverts commit r341982.
The change introduced a layering violation. Reverting to unbreak
our integration.
Removed:
llvm/trunk/test/CodeGen/AMDGPU/gfx902-without-xnack.ll
Modified:
llvm/trunk/include/llvm/Support/TargetParser.h
llvm/trunk/lib/Support/TargetParser.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
llvm/trunk/test/MC/AMDGPU/hsa_isa_version_attrs.s
Modified: llvm/trunk/include/llvm/Support/TargetParser.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/TargetParser.h?rev=342023&r1=342022&r2=342023&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Support/TargetParser.h (original)
+++ llvm/trunk/include/llvm/Support/TargetParser.h Wed Sep 12 00:05:30 2018
@@ -320,13 +320,6 @@ enum GPUKind : uint32_t {
GK_AMDGCN_LAST = GK_GFX906,
};
-/// Instruction set architecture version.
-struct IsaVersion {
- unsigned Major;
- unsigned Minor;
- unsigned Stepping;
-};
-
// This isn't comprehensive for now, just things that are needed from the
// frontend driver.
enum ArchFeatureKind : uint32_t {
@@ -342,22 +335,18 @@ enum ArchFeatureKind : uint32_t {
FEATURE_FAST_DENORMAL_F32 = 1 << 5
};
+GPUKind parseArchAMDGCN(StringRef CPU);
+GPUKind parseArchR600(StringRef CPU);
StringRef getArchNameAMDGCN(GPUKind AK);
StringRef getArchNameR600(GPUKind AK);
StringRef getCanonicalArchName(StringRef Arch);
-GPUKind parseArchAMDGCN(StringRef CPU);
-GPUKind parseArchR600(StringRef CPU);
unsigned getArchAttrAMDGCN(GPUKind AK);
unsigned getArchAttrR600(GPUKind AK);
void fillValidArchListAMDGCN(SmallVectorImpl<StringRef> &Values);
void fillValidArchListR600(SmallVectorImpl<StringRef> &Values);
-StringRef getArchNameFromElfMach(unsigned ElfMach);
-unsigned getElfMach(StringRef GPU);
-IsaVersion getIsaVersion(StringRef GPU);
-
-} // namespace AMDGPU
+}
} // namespace llvm
Modified: llvm/trunk/lib/Support/TargetParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/TargetParser.cpp?rev=342023&r1=342022&r2=342023&view=diff
==============================================================================
--- llvm/trunk/lib/Support/TargetParser.cpp (original)
+++ llvm/trunk/lib/Support/TargetParser.cpp Wed Sep 12 00:05:30 2018
@@ -17,13 +17,11 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/BinaryFormat/ELF.h"
#include <cctype>
using namespace llvm;
using namespace ARM;
using namespace AArch64;
-using namespace AMDGPU;
namespace {
@@ -949,8 +947,6 @@ bool llvm::AArch64::isX18ReservedByDefau
TT.isOSWindows();
}
-namespace {
-
struct GPUInfo {
StringLiteral Name;
StringLiteral CanonicalName;
@@ -958,9 +954,11 @@ struct GPUInfo {
unsigned Features;
};
-constexpr GPUInfo R600GPUs[26] = {
- // Name Canonical Kind Features
- // Name
+using namespace AMDGPU;
+static constexpr GPUInfo R600GPUs[26] = {
+ // Name Canonical Kind Features
+ // Name
+ //
{{"r600"}, {"r600"}, GK_R600, FEATURE_NONE },
{{"rv630"}, {"r600"}, GK_R600, FEATURE_NONE },
{{"rv635"}, {"r600"}, GK_R600, FEATURE_NONE },
@@ -991,9 +989,9 @@ constexpr GPUInfo R600GPUs[26] = {
// This table should be sorted by the value of GPUKind
// Don't bother listing the implicitly true features
-constexpr GPUInfo AMDGCNGPUs[32] = {
- // Name Canonical Kind Features
- // Name
+static constexpr GPUInfo AMDGCNGPUs[32] = {
+ // Name Canonical Kind Features
+ // Name
{{"gfx600"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
{{"tahiti"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
{{"gfx601"}, {"gfx601"}, GK_GFX601, FEATURE_NONE},
@@ -1028,7 +1026,8 @@ constexpr GPUInfo AMDGCNGPUs[32] = {
{{"gfx906"}, {"gfx906"}, GK_GFX906, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
};
-const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) {
+static const GPUInfo *getArchEntry(AMDGPU::GPUKind AK,
+ ArrayRef<GPUInfo> Table) {
GPUInfo Search = { {""}, {""}, AK, AMDGPU::FEATURE_NONE };
auto I = std::lower_bound(Table.begin(), Table.end(), Search,
@@ -1041,8 +1040,6 @@ const GPUInfo *getArchEntry(AMDGPU::GPUK
return I;
}
-} // namespace
-
StringRef llvm::AMDGPU::getArchNameAMDGCN(GPUKind AK) {
if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs))
return Entry->CanonicalName;
@@ -1095,118 +1092,3 @@ void AMDGPU::fillValidArchListR600(Small
for (const auto C : R600GPUs)
Values.push_back(C.Name);
}
-
-StringRef AMDGPU::getArchNameFromElfMach(unsigned ElfMach) {
- AMDGPU::GPUKind AK;
-
- switch (ElfMach) {
- case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
- case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
- case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break;
- case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break;
- case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break;
- case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break;
- case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break;
- case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break;
- case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break;
- case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break;
- case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break;
- case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break;
- case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break;
- case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break;
- case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break;
- case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break;
- case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
- }
-
- StringRef GPUName = getArchNameAMDGCN(AK);
- if (GPUName != "")
- return GPUName;
- return getArchNameR600(AK);
-}
-
-unsigned AMDGPU::getElfMach(StringRef GPU) {
- AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
- if (AK == AMDGPU::GPUKind::GK_NONE)
- AK = parseArchR600(GPU);
-
- switch (AK) {
- case GK_R600: return ELF::EF_AMDGPU_MACH_R600_R600;
- case GK_R630: return ELF::EF_AMDGPU_MACH_R600_R630;
- case GK_RS880: return ELF::EF_AMDGPU_MACH_R600_RS880;
- case GK_RV670: return ELF::EF_AMDGPU_MACH_R600_RV670;
- case GK_RV710: return ELF::EF_AMDGPU_MACH_R600_RV710;
- case GK_RV730: return ELF::EF_AMDGPU_MACH_R600_RV730;
- case GK_RV770: return ELF::EF_AMDGPU_MACH_R600_RV770;
- case GK_CEDAR: return ELF::EF_AMDGPU_MACH_R600_CEDAR;
- case GK_CYPRESS: return ELF::EF_AMDGPU_MACH_R600_CYPRESS;
- case GK_JUNIPER: return ELF::EF_AMDGPU_MACH_R600_JUNIPER;
- case GK_REDWOOD: return ELF::EF_AMDGPU_MACH_R600_REDWOOD;
- case GK_SUMO: return ELF::EF_AMDGPU_MACH_R600_SUMO;
- case GK_BARTS: return ELF::EF_AMDGPU_MACH_R600_BARTS;
- case GK_CAICOS: return ELF::EF_AMDGPU_MACH_R600_CAICOS;
- case GK_CAYMAN: return ELF::EF_AMDGPU_MACH_R600_CAYMAN;
- case GK_TURKS: return ELF::EF_AMDGPU_MACH_R600_TURKS;
- case GK_GFX600: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX600;
- case GK_GFX601: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX601;
- case GK_GFX700: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX700;
- case GK_GFX701: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX701;
- case GK_GFX702: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX702;
- case GK_GFX703: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX703;
- case GK_GFX704: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX704;
- case GK_GFX801: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX801;
- case GK_GFX802: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX802;
- case GK_GFX803: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX803;
- case GK_GFX810: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX810;
- case GK_GFX900: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX900;
- case GK_GFX902: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX902;
- case GK_GFX904: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904;
- case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
- case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
- }
-
- llvm_unreachable("unknown GPU");
-}
-
-AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
- if (GPU == "generic")
- return {7, 0, 0};
-
- AMDGPU::GPUKind AK = parseArchAMDGCN(GPU);
- if (AK == AMDGPU::GPUKind::GK_NONE)
- return {0, 0, 0};
-
- switch (AK) {
- case GK_GFX600: return {6, 0, 0};
- case GK_GFX601: return {6, 0, 1};
- case GK_GFX700: return {7, 0, 0};
- case GK_GFX701: return {7, 0, 1};
- case GK_GFX702: return {7, 0, 2};
- case GK_GFX703: return {7, 0, 3};
- case GK_GFX704: return {7, 0, 4};
- case GK_GFX801: return {8, 0, 1};
- case GK_GFX802: return {8, 0, 2};
- case GK_GFX803: return {8, 0, 3};
- case GK_GFX810: return {8, 1, 0};
- case GK_GFX900: return {9, 0, 0};
- case GK_GFX902: return {9, 0, 2};
- case GK_GFX904: return {9, 0, 4};
- case GK_GFX906: return {9, 0, 6};
- default: return {0, 0, 0};
- }
-}
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp?rev=342023&r1=342022&r2=342023&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Wed Sep 12 00:05:30 2018
@@ -40,7 +40,6 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
@@ -135,9 +134,9 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFil
getTargetStreamer()->EmitDirectiveHSACodeObjectVersion(2, 1);
// HSA and PAL emit NT_AMDGPU_HSA_ISA for code objects v2.
- IsaVersion Version = getIsaVersion(getSTI()->getCPU());
+ IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(getSTI()->getFeatureBits());
getTargetStreamer()->EmitDirectiveHSACodeObjectISA(
- Version.Major, Version.Minor, Version.Stepping, "AMD", "AMDGPU");
+ ISA.Major, ISA.Minor, ISA.Stepping, "AMD", "AMDGPU");
}
void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) {
@@ -241,7 +240,7 @@ void AMDGPUAsmPrinter::EmitFunctionBodyE
*getSTI(), KernelName, getAmdhsaKernelDescriptor(*MF, CurrentProgramInfo),
CurrentProgramInfo.NumVGPRsForWavesPerEU,
CurrentProgramInfo.NumSGPRsForWavesPerEU -
- IsaInfo::getNumExtraSGPRs(getSTI(),
+ IsaInfo::getNumExtraSGPRs(getSTI()->getFeatureBits(),
CurrentProgramInfo.VCCUsed,
CurrentProgramInfo.FlatUsed),
CurrentProgramInfo.VCCUsed, CurrentProgramInfo.FlatUsed,
@@ -562,7 +561,7 @@ static bool hasAnyNonFlatUseOfReg(const
int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs(
const GCNSubtarget &ST) const {
- return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(&ST,
+ return NumExplicitSGPR + IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(),
UsesVCC, UsesFlatScratch);
}
@@ -759,7 +758,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo
// 48 SGPRs - vcc, - flat_scr, -xnack
int MaxSGPRGuess =
- 47 - IsaInfo::getNumExtraSGPRs(getSTI(), true,
+ 47 - IsaInfo::getNumExtraSGPRs(ST.getFeatureBits(), true,
ST.hasFlatAddressSpace());
MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess);
MaxVGPR = std::max(MaxVGPR, 23);
@@ -824,7 +823,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(
// duplicated in part in AMDGPUAsmParser::calculateGPRBlocks, and could be
// unified.
unsigned ExtraSGPRs = IsaInfo::getNumExtraSGPRs(
- getSTI(), ProgInfo.VCCUsed, ProgInfo.FlatUsed);
+ STM.getFeatureBits(), ProgInfo.VCCUsed, ProgInfo.FlatUsed);
// Check the addressable register limit before we add ExtraSGPRs.
if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
@@ -906,9 +905,9 @@ void AMDGPUAsmPrinter::getSIProgramInfo(
}
ProgInfo.SGPRBlocks = IsaInfo::getNumSGPRBlocks(
- getSTI(), ProgInfo.NumSGPRsForWavesPerEU);
+ STM.getFeatureBits(), ProgInfo.NumSGPRsForWavesPerEU);
ProgInfo.VGPRBlocks = IsaInfo::getNumVGPRBlocks(
- getSTI(), ProgInfo.NumVGPRsForWavesPerEU);
+ STM.getFeatureBits(), ProgInfo.NumVGPRsForWavesPerEU);
// Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
// DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
@@ -1138,7 +1137,7 @@ void AMDGPUAsmPrinter::getAmdKernelCode(
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
- AMDGPU::initDefaultAMDKernelCodeT(Out, getSTI());
+ AMDGPU::initDefaultAMDKernelCodeT(Out, STM.getFeatureBits());
Out.compute_pgm_resource_registers =
CurrentProgramInfo.ComputePGMRSrc1 |
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=342023&r1=342022&r2=342023&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Wed Sep 12 00:05:30 2018
@@ -124,8 +124,10 @@ GCNSubtarget::initializeSubtargetDepende
return *this;
}
-AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT) :
+AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT,
+ const FeatureBitset &FeatureBits) :
TargetTriple(TT),
+ SubtargetFeatureBits(FeatureBits),
Has16BitInsts(false),
HasMadMixInsts(false),
FP32Denormals(false),
@@ -142,9 +144,9 @@ AMDGPUSubtarget::AMDGPUSubtarget(const T
{ }
GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
- const GCNTargetMachine &TM) :
+ const GCNTargetMachine &TM) :
AMDGPUGenSubtargetInfo(TT, GPU, FS),
- AMDGPUSubtarget(TT),
+ AMDGPUSubtarget(TT, getFeatureBits()),
TargetTriple(TT),
Gen(SOUTHERN_ISLANDS),
IsaVersion(ISAVersion0_0_0),
@@ -446,7 +448,7 @@ unsigned AMDGPUSubtarget::getKernArgSegm
R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
const TargetMachine &TM) :
R600GenSubtargetInfo(TT, GPU, FS),
- AMDGPUSubtarget(TT),
+ AMDGPUSubtarget(TT, getFeatureBits()),
InstrInfo(*this),
FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
FMA(false),
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=342023&r1=342022&r2=342023&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Wed Sep 12 00:05:30 2018
@@ -63,6 +63,7 @@ private:
Triple TargetTriple;
protected:
+ const FeatureBitset &SubtargetFeatureBits;
bool Has16BitInsts;
bool HasMadMixInsts;
bool FP32Denormals;
@@ -78,7 +79,7 @@ protected:
unsigned WavefrontSize;
public:
- AMDGPUSubtarget(const Triple &TT);
+ AMDGPUSubtarget(const Triple &TT, const FeatureBitset &FeatureBits);
static const AMDGPUSubtarget &get(const MachineFunction &MF);
static const AMDGPUSubtarget &get(const TargetMachine &TM,
@@ -202,21 +203,33 @@ public:
/// \returns Maximum number of work groups per compute unit supported by the
/// subtarget and limited by given \p FlatWorkGroupSize.
- virtual unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const = 0;
+ unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
+ return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(SubtargetFeatureBits,
+ FlatWorkGroupSize);
+ }
/// \returns Minimum flat work group size supported by the subtarget.
- virtual unsigned getMinFlatWorkGroupSize() const = 0;
+ unsigned getMinFlatWorkGroupSize() const {
+ return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(SubtargetFeatureBits);
+ }
/// \returns Maximum flat work group size supported by the subtarget.
- virtual unsigned getMaxFlatWorkGroupSize() const = 0;
+ unsigned getMaxFlatWorkGroupSize() const {
+ return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(SubtargetFeatureBits);
+ }
/// \returns Maximum number of waves per execution unit supported by the
/// subtarget and limited by given \p FlatWorkGroupSize.
- virtual unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const = 0;
+ unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
+ return AMDGPU::IsaInfo::getMaxWavesPerEU(SubtargetFeatureBits,
+ FlatWorkGroupSize);
+ }
/// \returns Minimum number of waves per execution unit supported by the
/// subtarget.
- virtual unsigned getMinWavesPerEU() const = 0;
+ unsigned getMinWavesPerEU() const {
+ return AMDGPU::IsaInfo::getMinWavesPerEU(SubtargetFeatureBits);
+ }
unsigned getMaxWavesPerEU() const { return 10; }
@@ -695,19 +708,20 @@ public:
/// \returns Number of execution units per compute unit supported by the
/// subtarget.
unsigned getEUsPerCU() const {
- return AMDGPU::IsaInfo::getEUsPerCU(this);
+ return AMDGPU::IsaInfo::getEUsPerCU(MCSubtargetInfo::getFeatureBits());
}
/// \returns Maximum number of waves per compute unit supported by the
/// subtarget without any kind of limitation.
unsigned getMaxWavesPerCU() const {
- return AMDGPU::IsaInfo::getMaxWavesPerCU(this);
+ return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits());
}
/// \returns Maximum number of waves per compute unit supported by the
/// subtarget and limited by given \p FlatWorkGroupSize.
unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
- return AMDGPU::IsaInfo::getMaxWavesPerCU(this, FlatWorkGroupSize);
+ return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits(),
+ FlatWorkGroupSize);
}
/// \returns Maximum number of waves per execution unit supported by the
@@ -719,7 +733,8 @@ public:
/// \returns Number of waves per work group supported by the subtarget and
/// limited by given \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
- return AMDGPU::IsaInfo::getWavesPerWorkGroup(this, FlatWorkGroupSize);
+ return AMDGPU::IsaInfo::getWavesPerWorkGroup(
+ MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize);
}
// static wrappers
@@ -838,34 +853,39 @@ public:
/// \returns SGPR allocation granularity supported by the subtarget.
unsigned getSGPRAllocGranule() const {
- return AMDGPU::IsaInfo::getSGPRAllocGranule(this);
+ return AMDGPU::IsaInfo::getSGPRAllocGranule(
+ MCSubtargetInfo::getFeatureBits());
}
/// \returns SGPR encoding granularity supported by the subtarget.
unsigned getSGPREncodingGranule() const {
- return AMDGPU::IsaInfo::getSGPREncodingGranule(this);
+ return AMDGPU::IsaInfo::getSGPREncodingGranule(
+ MCSubtargetInfo::getFeatureBits());
}
/// \returns Total number of SGPRs supported by the subtarget.
unsigned getTotalNumSGPRs() const {
- return AMDGPU::IsaInfo::getTotalNumSGPRs(this);
+ return AMDGPU::IsaInfo::getTotalNumSGPRs(MCSubtargetInfo::getFeatureBits());
}
/// \returns Addressable number of SGPRs supported by the subtarget.
unsigned getAddressableNumSGPRs() const {
- return AMDGPU::IsaInfo::getAddressableNumSGPRs(this);
+ return AMDGPU::IsaInfo::getAddressableNumSGPRs(
+ MCSubtargetInfo::getFeatureBits());
}
/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
- return AMDGPU::IsaInfo::getMinNumSGPRs(this, WavesPerEU);
+ return AMDGPU::IsaInfo::getMinNumSGPRs(MCSubtargetInfo::getFeatureBits(),
+ WavesPerEU);
}
/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
- return AMDGPU::IsaInfo::getMaxNumSGPRs(this, WavesPerEU, Addressable);
+ return AMDGPU::IsaInfo::getMaxNumSGPRs(MCSubtargetInfo::getFeatureBits(),
+ WavesPerEU, Addressable);
}
/// \returns Reserved number of SGPRs for given function \p MF.
@@ -883,34 +903,39 @@ public:
/// \returns VGPR allocation granularity supported by the subtarget.
unsigned getVGPRAllocGranule() const {
- return AMDGPU::IsaInfo::getVGPRAllocGranule(this);
+ return AMDGPU::IsaInfo::getVGPRAllocGranule(
+ MCSubtargetInfo::getFeatureBits());
}
/// \returns VGPR encoding granularity supported by the subtarget.
unsigned getVGPREncodingGranule() const {
- return AMDGPU::IsaInfo::getVGPREncodingGranule(this);
+ return AMDGPU::IsaInfo::getVGPREncodingGranule(
+ MCSubtargetInfo::getFeatureBits());
}
/// \returns Total number of VGPRs supported by the subtarget.
unsigned getTotalNumVGPRs() const {
- return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
+ return AMDGPU::IsaInfo::getTotalNumVGPRs(MCSubtargetInfo::getFeatureBits());
}
/// \returns Addressable number of VGPRs supported by the subtarget.
unsigned getAddressableNumVGPRs() const {
- return AMDGPU::IsaInfo::getAddressableNumVGPRs(this);
+ return AMDGPU::IsaInfo::getAddressableNumVGPRs(
+ MCSubtargetInfo::getFeatureBits());
}
/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
- return AMDGPU::IsaInfo::getMinNumVGPRs(this, WavesPerEU);
+ return AMDGPU::IsaInfo::getMinNumVGPRs(MCSubtargetInfo::getFeatureBits(),
+ WavesPerEU);
}
/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
- return AMDGPU::IsaInfo::getMaxNumVGPRs(this, WavesPerEU);
+ return AMDGPU::IsaInfo::getMaxNumVGPRs(MCSubtargetInfo::getFeatureBits(),
+ WavesPerEU);
}
/// \returns Maximum number of VGPRs that meets number of waves per execution
@@ -926,34 +951,6 @@ public:
void getPostRAMutations(
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
const override;
-
- /// \returns Maximum number of work groups per compute unit supported by the
- /// subtarget and limited by given \p FlatWorkGroupSize.
- unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
- return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
- }
-
- /// \returns Minimum flat work group size supported by the subtarget.
- unsigned getMinFlatWorkGroupSize() const override {
- return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
- }
-
- /// \returns Maximum flat work group size supported by the subtarget.
- unsigned getMaxFlatWorkGroupSize() const override {
- return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
- }
-
- /// \returns Maximum number of waves per execution unit supported by the
- /// subtarget and limited by given \p FlatWorkGroupSize.
- unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
- return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
- }
-
- /// \returns Minimum number of waves per execution unit supported by the
- /// subtarget.
- unsigned getMinWavesPerEU() const override {
- return AMDGPU::IsaInfo::getMinWavesPerEU(this);
- }
};
class R600Subtarget final : public R600GenSubtargetInfo,
@@ -1064,34 +1061,6 @@ public:
bool enableSubRegLiveness() const override {
return true;
}
-
- /// \returns Maximum number of work groups per compute unit supported by the
- /// subtarget and limited by given \p FlatWorkGroupSize.
- unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const override {
- return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(this, FlatWorkGroupSize);
- }
-
- /// \returns Minimum flat work group size supported by the subtarget.
- unsigned getMinFlatWorkGroupSize() const override {
- return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(this);
- }
-
- /// \returns Maximum flat work group size supported by the subtarget.
- unsigned getMaxFlatWorkGroupSize() const override {
- return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(this);
- }
-
- /// \returns Maximum number of waves per execution unit supported by the
- /// subtarget and limited by given \p FlatWorkGroupSize.
- unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const override {
- return AMDGPU::IsaInfo::getMaxWavesPerEU(this, FlatWorkGroupSize);
- }
-
- /// \returns Minimum number of waves per execution unit supported by the
- /// subtarget.
- unsigned getMinWavesPerEU() const override {
- return AMDGPU::IsaInfo::getMinWavesPerEU(this);
- }
};
} // end namespace llvm
Modified: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp?rev=342023&r1=342022&r2=342023&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp Wed Sep 12 00:05:30 2018
@@ -49,7 +49,6 @@
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
-#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -918,7 +917,8 @@ public:
// Currently there is none suitable machinery in the core llvm-mc for this.
// MCSymbol::isRedefinable is intended for another purpose, and
// AsmParser::parseDirectiveSet() cannot be specialized for specific target.
- AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
+ AMDGPU::IsaInfo::IsaVersion ISA =
+ AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
MCContext &Ctx = getContext();
if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
MCSymbol *Sym =
@@ -1826,7 +1826,7 @@ bool AMDGPUAsmParser::updateGprCountSymb
unsigned DwordRegIndex,
unsigned RegWidth) {
// Symbols are only defined for GCN targets
- if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
+ if (AMDGPU::IsaInfo::getIsaVersion(getFeatureBits()).Major < 6)
return true;
auto SymbolName = getGprCountSymbolName(RegKind);
@@ -2637,18 +2637,18 @@ bool AMDGPUAsmParser::calculateGPRBlocks
unsigned &SGPRBlocks) {
// TODO(scott.linder): These calculations are duplicated from
// AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
- IsaVersion Version = getIsaVersion(getSTI().getCPU());
+ IsaInfo::IsaVersion Version = IsaInfo::getIsaVersion(Features);
unsigned NumVGPRs = NextFreeVGPR;
unsigned NumSGPRs = NextFreeSGPR;
- unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI());
+ unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(Features);
if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
NumSGPRs > MaxAddressableNumSGPRs)
return OutOfRangeError(SGPRRange);
NumSGPRs +=
- IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);
+ IsaInfo::getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed, XNACKUsed);
if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
NumSGPRs > MaxAddressableNumSGPRs)
@@ -2657,8 +2657,8 @@ bool AMDGPUAsmParser::calculateGPRBlocks
if (Features.test(FeatureSGPRInitBug))
NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
- VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
- SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);
+ VGPRBlocks = IsaInfo::getNumVGPRBlocks(Features, NumVGPRs);
+ SGPRBlocks = IsaInfo::getNumSGPRBlocks(Features, NumSGPRs);
return false;
}
@@ -2678,7 +2678,8 @@ bool AMDGPUAsmParser::ParseDirectiveAMDH
StringSet<> Seen;
- IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
+ IsaInfo::IsaVersion IVersion =
+ IsaInfo::getIsaVersion(getSTI().getFeatureBits());
SMRange VGPRRange;
uint64_t NextFreeVGPR = 0;
@@ -2937,7 +2938,8 @@ bool AMDGPUAsmParser::ParseDirectiveHSAC
// If this directive has no arguments, then use the ISA version for the
// targeted GPU.
if (getLexer().is(AsmToken::EndOfStatement)) {
- AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
+ AMDGPU::IsaInfo::IsaVersion ISA =
+ AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
ISA.Stepping,
"AMD", "AMDGPU");
@@ -2999,7 +3001,7 @@ bool AMDGPUAsmParser::ParseAMDKernelCode
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
amd_kernel_code_t Header;
- AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
+ AMDGPU::initDefaultAMDKernelCodeT(Header, getFeatureBits());
while (true) {
// Lex EndOfStatement. This is in a while loop, because lexing a comment
@@ -3677,12 +3679,12 @@ void AMDGPUAsmParser::cvtExp(MCInst &Ins
static bool
encodeCnt(
- const AMDGPU::IsaVersion ISA,
+ const AMDGPU::IsaInfo::IsaVersion ISA,
int64_t &IntVal,
int64_t CntVal,
bool Saturate,
- unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
- unsigned (*decode)(const IsaVersion &Version, unsigned))
+ unsigned (*encode)(const IsaInfo::IsaVersion &Version, unsigned, unsigned),
+ unsigned (*decode)(const IsaInfo::IsaVersion &Version, unsigned))
{
bool Failed = false;
@@ -3713,7 +3715,8 @@ bool AMDGPUAsmParser::parseCnt(int64_t &
if (getParser().parseAbsoluteExpression(CntVal))
return true;
- AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
+ AMDGPU::IsaInfo::IsaVersion ISA =
+ AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
bool Failed = true;
bool Sat = CntName.endswith("_sat");
@@ -3748,7 +3751,8 @@ bool AMDGPUAsmParser::parseCnt(int64_t &
OperandMatchResultTy
AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
- AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
+ AMDGPU::IsaInfo::IsaVersion ISA =
+ AMDGPU::IsaInfo::getIsaVersion(getFeatureBits());
int64_t Waitcnt = getWaitcntBitMask(ISA);
SMLoc S = Parser.getTok().getLoc();
Modified: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp?rev=342023&r1=342022&r2=342023&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp Wed Sep 12 00:05:30 2018
@@ -1155,7 +1155,8 @@ void AMDGPUInstPrinter::printSwizzle(con
void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI.getCPU());
+ AMDGPU::IsaInfo::IsaVersion ISA =
+ AMDGPU::IsaInfo::getIsaVersion(STI.getFeatureBits());
unsigned SImm16 = MI->getOperand(OpNo).getImm();
unsigned Vmcnt, Expcnt, Lgkmcnt;
Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp?rev=342023&r1=342022&r2=342023&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp Wed Sep 12 00:05:30 2018
@@ -27,7 +27,6 @@
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/FormattedStream.h"
-#include "llvm/Support/TargetParser.h"
namespace llvm {
#include "AMDGPUPTNote.h"
@@ -40,6 +39,84 @@ using namespace llvm::AMDGPU;
// AMDGPUTargetStreamer
//===----------------------------------------------------------------------===//
+static const struct {
+ const char *Name;
+ unsigned Mach;
+} MachTable[] = {
+ // Radeon HD 2000/3000 Series (R600).
+ { "r600", ELF::EF_AMDGPU_MACH_R600_R600 },
+ { "r630", ELF::EF_AMDGPU_MACH_R600_R630 },
+ { "rs880", ELF::EF_AMDGPU_MACH_R600_RS880 },
+ { "rv670", ELF::EF_AMDGPU_MACH_R600_RV670 },
+ // Radeon HD 4000 Series (R700).
+ { "rv710", ELF::EF_AMDGPU_MACH_R600_RV710 },
+ { "rv730", ELF::EF_AMDGPU_MACH_R600_RV730 },
+ { "rv770", ELF::EF_AMDGPU_MACH_R600_RV770 },
+ // Radeon HD 5000 Series (Evergreen).
+ { "cedar", ELF::EF_AMDGPU_MACH_R600_CEDAR },
+ { "cypress", ELF::EF_AMDGPU_MACH_R600_CYPRESS },
+ { "juniper", ELF::EF_AMDGPU_MACH_R600_JUNIPER },
+ { "redwood", ELF::EF_AMDGPU_MACH_R600_REDWOOD },
+ { "sumo", ELF::EF_AMDGPU_MACH_R600_SUMO },
+ // Radeon HD 6000 Series (Northern Islands).
+ { "barts", ELF::EF_AMDGPU_MACH_R600_BARTS },
+ { "caicos", ELF::EF_AMDGPU_MACH_R600_CAICOS },
+ { "cayman", ELF::EF_AMDGPU_MACH_R600_CAYMAN },
+ { "turks", ELF::EF_AMDGPU_MACH_R600_TURKS },
+ // AMDGCN GFX6.
+ { "gfx600", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 },
+ { "tahiti", ELF::EF_AMDGPU_MACH_AMDGCN_GFX600 },
+ { "gfx601", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
+ { "hainan", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
+ { "oland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
+ { "pitcairn", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
+ { "verde", ELF::EF_AMDGPU_MACH_AMDGCN_GFX601 },
+ // AMDGCN GFX7.
+ { "gfx700", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 },
+ { "kaveri", ELF::EF_AMDGPU_MACH_AMDGCN_GFX700 },
+ { "gfx701", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 },
+ { "hawaii", ELF::EF_AMDGPU_MACH_AMDGCN_GFX701 },
+ { "gfx702", ELF::EF_AMDGPU_MACH_AMDGCN_GFX702 },
+ { "gfx703", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
+ { "kabini", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
+ { "mullins", ELF::EF_AMDGPU_MACH_AMDGCN_GFX703 },
+ { "gfx704", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 },
+ { "bonaire", ELF::EF_AMDGPU_MACH_AMDGCN_GFX704 },
+ // AMDGCN GFX8.
+ { "gfx801", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 },
+ { "carrizo", ELF::EF_AMDGPU_MACH_AMDGCN_GFX801 },
+ { "gfx802", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
+ { "iceland", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
+ { "tonga", ELF::EF_AMDGPU_MACH_AMDGCN_GFX802 },
+ { "gfx803", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
+ { "fiji", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
+ { "polaris10", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
+ { "polaris11", ELF::EF_AMDGPU_MACH_AMDGCN_GFX803 },
+ { "gfx810", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 },
+ { "stoney", ELF::EF_AMDGPU_MACH_AMDGCN_GFX810 },
+ // AMDGCN GFX9.
+ { "gfx900", ELF::EF_AMDGPU_MACH_AMDGCN_GFX900 },
+ { "gfx902", ELF::EF_AMDGPU_MACH_AMDGCN_GFX902 },
+ { "gfx904", ELF::EF_AMDGPU_MACH_AMDGCN_GFX904 },
+ { "gfx906", ELF::EF_AMDGPU_MACH_AMDGCN_GFX906 },
+ // Not specified processor.
+ { nullptr, ELF::EF_AMDGPU_MACH_NONE }
+};
+
+unsigned AMDGPUTargetStreamer::getMACH(StringRef GPU) const {
+ auto Entry = MachTable;
+ for (; Entry->Name && GPU != Entry->Name; ++Entry)
+ ;
+ return Entry->Mach;
+}
+
+const char *AMDGPUTargetStreamer::getMachName(unsigned Mach) {
+ auto Entry = MachTable;
+ for (; Entry->Name && Mach != Entry->Mach; ++Entry)
+ ;
+ return Entry->Name;
+}
+
bool AMDGPUTargetStreamer::EmitHSAMetadata(StringRef HSAMetadataString) {
HSAMD::Metadata HSAMetadata;
if (HSAMD::fromString(HSAMetadataString, HSAMetadata))
@@ -128,7 +205,7 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsa
bool ReserveVCC, bool ReserveFlatScr, bool ReserveXNACK) {
amdhsa::kernel_descriptor_t DefaultKD = getDefaultAmdhsaKernelDescriptor();
- IsaVersion IVersion = getIsaVersion(STI.getCPU());
+ IsaInfo::IsaVersion IVersion = IsaInfo::getIsaVersion(STI.getFeatureBits());
OS << "\t.amdhsa_kernel " << KernelName << '\n';
@@ -265,7 +342,7 @@ AMDGPUTargetELFStreamer::AMDGPUTargetELF
unsigned EFlags = MCA.getELFHeaderEFlags();
EFlags &= ~ELF::EF_AMDGPU_MACH;
- EFlags |= getElfMach(STI.getCPU());
+ EFlags |= getMACH(STI.getCPU());
EFlags &= ~ELF::EF_AMDGPU_XNACK;
if (AMDGPU::hasXNACK(STI))
Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h?rev=342023&r1=342022&r2=342023&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h Wed Sep 12 00:05:30 2018
@@ -31,7 +31,13 @@ class AMDGPUTargetStreamer : public MCTa
protected:
MCContext &getContext() const { return Streamer.getContext(); }
+ /// \returns Equivalent EF_AMDGPU_MACH_* value for given \p GPU name.
+ unsigned getMACH(StringRef GPU) const;
+
public:
+ /// \returns Equivalent GPU name for an EF_AMDGPU_MACH_* value.
+ static const char *getMachName(unsigned Mach);
+
AMDGPUTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {}
virtual void EmitDirectiveAMDGCNTarget(StringRef Target) = 0;
Modified: llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp?rev=342023&r1=342022&r2=342023&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp Wed Sep 12 00:05:30 2018
@@ -369,7 +369,7 @@ private:
const SIRegisterInfo *TRI = nullptr;
const MachineRegisterInfo *MRI = nullptr;
const MachineLoopInfo *MLI = nullptr;
- AMDGPU::IsaVersion IV;
+ AMDGPU::IsaInfo::IsaVersion IV;
DenseSet<MachineBasicBlock *> BlockVisitedSet;
DenseSet<MachineInstr *> TrackedWaitcntSet;
@@ -1841,7 +1841,7 @@ bool SIInsertWaitcnts::runOnMachineFunct
TRI = &TII->getRegisterInfo();
MRI = &MF.getRegInfo();
MLI = &getAnalysis<MachineLoopInfo>();
- IV = AMDGPU::getIsaVersion(ST->getCPU());
+ IV = AMDGPU::IsaInfo::getIsaVersion(ST->getFeatureBits());
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
ForceEmitZeroWaitcnts = ForceEmitZeroFlag;
Modified: llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp?rev=342023&r1=342022&r2=342023&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIMemoryLegalizer.cpp Wed Sep 12 00:05:30 2018
@@ -253,7 +253,7 @@ protected:
/// Instruction info.
const SIInstrInfo *TII = nullptr;
- IsaVersion IV;
+ IsaInfo::IsaVersion IV;
SICacheControl(const GCNSubtarget &ST);
@@ -605,7 +605,7 @@ Optional<SIMemOpInfo> SIMemOpAccess::get
SICacheControl::SICacheControl(const GCNSubtarget &ST) {
TII = ST.getInstrInfo();
- IV = getIsaVersion(ST.getCPU());
+ IV = IsaInfo::getIsaVersion(ST.getFeatureBits());
}
/* static */
Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp?rev=342023&r1=342022&r2=342023&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Wed Sep 12 00:05:30 2018
@@ -137,18 +137,68 @@ int getMCOpcode(uint16_t Opcode, unsigne
namespace IsaInfo {
+IsaVersion getIsaVersion(const FeatureBitset &Features) {
+ // GCN GFX6 (Southern Islands (SI)).
+ if (Features.test(FeatureISAVersion6_0_0))
+ return {6, 0, 0};
+ if (Features.test(FeatureISAVersion6_0_1))
+ return {6, 0, 1};
+
+ // GCN GFX7 (Sea Islands (CI)).
+ if (Features.test(FeatureISAVersion7_0_0))
+ return {7, 0, 0};
+ if (Features.test(FeatureISAVersion7_0_1))
+ return {7, 0, 1};
+ if (Features.test(FeatureISAVersion7_0_2))
+ return {7, 0, 2};
+ if (Features.test(FeatureISAVersion7_0_3))
+ return {7, 0, 3};
+ if (Features.test(FeatureISAVersion7_0_4))
+ return {7, 0, 4};
+ if (Features.test(FeatureSeaIslands))
+ return {7, 0, 0};
+
+ // GCN GFX8 (Volcanic Islands (VI)).
+ if (Features.test(FeatureISAVersion8_0_1))
+ return {8, 0, 1};
+ if (Features.test(FeatureISAVersion8_0_2))
+ return {8, 0, 2};
+ if (Features.test(FeatureISAVersion8_0_3))
+ return {8, 0, 3};
+ if (Features.test(FeatureISAVersion8_1_0))
+ return {8, 1, 0};
+ if (Features.test(FeatureVolcanicIslands))
+ return {8, 0, 0};
+
+ // GCN GFX9.
+ if (Features.test(FeatureISAVersion9_0_0))
+ return {9, 0, 0};
+ if (Features.test(FeatureISAVersion9_0_2))
+ return {9, 0, 2};
+ if (Features.test(FeatureISAVersion9_0_4))
+ return {9, 0, 4};
+ if (Features.test(FeatureISAVersion9_0_6))
+ return {9, 0, 6};
+ if (Features.test(FeatureGFX9))
+ return {9, 0, 0};
+
+ if (Features.test(FeatureSouthernIslands))
+ return {0, 0, 0};
+ return {7, 0, 0};
+}
+
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
auto TargetTriple = STI->getTargetTriple();
- auto Version = getIsaVersion(STI->getCPU());
+ auto ISAVersion = IsaInfo::getIsaVersion(STI->getFeatureBits());
Stream << TargetTriple.getArchName() << '-'
<< TargetTriple.getVendorName() << '-'
<< TargetTriple.getOSName() << '-'
<< TargetTriple.getEnvironmentName() << '-'
<< "gfx"
- << Version.Major
- << Version.Minor
- << Version.Stepping;
+ << ISAVersion.Major
+ << ISAVersion.Minor
+ << ISAVersion.Stepping;
if (hasXNACK(*STI))
Stream << "+xnack";
@@ -160,49 +210,49 @@ bool hasCodeObjectV3(const MCSubtargetIn
return STI->getFeatureBits().test(FeatureCodeObjectV3);
}
-unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
- if (STI->getFeatureBits().test(FeatureWavefrontSize16))
+unsigned getWavefrontSize(const FeatureBitset &Features) {
+ if (Features.test(FeatureWavefrontSize16))
return 16;
- if (STI->getFeatureBits().test(FeatureWavefrontSize32))
+ if (Features.test(FeatureWavefrontSize32))
return 32;
return 64;
}
-unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
- if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
+unsigned getLocalMemorySize(const FeatureBitset &Features) {
+ if (Features.test(FeatureLocalMemorySize32768))
return 32768;
- if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
+ if (Features.test(FeatureLocalMemorySize65536))
return 65536;
return 0;
}
-unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
+unsigned getEUsPerCU(const FeatureBitset &Features) {
return 4;
}
-unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
+unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
unsigned FlatWorkGroupSize) {
- if (!STI->getFeatureBits().test(FeatureGCN))
+ if (!Features.test(FeatureGCN))
return 8;
- unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
+ unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
if (N == 1)
return 40;
N = 40 / N;
return std::min(N, 16u);
}
-unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI) {
- return getMaxWavesPerEU() * getEUsPerCU(STI);
+unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
+ return getMaxWavesPerEU() * getEUsPerCU(Features);
}
-unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
+unsigned getMaxWavesPerCU(const FeatureBitset &Features,
unsigned FlatWorkGroupSize) {
- return getWavesPerWorkGroup(STI, FlatWorkGroupSize);
+ return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
}
-unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
+unsigned getMinWavesPerEU(const FeatureBitset &Features) {
return 1;
}
@@ -211,89 +261,89 @@ unsigned getMaxWavesPerEU() {
return 10;
}
-unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
+unsigned getMaxWavesPerEU(const FeatureBitset &Features,
unsigned FlatWorkGroupSize) {
- return alignTo(getMaxWavesPerCU(STI, FlatWorkGroupSize),
- getEUsPerCU(STI)) / getEUsPerCU(STI);
+ return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
+ getEUsPerCU(Features)) / getEUsPerCU(Features);
}
-unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
+unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
return 1;
}
-unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
+unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
return 2048;
}
-unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
+unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
unsigned FlatWorkGroupSize) {
- return alignTo(FlatWorkGroupSize, getWavefrontSize(STI)) /
- getWavefrontSize(STI);
+ return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
+ getWavefrontSize(Features);
}
-unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
- IsaVersion Version = getIsaVersion(STI->getCPU());
+unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
+ IsaVersion Version = getIsaVersion(Features);
if (Version.Major >= 8)
return 16;
return 8;
}
-unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
+unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
return 8;
}
-unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
- IsaVersion Version = getIsaVersion(STI->getCPU());
+unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
+ IsaVersion Version = getIsaVersion(Features);
if (Version.Major >= 8)
return 800;
return 512;
}
-unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
- if (STI->getFeatureBits().test(FeatureSGPRInitBug))
+unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
+ if (Features.test(FeatureSGPRInitBug))
return FIXED_NUM_SGPRS_FOR_INIT_BUG;
- IsaVersion Version = getIsaVersion(STI->getCPU());
+ IsaVersion Version = getIsaVersion(Features);
if (Version.Major >= 8)
return 102;
return 104;
}
-unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
+unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
assert(WavesPerEU != 0);
if (WavesPerEU >= getMaxWavesPerEU())
return 0;
- unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
- if (STI->getFeatureBits().test(FeatureTrapHandler))
+ unsigned MinNumSGPRs = getTotalNumSGPRs(Features) / (WavesPerEU + 1);
+ if (Features.test(FeatureTrapHandler))
MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
- MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
- return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
+ MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(Features)) + 1;
+ return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
}
-unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
+unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
bool Addressable) {
assert(WavesPerEU != 0);
- IsaVersion Version = getIsaVersion(STI->getCPU());
- unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
+ IsaVersion Version = getIsaVersion(Features);
+ unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
if (Version.Major >= 8 && !Addressable)
AddressableNumSGPRs = 112;
- unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
- if (STI->getFeatureBits().test(FeatureTrapHandler))
+ unsigned MaxNumSGPRs = getTotalNumSGPRs(Features) / WavesPerEU;
+ if (Features.test(FeatureTrapHandler))
MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
- MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
+ MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(Features));
return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}
-unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
+unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
bool FlatScrUsed, bool XNACKUsed) {
unsigned ExtraSGPRs = 0;
if (VCCUsed)
ExtraSGPRs = 2;
- IsaVersion Version = getIsaVersion(STI->getCPU());
+ IsaVersion Version = getIsaVersion(Features);
if (Version.Major < 8) {
if (FlatScrUsed)
ExtraSGPRs = 4;
@@ -308,74 +358,74 @@ unsigned getNumExtraSGPRs(const MCSubtar
return ExtraSGPRs;
}
-unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
+unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
bool FlatScrUsed) {
- return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
- STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
+ return getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed,
+ Features[AMDGPU::FeatureXNACK]);
}
-unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
- NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
+unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs) {
+ NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(Features));
// SGPRBlocks is actual number of SGPR blocks minus 1.
- return NumSGPRs / getSGPREncodingGranule(STI) - 1;
+ return NumSGPRs / getSGPREncodingGranule(Features) - 1;
}
-unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI) {
+unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
return 4;
}
-unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI) {
- return getVGPRAllocGranule(STI);
+unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
+ return getVGPRAllocGranule(Features);
}
-unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
+unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
return 256;
}
-unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
- return getTotalNumVGPRs(STI);
+unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
+ return getTotalNumVGPRs(Features);
}
-unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
+unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
assert(WavesPerEU != 0);
if (WavesPerEU >= getMaxWavesPerEU())
return 0;
unsigned MinNumVGPRs =
- alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1),
- getVGPRAllocGranule(STI)) + 1;
- return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI));
+ alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
+ getVGPRAllocGranule(Features)) + 1;
+ return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
}
-unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
+unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
assert(WavesPerEU != 0);
- unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
- getVGPRAllocGranule(STI));
- unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
+ unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
+ getVGPRAllocGranule(Features));
+ unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}
-unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs) {
- NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(STI));
+unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumVGPRs) {
+ NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(Features));
// VGPRBlocks is actual number of VGPR blocks minus 1.
- return NumVGPRs / getVGPREncodingGranule(STI) - 1;
+ return NumVGPRs / getVGPREncodingGranule(Features) - 1;
}
} // end namespace IsaInfo
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
- const MCSubtargetInfo *STI) {
- IsaVersion Version = getIsaVersion(STI->getCPU());
+ const FeatureBitset &Features) {
+ IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);
memset(&Header, 0, sizeof(Header));
Header.amd_kernel_code_version_major = 1;
Header.amd_kernel_code_version_minor = 2;
Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
- Header.amd_machine_version_major = Version.Major;
- Header.amd_machine_version_minor = Version.Minor;
- Header.amd_machine_version_stepping = Version.Stepping;
+ Header.amd_machine_version_major = ISA.Major;
+ Header.amd_machine_version_minor = ISA.Minor;
+ Header.amd_machine_version_stepping = ISA.Stepping;
Header.kernel_code_entry_byte_offset = sizeof(Header);
// wavefront_size is specified as a power of 2: 2^6 = 64 threads.
Header.wavefront_size = 6;
@@ -463,7 +513,7 @@ std::pair<int, int> getIntegerPairAttrib
return Ints;
}
-unsigned getVmcntBitMask(const IsaVersion &Version) {
+unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
if (Version.Major < 9)
return VmcntLo;
@@ -472,15 +522,15 @@ unsigned getVmcntBitMask(const IsaVersio
return VmcntLo | VmcntHi;
}
-unsigned getExpcntBitMask(const IsaVersion &Version) {
+unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
return (1 << getExpcntBitWidth()) - 1;
}
-unsigned getLgkmcntBitMask(const IsaVersion &Version) {
+unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
return (1 << getLgkmcntBitWidth()) - 1;
}
-unsigned getWaitcntBitMask(const IsaVersion &Version) {
+unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
@@ -492,7 +542,7 @@ unsigned getWaitcntBitMask(const IsaVers
return Waitcnt | VmcntHi;
}
-unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
+unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
unsigned VmcntLo =
unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
if (Version.Major < 9)
@@ -504,22 +554,22 @@ unsigned decodeVmcnt(const IsaVersion &V
return VmcntLo | VmcntHi;
}
-unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
+unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}
-unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
+unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}
-void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
+void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
Vmcnt = decodeVmcnt(Version, Waitcnt);
Expcnt = decodeExpcnt(Version, Waitcnt);
Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}
-unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
+unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
unsigned Vmcnt) {
Waitcnt =
packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
@@ -530,17 +580,17 @@ unsigned encodeVmcnt(const IsaVersion &V
return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
}
-unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
+unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
unsigned Expcnt) {
return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}
-unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
+unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
unsigned Lgkmcnt) {
return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}
-unsigned encodeWaitcnt(const IsaVersion &Version,
+unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
unsigned Waitcnt = getWaitcntBitMask(Version);
Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h?rev=342023&r1=342022&r2=342023&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h Wed Sep 12 00:05:30 2018
@@ -19,7 +19,6 @@
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/TargetParser.h"
#include <cstdint>
#include <string>
#include <utility>
@@ -57,6 +56,16 @@ enum {
TRAP_NUM_SGPRS = 16
};
+/// Instruction set architecture version.
+struct IsaVersion {
+ unsigned Major;
+ unsigned Minor;
+ unsigned Stepping;
+};
+
+/// \returns Isa version for given subtarget \p Features.
+IsaVersion getIsaVersion(const FeatureBitset &Features);
+
/// Streams isa version string for given subtarget \p STI into \p Stream.
void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream);
@@ -64,114 +73,114 @@ void streamIsaVersion(const MCSubtargetI
/// false otherwise.
bool hasCodeObjectV3(const MCSubtargetInfo *STI);
-/// \returns Wavefront size for given subtarget \p STI.
-unsigned getWavefrontSize(const MCSubtargetInfo *STI);
+/// \returns Wavefront size for given subtarget \p Features.
+unsigned getWavefrontSize(const FeatureBitset &Features);
-/// \returns Local memory size in bytes for given subtarget \p STI.
-unsigned getLocalMemorySize(const MCSubtargetInfo *STI);
+/// \returns Local memory size in bytes for given subtarget \p Features.
+unsigned getLocalMemorySize(const FeatureBitset &Features);
/// \returns Number of execution units per compute unit for given subtarget \p
-/// STI.
-unsigned getEUsPerCU(const MCSubtargetInfo *STI);
+/// Features.
+unsigned getEUsPerCU(const FeatureBitset &Features);
/// \returns Maximum number of work groups per compute unit for given subtarget
-/// \p STI and limited by given \p FlatWorkGroupSize.
-unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
+/// \p Features and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
unsigned FlatWorkGroupSize);
/// \returns Maximum number of waves per compute unit for given subtarget \p
-/// STI without any kind of limitation.
-unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI);
+/// Features without any kind of limitation.
+unsigned getMaxWavesPerCU(const FeatureBitset &Features);
/// \returns Maximum number of waves per compute unit for given subtarget \p
-/// STI and limited by given \p FlatWorkGroupSize.
-unsigned getMaxWavesPerCU(const MCSubtargetInfo *STI,
+/// Features and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWavesPerCU(const FeatureBitset &Features,
unsigned FlatWorkGroupSize);
/// \returns Minimum number of waves per execution unit for given subtarget \p
-/// STI.
-unsigned getMinWavesPerEU(const MCSubtargetInfo *STI);
+/// Features.
+unsigned getMinWavesPerEU(const FeatureBitset &Features);
/// \returns Maximum number of waves per execution unit for given subtarget \p
-/// STI without any kind of limitation.
+/// Features without any kind of limitation.
unsigned getMaxWavesPerEU();
/// \returns Maximum number of waves per execution unit for given subtarget \p
-/// STI and limited by given \p FlatWorkGroupSize.
-unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI,
+/// Features and limited by given \p FlatWorkGroupSize.
+unsigned getMaxWavesPerEU(const FeatureBitset &Features,
unsigned FlatWorkGroupSize);
-/// \returns Minimum flat work group size for given subtarget \p STI.
-unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI);
+/// \returns Minimum flat work group size for given subtarget \p Features.
+unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features);
-/// \returns Maximum flat work group size for given subtarget \p STI.
-unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI);
+/// \returns Maximum flat work group size for given subtarget \p Features.
+unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features);
-/// \returns Number of waves per work group for given subtarget \p STI and
+/// \returns Number of waves per work group for given subtarget \p Features and
/// limited by given \p FlatWorkGroupSize.
-unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
+unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
unsigned FlatWorkGroupSize);
-/// \returns SGPR allocation granularity for given subtarget \p STI.
-unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI);
+/// \returns SGPR allocation granularity for given subtarget \p Features.
+unsigned getSGPRAllocGranule(const FeatureBitset &Features);
-/// \returns SGPR encoding granularity for given subtarget \p STI.
-unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI);
+/// \returns SGPR encoding granularity for given subtarget \p Features.
+unsigned getSGPREncodingGranule(const FeatureBitset &Features);
-/// \returns Total number of SGPRs for given subtarget \p STI.
-unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI);
+/// \returns Total number of SGPRs for given subtarget \p Features.
+unsigned getTotalNumSGPRs(const FeatureBitset &Features);
-/// \returns Addressable number of SGPRs for given subtarget \p STI.
-unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI);
+/// \returns Addressable number of SGPRs for given subtarget \p Features.
+unsigned getAddressableNumSGPRs(const FeatureBitset &Features);
/// \returns Minimum number of SGPRs that meets the given number of waves per
-/// execution unit requirement for given subtarget \p STI.
-unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
+/// execution unit requirement for given subtarget \p Features.
+unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
/// \returns Maximum number of SGPRs that meets the given number of waves per
-/// execution unit requirement for given subtarget \p STI.
-unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
+/// execution unit requirement for given subtarget \p Features.
+unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
bool Addressable);
/// \returns Number of extra SGPRs implicitly required by given subtarget \p
-/// STI when the given special registers are used.
-unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
+/// Features when the given special registers are used.
+unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
bool FlatScrUsed, bool XNACKUsed);
/// \returns Number of extra SGPRs implicitly required by given subtarget \p
-/// STI when the given special registers are used. XNACK is inferred from
-/// \p STI.
-unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
+/// Features when the given special registers are used. XNACK is inferred from
+/// \p Features.
+unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
bool FlatScrUsed);
-/// \returns Number of SGPR blocks needed for given subtarget \p STI when
+/// \returns Number of SGPR blocks needed for given subtarget \p Features when
/// \p NumSGPRs are used. \p NumSGPRs should already include any special
/// register counts.
-unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
+unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs);
-/// \returns VGPR allocation granularity for given subtarget \p STI.
-unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI);
+/// \returns VGPR allocation granularity for given subtarget \p Features.
+unsigned getVGPRAllocGranule(const FeatureBitset &Features);
-/// \returns VGPR encoding granularity for given subtarget \p STI.
-unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI);
+/// \returns VGPR encoding granularity for given subtarget \p Features.
+unsigned getVGPREncodingGranule(const FeatureBitset &Features);
-/// \returns Total number of VGPRs for given subtarget \p STI.
-unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
+/// \returns Total number of VGPRs for given subtarget \p Features.
+unsigned getTotalNumVGPRs(const FeatureBitset &Features);
-/// \returns Addressable number of VGPRs for given subtarget \p STI.
-unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
+/// \returns Addressable number of VGPRs for given subtarget \p Features.
+unsigned getAddressableNumVGPRs(const FeatureBitset &Features);
/// \returns Minimum number of VGPRs that meets given number of waves per
-/// execution unit requirement for given subtarget \p STI.
-unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
+/// execution unit requirement for given subtarget \p Features.
+unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
/// \returns Maximum number of VGPRs that meets given number of waves per
-/// execution unit requirement for given subtarget \p STI.
-unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU);
+/// execution unit requirement for given subtarget \p Features.
+unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU);
-/// \returns Number of VGPR blocks needed for given subtarget \p STI when
+/// \returns Number of VGPR blocks needed for given subtarget \p Features when
/// \p NumVGPRs are used.
-unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs);
+unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs);
} // end namespace IsaInfo
@@ -224,7 +233,7 @@ LLVM_READONLY
int getMCOpcode(uint16_t Opcode, unsigned Gen);
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
- const MCSubtargetInfo *STI);
+ const FeatureBitset &Features);
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor();
@@ -259,25 +268,25 @@ std::pair<int, int> getIntegerPairAttrib
bool OnlyFirstRequired = false);
/// \returns Vmcnt bit mask for given isa \p Version.
-unsigned getVmcntBitMask(const IsaVersion &Version);
+unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version);
/// \returns Expcnt bit mask for given isa \p Version.
-unsigned getExpcntBitMask(const IsaVersion &Version);
+unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version);
/// \returns Lgkmcnt bit mask for given isa \p Version.
-unsigned getLgkmcntBitMask(const IsaVersion &Version);
+unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version);
/// \returns Waitcnt bit mask for given isa \p Version.
-unsigned getWaitcntBitMask(const IsaVersion &Version);
+unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version);
/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
-unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt);
+unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
-unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt);
+unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
-unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt);
+unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt);
/// Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
@@ -288,19 +297,19 @@ unsigned decodeLgkmcnt(const IsaVersion
/// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14] (gfx9+ only)
/// \p Expcnt = \p Waitcnt[6:4]
/// \p Lgkmcnt = \p Waitcnt[11:8]
-void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
+void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
-unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
+unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
unsigned Vmcnt);
/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
-unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
+unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
unsigned Expcnt);
/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
-unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
+unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
unsigned Lgkmcnt);
/// Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
@@ -315,7 +324,7 @@ unsigned encodeLgkmcnt(const IsaVersion
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
-unsigned encodeWaitcnt(const IsaVersion &Version,
+unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
unsigned getInitialPSInputAddr(const Function &F);
Removed: llvm/trunk/test/CodeGen/AMDGPU/gfx902-without-xnack.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/gfx902-without-xnack.ll?rev=342022&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/gfx902-without-xnack.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/gfx902-without-xnack.ll (removed)
@@ -1,8 +0,0 @@
-; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=-xnack < %s | FileCheck %s
-
-; CHECK: .hsa_code_object_isa 9,0,2,"AMD","AMDGPU"
-define amdgpu_kernel void @test_kernel(float addrspace(1)* %out0, double addrspace(1)* %out1) nounwind {
- store float 0.0, float addrspace(1)* %out0
- ret void
-}
-
Modified: llvm/trunk/test/MC/AMDGPU/hsa_isa_version_attrs.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/hsa_isa_version_attrs.s?rev=342023&r1=342022&r2=342023&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/hsa_isa_version_attrs.s (original)
+++ llvm/trunk/test/MC/AMDGPU/hsa_isa_version_attrs.s Wed Sep 12 00:05:30 2018
@@ -2,5 +2,5 @@
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -mattr=-mad-mix-insts -show-encoding %s | FileCheck --check-prefix=GFX9 %s
.hsa_code_object_isa
-// GFX8: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
+// GFX8: .hsa_code_object_isa 8,0,0,"AMD","AMDGPU"
// GFX9: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"
More information about the llvm-commits mailing list