[llvm] r335942 - AMDGPU: Separate R600 and GCN TableGen files
Tom Stellard via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 28 16:47:12 PDT 2018
Author: tstellar
Date: Thu Jun 28 16:47:12 2018
New Revision: 335942
URL: http://llvm.org/viewvc/llvm-project?rev=335942&view=rev
Log:
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time,
since each sub-target now only has to consider information specific to
itself. This will also help prevent the R600 sub-target from slowing
down the development of new GCN features, like disassembler support,
GlobalISel, etc. (A sketch of the resulting TableGen layout follows the
review metadata below.)
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
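
To make the new layout concrete, here is a minimal sketch of the two
top-level TableGen roots after this change. The AMDGPU.td lines follow
the diff below; the R600.td body is an assumption (the added file's
contents are not part of this excerpt), inferred from the includes this
patch drops from AMDGPU.td and AMDGPUInstructions.td:

    // AMDGPU.td -- GCN root, matching the diff below
    include "llvm/TableGen/SearchableTable.td"
    include "llvm/Target/Target.td"
    include "AMDGPUFeatures.td"      // shared R600/GCN feature helpers
    // ... GCN-only pieces follow:
    include "SISchedule.td"
    include "GCNProcessors.td"
    include "SIInstrInfo.td"

    // R600.td -- added by this patch; hypothetical sketch inferred
    // from the includes removed from AMDGPU.td below
    include "llvm/Target/Target.td"
    include "AMDGPUFeatures.td"      // same shared helpers
    include "R600Schedule.td"
    include "R600Processors.td"
    include "R600Instructions.td"

Each root is run through TableGen separately, producing parallel
AMDGPUGen*.inc and R600Gen*.inc outputs; the R600GenSubtargetInfo.inc
and R600GenDAGISel.inc includes in the C++ changes below come from the
new R600 side.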
Added:
llvm/trunk/lib/Target/AMDGPU/AMDGPUFeatures.td
llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp
llvm/trunk/lib/Target/AMDGPU/R600.td
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
llvm/trunk/lib/Target/AMDGPU/AMDGPUCallingConv.td
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.h
llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td
llvm/trunk/lib/Target/AMDGPU/AMDGPUIntrinsics.td
llvm/trunk/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterInfo.td
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.h
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
llvm/trunk/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt
llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
llvm/trunk/lib/Target/AMDGPU/EvergreenInstructions.td
llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
llvm/trunk/lib/Target/AMDGPU/R600AsmPrinter.cpp
llvm/trunk/lib/Target/AMDGPU/R600ClauseMergePass.cpp
llvm/trunk/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
llvm/trunk/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
llvm/trunk/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.h
llvm/trunk/lib/Target/AMDGPU/R600InstrFormats.td
llvm/trunk/lib/Target/AMDGPU/R600InstrInfo.cpp
llvm/trunk/lib/Target/AMDGPU/R600InstrInfo.h
llvm/trunk/lib/Target/AMDGPU/R600Instructions.td
llvm/trunk/lib/Target/AMDGPU/R600MachineScheduler.cpp
llvm/trunk/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
llvm/trunk/lib/Target/AMDGPU/R600Packetizer.cpp
llvm/trunk/lib/Target/AMDGPU/R600Processors.td
llvm/trunk/lib/Target/AMDGPU/R600RegisterInfo.cpp
llvm/trunk/lib/Target/AMDGPU/R600RegisterInfo.h
llvm/trunk/lib/Target/AMDGPU/R600RegisterInfo.td
llvm/trunk/lib/Target/AMDGPU/R700Instructions.td
llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h
llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPU.td?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPU.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.td Thu Jun 28 16:47:12 2018
@@ -9,23 +9,12 @@
include "llvm/TableGen/SearchableTable.td"
include "llvm/Target/Target.td"
+include "AMDGPUFeatures.td"
//===------------------------------------------------------------===//
// Subtarget Features (device properties)
//===------------------------------------------------------------===//
-def FeatureFP64 : SubtargetFeature<"fp64",
- "FP64",
- "true",
- "Enable double precision operations"
->;
-
-def FeatureFMA : SubtargetFeature<"fmaf",
- "FMA",
- "true",
- "Enable single precision FMA (not as fast as mul+add, but fused)"
->;
-
def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
"FastFMAF32",
"true",
@@ -44,30 +33,6 @@ def HalfRate64Ops : SubtargetFeature<"ha
"Most fp64 instructions are half rate instead of quarter"
>;
-def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
- "R600ALUInst",
- "false",
- "Older version of ALU instructions encoding"
->;
-
-def FeatureVertexCache : SubtargetFeature<"HasVertexCache",
- "HasVertexCache",
- "true",
- "Specify use of dedicated vertex cache"
->;
-
-def FeatureCaymanISA : SubtargetFeature<"caymanISA",
- "CaymanISA",
- "true",
- "Use Cayman ISA"
->;
-
-def FeatureCFALUBug : SubtargetFeature<"cfalubug",
- "CFALUBug",
- "true",
- "GPU has CF_ALU bug"
->;
-
def FeatureFlatAddressSpace : SubtargetFeature<"flat-address-space",
"FlatAddressSpace",
"true",
@@ -153,27 +118,6 @@ def FeatureSGPRInitBug : SubtargetFeatur
"VI SGPR initialization bug requiring a fixed SGPR allocation size"
>;
-class SubtargetFeatureFetchLimit <string Value> :
- SubtargetFeature <"fetch"#Value,
- "TexVTXClauseSize",
- Value,
- "Limit the maximum number of fetches in a clause to "#Value
->;
-
-def FeatureFetchLimit8 : SubtargetFeatureFetchLimit <"8">;
-def FeatureFetchLimit16 : SubtargetFeatureFetchLimit <"16">;
-
-class SubtargetFeatureWavefrontSize <int Value> : SubtargetFeature<
- "wavefrontsize"#Value,
- "WavefrontSize",
- !cast<string>(Value),
- "The number of threads per wavefront"
->;
-
-def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>;
-def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
-def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;
-
class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
"ldsbankcount"#Value,
"LDSBankCount",
@@ -184,19 +128,6 @@ class SubtargetFeatureLDSBankCount <int
def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;
-class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
- "localmemorysize"#Value,
- "LocalMemorySize",
- !cast<string>(Value),
- "The size of local memory in bytes"
->;
-
-def FeatureGCN : SubtargetFeature<"gcn",
- "IsGCN",
- "true",
- "GCN or newer GPU"
->;
-
def FeatureGCN3Encoding : SubtargetFeature<"gcn3-encoding",
"GCN3Encoding",
"true",
@@ -369,12 +300,6 @@ def FeatureFP16Denormals : SubtargetFeat
[FeatureFP64FP16Denormals]
>;
-def FeatureDX10Clamp : SubtargetFeature<"dx10-clamp",
- "DX10Clamp",
- "true",
- "clamp modifier clamps NaNs to 0.0"
->;
-
def FeatureFPExceptions : SubtargetFeature<"fp-exceptions",
"FPExceptions",
"true",
@@ -417,12 +342,6 @@ def FeatureDumpCodeLower : SubtargetFeat
"Dump MachineInstrs in the CodeEmitter"
>;
-def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca",
- "EnablePromoteAlloca",
- "true",
- "Enable promote alloca pass"
->;
-
// XXX - This should probably be removed once enabled by default
def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt",
"EnableLoadStoreOpt",
@@ -486,45 +405,29 @@ def FeatureDisable : SubtargetFeature<""
"Dummy feature to disable assembler instructions"
>;
-class SubtargetFeatureGeneration <string Value,
- list<SubtargetFeature> Implies> :
- SubtargetFeature <Value, "Gen", "AMDGPUSubtarget::"#Value,
- Value#" GPU generation", Implies>;
-
-def FeatureLocalMemorySize0 : SubtargetFeatureLocalMemorySize<0>;
-def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>;
-def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>;
-
-def FeatureR600 : SubtargetFeatureGeneration<"R600",
- [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0]
->;
-
-def FeatureR700 : SubtargetFeatureGeneration<"R700",
- [FeatureFetchLimit16, FeatureLocalMemorySize0]
->;
-
-def FeatureEvergreen : SubtargetFeatureGeneration<"EVERGREEN",
- [FeatureFetchLimit16, FeatureLocalMemorySize32768]
+def FeatureGCN : SubtargetFeature<"gcn",
+ "IsGCN",
+ "true",
+ "GCN or newer GPU"
>;
-def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
- [FeatureFetchLimit16, FeatureWavefrontSize64,
- FeatureLocalMemorySize32768]
->;
+class AMDGPUSubtargetFeatureGeneration <string Value,
+ list<SubtargetFeature> Implies> :
+ SubtargetFeatureGeneration <Value, "AMDGPUSubtarget", Implies>;
-def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
+def FeatureSouthernIslands : AMDGPUSubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
[FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128,
FeatureWavefrontSize64, FeatureGCN,
FeatureLDSBankCount32, FeatureMovrel]
>;
-def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
+def FeatureSeaIslands : AMDGPUSubtargetFeatureGeneration<"SEA_ISLANDS",
[FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
FeatureCIInsts, FeatureMovrel]
>;
-def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
+def FeatureVolcanicIslands : AMDGPUSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
[FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
@@ -535,7 +438,7 @@ def FeatureVolcanicIslands : SubtargetFe
]
>;
-def FeatureGFX9 : SubtargetFeatureGeneration<"GFX9",
+def FeatureGFX9 : AMDGPUSubtargetFeatureGeneration<"GFX9",
[FeatureFP64, FeatureLocalMemorySize65536,
FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
@@ -738,8 +641,6 @@ def NullALU : InstrItinClass;
// Predicate helper class
//===----------------------------------------------------------------------===//
-def TruePredicate : Predicate<"true">;
-
def isSICI : Predicate<
"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS"
@@ -831,36 +732,15 @@ def HasDLInsts : Predicate<"Subtarget->h
def EnableLateCFGStructurize : Predicate<
"EnableLateStructurizeCFG">;
-// Exists to help track down where SubtargetPredicate isn't set rather
-// than letting tablegen crash with an unhelpful error.
-def InvalidPred : Predicate<"predicate not set on instruction or pattern">;
-
-class PredicateControl {
- Predicate SubtargetPredicate = InvalidPred;
- Predicate SIAssemblerPredicate = isSICI;
- Predicate VIAssemblerPredicate = isVI;
- list<Predicate> AssemblerPredicates = [];
- Predicate AssemblerPredicate = TruePredicate;
- list<Predicate> OtherPredicates = [];
- list<Predicate> Predicates = !listconcat([SubtargetPredicate,
- AssemblerPredicate],
- AssemblerPredicates,
- OtherPredicates);
-}
-
-class AMDGPUPat<dag pattern, dag result> : Pat<pattern, result>,
- PredicateControl;
-
-
// Include AMDGPU TD files
-include "R600Schedule.td"
-include "R600Processors.td"
include "SISchedule.td"
include "GCNProcessors.td"
include "AMDGPUInstrInfo.td"
include "AMDGPUIntrinsics.td"
+include "SIIntrinsics.td"
include "AMDGPURegisterInfo.td"
include "AMDGPURegisterBanks.td"
include "AMDGPUInstructions.td"
+include "SIInstrInfo.td"
include "AMDGPUCallingConv.td"
include "AMDGPUSearchableTables.td"
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUCallingConv.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUCallingConv.td?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUCallingConv.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUCallingConv.td Thu Jun 28 16:47:12 2018
@@ -85,17 +85,6 @@ def RetCC_SI_Shader : CallingConv<[
]>>
]>;
-// Calling convention for R600
-def CC_R600 : CallingConv<[
- CCIfInReg<CCIfType<[v4f32, v4i32] , CCAssignToReg<[
- T0_XYZW, T1_XYZW, T2_XYZW, T3_XYZW, T4_XYZW, T5_XYZW, T6_XYZW, T7_XYZW,
- T8_XYZW, T9_XYZW, T10_XYZW, T11_XYZW, T12_XYZW, T13_XYZW, T14_XYZW, T15_XYZW,
- T16_XYZW, T17_XYZW, T18_XYZW, T19_XYZW, T20_XYZW, T21_XYZW, T22_XYZW,
- T23_XYZW, T24_XYZW, T25_XYZW, T26_XYZW, T27_XYZW, T28_XYZW, T29_XYZW,
- T30_XYZW, T31_XYZW, T32_XYZW
- ]>>>
-]>;
-
// Calling convention for compute kernels
def CC_AMDGPU_Kernel : CallingConv<[
CCCustom<"allocateKernArg">
@@ -165,9 +154,5 @@ def CC_AMDGPU : CallingConv<[
CCIf<"static_cast<const AMDGPUSubtarget&>"
"(State.getMachineFunction().getSubtarget()).getGeneration() >= "
"AMDGPUSubtarget::SOUTHERN_ISLANDS && State.getCallingConv() == CallingConv::C",
- CCDelegateTo<CC_AMDGPU_Func>>,
- CCIf<"static_cast<const AMDGPUSubtarget&>"
- "(State.getMachineFunction().getSubtarget()).getGeneration() < "
- "AMDGPUSubtarget::SOUTHERN_ISLANDS",
- CCDelegateTo<CC_R600>>
+ CCDelegateTo<CC_AMDGPU_Func>>
]>;
Added: llvm/trunk/lib/Target/AMDGPU/AMDGPUFeatures.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUFeatures.td?rev=335942&view=auto
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUFeatures.td (added)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUFeatures.td Thu Jun 28 16:47:12 2018
@@ -0,0 +1,60 @@
+//===-- AMDGPUFeatures.td - AMDGPU Feature Definitions -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+def FeatureFP64 : SubtargetFeature<"fp64",
+ "FP64",
+ "true",
+ "Enable double precision operations"
+>;
+
+def FeatureFMA : SubtargetFeature<"fmaf",
+ "FMA",
+ "true",
+ "Enable single precision FMA (not as fast as mul+add, but fused)"
+>;
+
+class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
+ "localmemorysize"#Value,
+ "LocalMemorySize",
+ !cast<string>(Value),
+ "The size of local memory in bytes"
+>;
+
+def FeatureLocalMemorySize0 : SubtargetFeatureLocalMemorySize<0>;
+def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>;
+def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>;
+
+class SubtargetFeatureWavefrontSize <int Value> : SubtargetFeature<
+ "wavefrontsize"#Value,
+ "WavefrontSize",
+ !cast<string>(Value),
+ "The number of threads per wavefront"
+>;
+
+def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>;
+def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>;
+def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>;
+
+class SubtargetFeatureGeneration <string Value, string Subtarget,
+ list<SubtargetFeature> Implies> :
+ SubtargetFeature <Value, "Gen", Subtarget#"::"#Value,
+ Value#" GPU generation", Implies>;
+
+def FeatureDX10Clamp : SubtargetFeature<"dx10-clamp",
+ "DX10Clamp",
+ "true",
+ "clamp modifier clamps NaNs to 0.0"
+>;
+
+def FeaturePromoteAlloca : SubtargetFeature <"promote-alloca",
+ "EnablePromoteAlloca",
+ "true",
+ "Enable promote alloca pass"
+>;
+
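
The key refactor in this new file is that SubtargetFeatureGeneration is
now parameterized on the subtarget class name, so R600 and GCN can each
map generation features onto their own Generation enum. A minimal
sketch of the two instantiations follows; the AMDGPU side is verbatim
from the AMDGPU.td diff above, while the R600 side is an assumption
about the new R600.td, whose body is not shown in this excerpt:

    // AMDGPU.td (from the diff above):
    class AMDGPUSubtargetFeatureGeneration <string Value,
                                            list<SubtargetFeature> Implies> :
      SubtargetFeatureGeneration <Value, "AMDGPUSubtarget", Implies>;

    // R600.td counterpart (hypothetical):
    class R600SubtargetFeatureGeneration <string Value,
                                          list<SubtargetFeature> Implies> :
      SubtargetFeatureGeneration <Value, "R600Subtarget", Implies>;

With this, FeatureSouthernIslands initializes Gen to
AMDGPUSubtarget::SOUTHERN_ISLANDS as before, while an R600-side
generation feature would initialize it to, e.g.,
R600Subtarget::NORTHERN_ISLANDS, matching the split Generation enums in
AMDGPUSubtarget.h below.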
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp Thu Jun 28 16:47:12 2018
@@ -104,15 +104,11 @@ private:
bool isNoNanSrc(SDValue N) const;
bool isInlineImmediate(const SDNode *N) const;
- bool isConstantLoad(const MemSDNode *N, int cbID) const;
bool isUniformBr(const SDNode *N) const;
SDNode *glueCopyToM0(SDNode *N) const;
const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
- bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
- bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
- SDValue& Offset);
virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
@@ -227,9 +223,18 @@ protected:
};
class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
+ const R600Subtarget *Subtarget;
+ AMDGPUAS AMDGPUASI;
+
+ bool isConstantLoad(const MemSDNode *N, int cbID) const;
+ bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
+ bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
+ SDValue& Offset);
public:
explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
- AMDGPUDAGToDAGISel(TM, OptLevel) {}
+ AMDGPUDAGToDAGISel(TM, OptLevel) {
+ AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
+ }
void Select(SDNode *N) override;
@@ -237,6 +242,11 @@ public:
SDValue &Offset) override;
bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
SDValue &Offset) override;
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+protected:
+ // Include the pieces autogenerated from the target description.
+#include "R600GenDAGISel.inc"
};
} // end anonymous namespace
@@ -280,8 +290,7 @@ bool AMDGPUDAGToDAGISel::isNoNanSrc(SDVa
}
bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
- const SIInstrInfo *TII
- = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo();
+ const SIInstrInfo *TII = Subtarget->getInstrInfo();
if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
return TII->isInlineConstant(C->getAPIntValue());
@@ -637,16 +646,6 @@ void AMDGPUDAGToDAGISel::Select(SDNode *
SelectCode(N);
}
-bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
- if (!N->readMem())
- return false;
- if (CbId == -1)
- return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
- N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT;
-
- return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId;
-}
-
bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
const Instruction *Term = BB->getTerminator();
@@ -662,26 +661,6 @@ StringRef AMDGPUDAGToDAGISel::getPassNam
// Complex Patterns
//===----------------------------------------------------------------------===//
-bool AMDGPUDAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
- SDValue& IntPtr) {
- if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
- IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
- true);
- return true;
- }
- return false;
-}
-
-bool AMDGPUDAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
- SDValue& BaseReg, SDValue &Offset) {
- if (!isa<ConstantSDNode>(Addr)) {
- BaseReg = Addr;
- Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
- return true;
- }
- return false;
-}
-
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
SDValue &Offset) {
return false;
@@ -693,11 +672,11 @@ bool AMDGPUDAGToDAGISel::SelectADDRIndir
SDLoc DL(Addr);
if ((C = dyn_cast<ConstantSDNode>(Addr))) {
- Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
+ Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
} else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
(C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
- Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
+ Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
} else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
(C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
@@ -2160,6 +2139,41 @@ void AMDGPUDAGToDAGISel::PostprocessISel
} while (IsModified);
}
+bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+ Subtarget = &MF.getSubtarget<R600Subtarget>();
+ return SelectionDAGISel::runOnMachineFunction(MF);
+}
+
+bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
+ if (!N->readMem())
+ return false;
+ if (CbId == -1)
+ return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
+ N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT;
+
+ return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId;
+}
+
+bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
+ SDValue& IntPtr) {
+ if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
+ IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
+ true);
+ return true;
+ }
+ return false;
+}
+
+bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
+ SDValue& BaseReg, SDValue &Offset) {
+ if (!isa<ConstantSDNode>(Addr)) {
+ BaseReg = Addr;
+ Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
+ return true;
+ }
+ return false;
+}
+
void R600DAGToDAGISel::Select(SDNode *N) {
unsigned int Opc = N->getOpcode();
if (N->isMachineOpcode()) {
@@ -2180,12 +2194,12 @@ void R600DAGToDAGISel::Select(SDNode *N)
// pass. We want to avoid 128 bits copies as much as possible because they
// can't be bundled by our scheduler.
switch(NumVectorElts) {
- case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break;
+ case 2: RegClassID = R600::R600_Reg64RegClassID; break;
case 4:
if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
- RegClassID = AMDGPU::R600_Reg128VerticalRegClassID;
+ RegClassID = R600::R600_Reg128VerticalRegClassID;
else
- RegClassID = AMDGPU::R600_Reg128RegClassID;
+ RegClassID = R600::R600_Reg128RegClassID;
break;
default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
}
@@ -2203,11 +2217,11 @@ bool R600DAGToDAGISel::SelectADDRIndirec
SDLoc DL(Addr);
if ((C = dyn_cast<ConstantSDNode>(Addr))) {
- Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
+ Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
} else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
(C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
- Base = CurDAG->getRegister(AMDGPU::INDIRECT_BASE_ADDR, MVT::i32);
+ Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
} else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
(C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
@@ -2238,7 +2252,7 @@ bool R600DAGToDAGISel::SelectADDRVTX_REA
&& isInt<16>(IMMOffset->getZExtValue())) {
Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
SDLoc(CurDAG->getEntryNode()),
- AMDGPU::ZERO, MVT::i32);
+ R600::ZERO, MVT::i32);
Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
MVT::i32);
return true;
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Thu Jun 28 16:47:12 2018
@@ -155,7 +155,7 @@ unsigned AMDGPUTargetLowering::numBitsSi
}
AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
- const AMDGPUSubtarget &STI)
+ const AMDGPUCommonSubtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
AMDGPUASI = AMDGPU::getAMDGPUAS(TM);
// Lower floating point store/load to integer store/load to reduce the number
@@ -330,10 +330,6 @@ AMDGPUTargetLowering::AMDGPUTargetLoweri
setOperationAction(ISD::FLOG, MVT::f32, Custom);
setOperationAction(ISD::FLOG10, MVT::f32, Custom);
- if (Subtarget->has16BitInsts()) {
- setOperationAction(ISD::FLOG, MVT::f16, Custom);
- setOperationAction(ISD::FLOG10, MVT::f16, Custom);
- }
setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom);
setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom);
@@ -341,10 +337,6 @@ AMDGPUTargetLowering::AMDGPUTargetLoweri
setOperationAction(ISD::FREM, MVT::f32, Custom);
setOperationAction(ISD::FREM, MVT::f64, Custom);
- // v_mad_f32 does not support denormals according to some sources.
- if (!Subtarget->hasFP32Denormals())
- setOperationAction(ISD::FMAD, MVT::f32, Legal);
-
// Expand to fneg + fadd.
setOperationAction(ISD::FSUB, MVT::f64, Expand);
@@ -359,19 +351,6 @@ AMDGPUTargetLowering::AMDGPUTargetLoweri
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8f32, Custom);
setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v8i32, Custom);
- if (Subtarget->getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
- setOperationAction(ISD::FCEIL, MVT::f64, Custom);
- setOperationAction(ISD::FTRUNC, MVT::f64, Custom);
- setOperationAction(ISD::FRINT, MVT::f64, Custom);
- setOperationAction(ISD::FFLOOR, MVT::f64, Custom);
- }
-
- if (!Subtarget->hasBFI()) {
- // fcopysign can be done in a single instruction with BFI.
- setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
- }
-
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Custom);
@@ -403,12 +382,6 @@ AMDGPUTargetLowering::AMDGPUTargetLoweri
setOperationAction(ISD::SUBE, VT, Legal);
}
- if (!Subtarget->hasBCNT(32))
- setOperationAction(ISD::CTPOP, MVT::i32, Expand);
-
- if (!Subtarget->hasBCNT(64))
- setOperationAction(ISD::CTPOP, MVT::i64, Expand);
-
// The hardware supports 32-bit ROTR, but not ROTL.
setOperationAction(ISD::ROTL, MVT::i32, Expand);
setOperationAction(ISD::ROTL, MVT::i64, Expand);
@@ -428,28 +401,11 @@ AMDGPUTargetLowering::AMDGPUTargetLoweri
setOperationAction(ISD::SMAX, MVT::i32, Legal);
setOperationAction(ISD::UMAX, MVT::i32, Legal);
- if (Subtarget->hasFFBH())
- setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
-
- if (Subtarget->hasFFBL())
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
-
setOperationAction(ISD::CTTZ, MVT::i64, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Custom);
setOperationAction(ISD::CTLZ, MVT::i64, Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
- // We only really have 32-bit BFE instructions (and 16-bit on VI).
- //
- // On SI+ there are 64-bit BFEs, but they are scalar only and there isn't any
- // effort to match them now. We want this to be false for i64 cases when the
- // extraction isn't restricted to the upper or lower half. Ideally we would
- // have some pass reduce 64-bit extracts to 32-bit if possible. Extracts that
- // span the midpoint are probably relatively rare, so don't worry about them
- // for now.
- if (Subtarget->hasBFE())
- setHasExtractBitsInsn(true);
-
static const MVT::SimpleValueType VectorIntTypes[] = {
MVT::v2i32, MVT::v4i32
};
@@ -554,11 +510,6 @@ AMDGPUTargetLowering::AMDGPUTargetLoweri
// vector compares until that is fixed.
setHasMultipleConditionRegisters(true);
- // SI at least has hardware support for floating point exceptions, but no way
- // of using or handling them is implemented. They are also optional in OpenCL
- // (Section 7.3)
- setHasFloatingPointExceptions(Subtarget->hasFPExceptions());
-
PredictableSelectIsExpensive = false;
// We want to find all load dependencies for long chains of stores to enable
@@ -781,7 +732,7 @@ bool AMDGPUTargetLowering::isSDNodeAlway
{
const LoadSDNode * L = dyn_cast<LoadSDNode>(N);
if (L->getMemOperand()->getAddrSpace()
- == Subtarget->getAMDGPUAS().CONSTANT_ADDRESS_32BIT)
+ == AMDGPUASI.CONSTANT_ADDRESS_32BIT)
return true;
return false;
}
@@ -4290,9 +4241,11 @@ void AMDGPUTargetLowering::computeKnownB
switch (IID) {
case Intrinsic::amdgcn_mbcnt_lo:
case Intrinsic::amdgcn_mbcnt_hi: {
+ const SISubtarget &ST =
+ DAG.getMachineFunction().getSubtarget<SISubtarget>();
// These return at most the wavefront size - 1.
unsigned Size = Op.getValueType().getSizeInBits();
- Known.Zero.setHighBits(Size - Subtarget->getWavefrontSizeLog2());
+ Known.Zero.setHighBits(Size - ST.getWavefrontSizeLog2());
break;
}
default:
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h Thu Jun 28 16:47:12 2018
@@ -23,11 +23,13 @@
namespace llvm {
class AMDGPUMachineFunction;
-class AMDGPUSubtarget;
+class AMDGPUCommonSubtarget;
struct ArgDescriptor;
class AMDGPUTargetLowering : public TargetLowering {
private:
+ const AMDGPUCommonSubtarget *Subtarget;
+
/// \returns AMDGPUISD::FFBH_U32 node if the incoming \p Op may have been
/// legalized from a smaller type VT. Need to match pre-legalized type because
/// the generic legalization inserts the add/sub between the select and
@@ -39,7 +41,6 @@ public:
static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG);
protected:
- const AMDGPUSubtarget *Subtarget;
AMDGPUAS AMDGPUASI;
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
@@ -124,7 +125,7 @@ protected:
void analyzeFormalArgumentsCompute(CCState &State,
const SmallVectorImpl<ISD::InputArg> &Ins) const;
public:
- AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI);
+ AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUCommonSubtarget &STI);
bool mayIgnoreSignedZero(SDValue Op) const {
if (getTargetMachine().Options.NoSignedZerosFPMath)
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp Thu Jun 28 16:47:12 2018
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//
/// \file
-/// Implementation of the TargetInstrInfo class that is common to all
+/// \brief Implementation of the TargetInstrInfo class that is common to all
/// AMD GPUs.
//
//===----------------------------------------------------------------------===//
@@ -23,107 +23,11 @@
using namespace llvm;
-#define GET_INSTRINFO_CTOR_DTOR
-#include "AMDGPUGenInstrInfo.inc"
-
-namespace llvm {
-namespace AMDGPU {
-#define GET_D16ImageDimIntrinsics_IMPL
-#define GET_ImageDimIntrinsicTable_IMPL
-#define GET_RsrcIntrinsics_IMPL
-#include "AMDGPUGenSearchableTables.inc"
-}
-}
-
// Pin the vtable to this file.
-void AMDGPUInstrInfo::anchor() {}
+//void AMDGPUInstrInfo::anchor() {}
-AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
- : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
- ST(ST),
- AMDGPUASI(ST.getAMDGPUAS()) {}
-
-// FIXME: This behaves strangely. If, for example, you have 32 load + stores,
-// the first 16 loads will be interleaved with the stores, and the next 16 will
-// be clustered as expected. It should really split into 2 16 store batches.
-//
-// Loads are clustered until this returns false, rather than trying to schedule
-// groups of stores. This also means we have to deal with saying different
-// address space loads should be clustered, and ones which might cause bank
-// conflicts.
-//
-// This might be deprecated so it might not be worth that much effort to fix.
-bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
- int64_t Offset0, int64_t Offset1,
- unsigned NumLoads) const {
- assert(Offset1 > Offset0 &&
- "Second offset should be larger than first offset!");
- // If we have less than 16 loads in a row, and the offsets are within 64
- // bytes, then schedule together.
-
- // A cacheline is 64 bytes (for global memory).
- return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
-}
-
-// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
-enum SIEncodingFamily {
- SI = 0,
- VI = 1,
- SDWA = 2,
- SDWA9 = 3,
- GFX80 = 4,
- GFX9 = 5
-};
-
-static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
- switch (ST.getGeneration()) {
- case AMDGPUSubtarget::SOUTHERN_ISLANDS:
- case AMDGPUSubtarget::SEA_ISLANDS:
- return SIEncodingFamily::SI;
- case AMDGPUSubtarget::VOLCANIC_ISLANDS:
- case AMDGPUSubtarget::GFX9:
- return SIEncodingFamily::VI;
-
- // FIXME: This should never be called for r600 GPUs.
- case AMDGPUSubtarget::R600:
- case AMDGPUSubtarget::R700:
- case AMDGPUSubtarget::EVERGREEN:
- case AMDGPUSubtarget::NORTHERN_ISLANDS:
- return SIEncodingFamily::SI;
- }
-
- llvm_unreachable("Unknown subtarget generation!");
-}
-
-int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
- SIEncodingFamily Gen = subtargetEncodingFamily(ST);
-
- if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
- ST.getGeneration() >= AMDGPUSubtarget::GFX9)
- Gen = SIEncodingFamily::GFX9;
-
- if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
- Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9
- : SIEncodingFamily::SDWA;
- // Adjust the encoding family to GFX80 for D16 buffer instructions when the
- // subtarget has UnpackedD16VMem feature.
- // TODO: remove this when we discard GFX80 encoding.
- if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16Buf))
- Gen = SIEncodingFamily::GFX80;
-
- int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
-
- // -1 means that Opcode is already a native instruction.
- if (MCOp == -1)
- return Opcode;
-
- // (uint16_t)-1 means that Opcode is a pseudo instruction that has
- // no encoding in the given subtarget generation.
- if (MCOp == (uint16_t)-1)
- return -1;
+AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST) { }
- return MCOp;
-}
// TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
bool AMDGPUInstrInfo::isUniformMMO(const MachineMemOperand *MMO) {
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.h?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.h Thu Jun 28 16:47:12 2018
@@ -20,10 +20,6 @@
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
-#define GET_INSTRINFO_HEADER
-#include "AMDGPUGenInstrInfo.inc"
-#undef GET_INSTRINFO_HEADER
-
namespace llvm {
class AMDGPUSubtarget;
@@ -31,26 +27,10 @@ class MachineFunction;
class MachineInstr;
class MachineInstrBuilder;
-class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
-private:
- const AMDGPUSubtarget &ST;
-
- virtual void anchor();
-protected:
- AMDGPUAS AMDGPUASI;
-
+class AMDGPUInstrInfo {
public:
explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st);
- bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
- int64_t Offset1, int64_t Offset2,
- unsigned NumLoads) const override;
-
- /// Return a target-specific opcode if Opcode is a pseudo instruction.
- /// Return -1 if the target-specific opcode for the pseudo instruction does
- /// not exist. If Opcode is not a pseudo instruction, this is identity.
- int pseudoToMCOpcode(int Opcode) const;
-
static bool isUniformMMO(const MachineMemOperand *MMO);
};
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructions.td Thu Jun 28 16:47:12 2018
@@ -42,6 +42,47 @@ class AMDGPUShaderInst <dag outs, dag in
field bits<32> Inst = 0xffffffff;
}
+//===---------------------------------------------------------------------===//
+// Return instruction
+//===---------------------------------------------------------------------===//
+
+class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
+: Instruction {
+
+ let Namespace = "AMDGPU";
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let Pattern = pattern;
+ let AsmString = !strconcat(asmstr, "\n");
+ let isPseudo = 1;
+ let Itinerary = NullALU;
+ bit hasIEEEFlag = 0;
+ bit hasZeroOpFlag = 0;
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let isCodeGenOnly = 1;
+}
+
+def TruePredicate : Predicate<"true">;
+
+// Exists to help track down where SubtargetPredicate isn't set rather
+// than letting tablegen crash with an unhelpful error.
+def InvalidPred : Predicate<"predicate not set on instruction or pattern">;
+
+class PredicateControl {
+ Predicate SubtargetPredicate = InvalidPred;
+ list<Predicate> AssemblerPredicates = [];
+ Predicate AssemblerPredicate = TruePredicate;
+ list<Predicate> OtherPredicates = [];
+ list<Predicate> Predicates = !listconcat([SubtargetPredicate,
+ AssemblerPredicate],
+ AssemblerPredicates,
+ OtherPredicates);
+}
+class AMDGPUPat<dag pattern, dag result> : Pat<pattern, result>,
+ PredicateControl;
+
def FP16Denormals : Predicate<"Subtarget->hasFP16Denormals()">;
def FP32Denormals : Predicate<"Subtarget->hasFP32Denormals()">;
def FP64Denormals : Predicate<"Subtarget->hasFP64Denormals()">;
@@ -94,12 +135,6 @@ def brtarget : Operand<OtherVT>;
// Misc. PatFrags
//===----------------------------------------------------------------------===//
-class HasOneUseUnaryOp<SDPatternOperator op> : PatFrag<
- (ops node:$src0),
- (op $src0),
- [{ return N->hasOneUse(); }]
->;
-
class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
(ops node:$src0, node:$src1),
(op $src0, $src1),
@@ -112,8 +147,6 @@ class HasOneUseTernaryOp<SDPatternOperat
[{ return N->hasOneUse(); }]
>;
-def trunc_oneuse : HasOneUseUnaryOp<trunc>;
-
let Properties = [SDNPCommutative, SDNPAssociative] in {
def smax_oneuse : HasOneUseBinOp<smax>;
def smin_oneuse : HasOneUseBinOp<smin>;
@@ -240,6 +273,37 @@ def COND_NULL : PatLeaf <
[{(void)N; return false;}]
>;
+//===----------------------------------------------------------------------===//
+// PatLeafs for Texture Constants
+//===----------------------------------------------------------------------===//
+
+def TEX_ARRAY : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return TType == 9 || TType == 10 || TType == 16;
+ }]
+>;
+
+def TEX_RECT : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return TType == 5;
+ }]
+>;
+
+def TEX_SHADOW : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return (TType >= 6 && TType <= 8) || TType == 13;
+ }]
+>;
+
+def TEX_SHADOW_ARRAY : PatLeaf<
+ (imm),
+ [{uint32_t TType = (uint32_t)N->getZExtValue();
+ return TType == 11 || TType == 12 || TType == 17;
+ }]
+>;
//===----------------------------------------------------------------------===//
// Load/Store Pattern Fragments
@@ -769,11 +833,3 @@ class RsqPat<Instruction RsqInst, ValueT
(AMDGPUrcp (fsqrt vt:$src)),
(RsqInst $src)
>;
-
-include "R600Instructions.td"
-include "R700Instructions.td"
-include "EvergreenInstructions.td"
-include "CaymanInstructions.td"
-
-include "SIInstrInfo.td"
-
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUIntrinsics.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUIntrinsics.td?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUIntrinsics.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUIntrinsics.td Thu Jun 28 16:47:12 2018
@@ -14,5 +14,3 @@
let TargetPrefix = "AMDGPU", isTarget = 1 in {
def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
}
-
-include "SIIntrinsics.td"
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPULowerIntrinsics.cpp Thu Jun 28 16:47:12 2018
@@ -117,7 +117,6 @@ bool AMDGPULowerIntrinsics::makeLIDRange
return false;
const TargetMachine &TM = TPC->getTM<TargetMachine>();
- const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(F);
bool Changed = false;
for (auto *U : F.users()) {
@@ -125,7 +124,7 @@ bool AMDGPULowerIntrinsics::makeLIDRange
if (!CI)
continue;
- Changed |= ST.makeLIDRangeMetadata(CI);
+ Changed |= AMDGPUCommonSubtarget::get(TM, F).makeLIDRangeMetadata(CI);
}
return Changed;
}
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp Thu Jun 28 16:47:12 2018
@@ -152,7 +152,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(
IsAMDGCN = TT.getArch() == Triple::amdgcn;
IsAMDHSA = TT.getOS() == Triple::AMDHSA;
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
+ const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, F);
if (!ST.isPromoteAllocaEnabled())
return false;
@@ -174,8 +174,8 @@ bool AMDGPUPromoteAlloca::runOnFunction(
std::pair<Value *, Value *>
AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(
- *Builder.GetInsertBlock()->getParent());
+ const Function &F = *Builder.GetInsertBlock()->getParent();
+ const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, F);
if (!IsAMDHSA) {
Function *LocalSizeYFn
@@ -261,8 +261,8 @@ AMDGPUPromoteAlloca::getLocalSizeYZ(IRBu
}
Value *AMDGPUPromoteAlloca::getWorkitemID(IRBuilder<> &Builder, unsigned N) {
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(
- *Builder.GetInsertBlock()->getParent());
+ const AMDGPUCommonSubtarget &ST =
+ AMDGPUCommonSubtarget::get(*TM, *Builder.GetInsertBlock()->getParent());
Intrinsic::ID IntrID = Intrinsic::ID::not_intrinsic;
switch (N) {
@@ -602,7 +602,7 @@ bool AMDGPUPromoteAlloca::collectUsesWit
bool AMDGPUPromoteAlloca::hasSufficientLocalMem(const Function &F) {
FunctionType *FTy = F.getFunctionType();
- const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(F);
+ const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, F);
// If the function has any arguments in the local address space, then it's
// possible these arguments require the entire local memory space, so
@@ -729,8 +729,7 @@ bool AMDGPUPromoteAlloca::handleAlloca(A
if (!SufficientLDS)
return false;
- const AMDGPUSubtarget &ST =
- TM->getSubtarget<AMDGPUSubtarget>(ContainingFunction);
+ const AMDGPUCommonSubtarget &ST = AMDGPUCommonSubtarget::get(*TM, ContainingFunction);
unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second;
const DataLayout &DL = Mod->getDataLayout();
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterInfo.td?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPURegisterInfo.td Thu Jun 28 16:47:12 2018
@@ -19,5 +19,4 @@ foreach Index = 0-15 in {
}
-include "R600RegisterInfo.td"
include "SIRegisterInfo.td"
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Thu Jun 28 16:47:12 2018
@@ -23,6 +23,7 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include <algorithm>
@@ -34,9 +35,32 @@ using namespace llvm;
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "AMDGPUGenSubtargetInfo.inc"
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "R600GenSubtargetInfo.inc"
AMDGPUSubtarget::~AMDGPUSubtarget() = default;
+R600Subtarget &
+R600Subtarget::initializeSubtargetDependencies(const Triple &TT,
+ StringRef GPU, StringRef FS) {
+ SmallString<256> FullFS("+promote-alloca,+dx10-clamp,");
+ FullFS += FS;
+ ParseSubtargetFeatures(GPU, FullFS);
+
+  // FIXME: I don't think Evergreen has any useful support for
+  // denormals, but this should be checked. Should we issue a warning somewhere
+ // if someone tries to enable these?
+ if (getGeneration() <= R600Subtarget::NORTHERN_ISLANDS) {
+ FP32Denormals = false;
+ }
+
+ HasMulU24 = getGeneration() >= EVERGREEN;
+ HasMulI24 = hasCaymanISA();
+
+ return *this;
+}
+
AMDGPUSubtarget &
AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
StringRef GPU, StringRef FS) {
@@ -93,26 +117,44 @@ AMDGPUSubtarget::initializeSubtargetDepe
HasMovrel = true;
}
+ HasFminFmaxLegacy = getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;
+
return *this;
}
+AMDGPUCommonSubtarget::AMDGPUCommonSubtarget(const Triple &TT,
+ const FeatureBitset &FeatureBits) :
+ TargetTriple(TT),
+ SubtargetFeatureBits(FeatureBits),
+ Has16BitInsts(false),
+ HasMadMixInsts(false),
+ FP32Denormals(false),
+ FPExceptions(false),
+ HasSDWA(false),
+ HasVOP3PInsts(false),
+ HasMulI24(true),
+ HasMulU24(true),
+ HasFminFmaxLegacy(true),
+ EnablePromoteAlloca(false),
+ LocalMemorySize(0),
+ WavefrontSize(0)
+ { }
+
AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
- const TargetMachine &TM)
- : AMDGPUGenSubtargetInfo(TT, GPU, FS),
+ const TargetMachine &TM) :
+ AMDGPUGenSubtargetInfo(TT, GPU, FS),
+ AMDGPUCommonSubtarget(TT, getFeatureBits()),
+ FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
TargetTriple(TT),
- Gen(TT.getArch() == Triple::amdgcn ? SOUTHERN_ISLANDS : R600),
+ Gen(SOUTHERN_ISLANDS),
IsaVersion(ISAVersion0_0_0),
- WavefrontSize(0),
- LocalMemorySize(0),
LDSBankCount(0),
MaxPrivateElementSize(0),
FastFMAF32(false),
HalfRate64Ops(false),
- FP32Denormals(false),
FP64FP16Denormals(false),
- FPExceptions(false),
DX10Clamp(false),
FlatForGlobal(false),
AutoWaitcntBeforeBarrier(false),
@@ -128,7 +170,6 @@ AMDGPUSubtarget::AMDGPUSubtarget(const T
EnableHugePrivateBuffer(false),
EnableVGPRSpilling(false),
- EnablePromoteAlloca(false),
EnableLoadStoreOpt(false),
EnableUnsafeDSOffsetFolding(false),
EnableSIScheduler(false),
@@ -136,25 +177,18 @@ AMDGPUSubtarget::AMDGPUSubtarget(const T
DumpCode(false),
FP64(false),
- FMA(false),
- MIMG_R128(false),
- IsGCN(false),
GCN3Encoding(false),
CIInsts(false),
GFX9Insts(false),
SGPRInitBug(false),
HasSMemRealTime(false),
- Has16BitInsts(false),
HasIntClamp(false),
- HasVOP3PInsts(false),
- HasMadMixInsts(false),
HasFmaMixInsts(false),
HasMovrel(false),
HasVGPRIndexMode(false),
HasScalarStores(false),
HasScalarAtomics(false),
HasInv2PiInlineImm(false),
- HasSDWA(false),
HasSDWAOmod(false),
HasSDWAScalar(false),
HasSDWASdst(false),
@@ -170,20 +204,14 @@ AMDGPUSubtarget::AMDGPUSubtarget(const T
AddNoCarryInsts(false),
HasUnpackedD16VMem(false),
- R600ALUInst(false),
- CaymanISA(false),
- CFALUBug(false),
- HasVertexCache(false),
- TexVTXClauseSize(0),
ScalarizeGlobal(false),
- FeatureDisable(false),
- InstrItins(getInstrItineraryForCPU(GPU)) {
+ FeatureDisable(false) {
AS = AMDGPU::getAMDGPUAS(TT);
initializeSubtargetDependencies(TT, GPU, FS);
}
-unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
+unsigned AMDGPUCommonSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
const Function &F) const {
if (NWaves == 1)
return getLocalMemorySize();
@@ -193,7 +221,7 @@ unsigned AMDGPUSubtarget::getMaxLocalMem
return getLocalMemorySize() * MaxWaves / WorkGroupsPerCu / NWaves;
}
-unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
+unsigned AMDGPUCommonSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes,
const Function &F) const {
unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
@@ -206,13 +234,13 @@ unsigned AMDGPUSubtarget::getOccupancyWi
}
unsigned
-AMDGPUSubtarget::getOccupancyWithLocalMemSize(const MachineFunction &MF) const {
+AMDGPUCommonSubtarget::getOccupancyWithLocalMemSize(const MachineFunction &MF) const {
const auto *MFI = MF.getInfo<SIMachineFunctionInfo>();
return getOccupancyWithLocalMemSize(MFI->getLDSSize(), MF.getFunction());
}
std::pair<unsigned, unsigned>
-AMDGPUSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
+AMDGPUCommonSubtarget::getDefaultFlatWorkGroupSize(CallingConv::ID CC) const {
switch (CC) {
case CallingConv::AMDGPU_CS:
case CallingConv::AMDGPU_KERNEL:
@@ -230,7 +258,7 @@ AMDGPUSubtarget::getDefaultFlatWorkGroup
}
}
-std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
+std::pair<unsigned, unsigned> AMDGPUCommonSubtarget::getFlatWorkGroupSizes(
const Function &F) const {
// FIXME: 1024 if function.
// Default minimum/maximum flat work group sizes.
@@ -260,7 +288,7 @@ std::pair<unsigned, unsigned> AMDGPUSubt
return Requested;
}
-std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
+std::pair<unsigned, unsigned> AMDGPUCommonSubtarget::getWavesPerEU(
const Function &F) const {
// Default minimum/maximum number of waves per execution unit.
std::pair<unsigned, unsigned> Default(1, getMaxWavesPerEU());
@@ -308,7 +336,7 @@ std::pair<unsigned, unsigned> AMDGPUSubt
return Requested;
}
-bool AMDGPUSubtarget::makeLIDRangeMetadata(Instruction *I) const {
+bool AMDGPUCommonSubtarget::makeLIDRangeMetadata(Instruction *I) const {
Function *Kernel = I->getParent()->getParent();
unsigned MinSize = 0;
unsigned MaxSize = getFlatWorkGroupSizes(*Kernel).second;
@@ -372,10 +400,22 @@ bool AMDGPUSubtarget::makeLIDRangeMetada
R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
const TargetMachine &TM) :
- AMDGPUSubtarget(TT, GPU, FS, TM),
+ R600GenSubtargetInfo(TT, GPU, FS),
+ AMDGPUCommonSubtarget(TT, getFeatureBits()),
InstrInfo(*this),
FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
- TLInfo(TM, *this) {}
+ FMA(false),
+ CaymanISA(false),
+ CFALUBug(false),
+ DX10Clamp(false),
+ HasVertexCache(false),
+ R600ALUInst(false),
+ FP64(false),
+ TexVTXClauseSize(0),
+ Gen(R600),
+ TLInfo(TM, initializeSubtargetDependencies(TT, GPU, FS)),
+ InstrItins(getInstrItineraryForCPU(GPU)),
+ AS (AMDGPU::getAMDGPUAS(TT)) { }
SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,
const GCNTargetMachine &TM)
@@ -619,3 +659,17 @@ void SISubtarget::getPostRAMutations(
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
Mutations.push_back(llvm::make_unique<MemOpClusterMutation>(&InstrInfo));
}
+
+const AMDGPUCommonSubtarget &AMDGPUCommonSubtarget::get(const MachineFunction &MF) {
+ if (MF.getTarget().getTargetTriple().getArch() == Triple::amdgcn)
+ return static_cast<const AMDGPUCommonSubtarget&>(MF.getSubtarget<AMDGPUSubtarget>());
+ else
+ return static_cast<const AMDGPUCommonSubtarget&>(MF.getSubtarget<R600Subtarget>());
+}
+
+const AMDGPUCommonSubtarget &AMDGPUCommonSubtarget::get(const TargetMachine &TM, const Function &F) {
+ if (TM.getTargetTriple().getArch() == Triple::amdgcn)
+ return static_cast<const AMDGPUCommonSubtarget&>(TM.getSubtarget<AMDGPUSubtarget>(F));
+ else
+ return static_cast<const AMDGPUCommonSubtarget&>(TM.getSubtarget<R600Subtarget>(F));
+}
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Thu Jun 28 16:47:12 2018
@@ -39,22 +39,181 @@
#define GET_SUBTARGETINFO_HEADER
#include "AMDGPUGenSubtargetInfo.inc"
+#define GET_SUBTARGETINFO_HEADER
+#include "R600GenSubtargetInfo.inc"
namespace llvm {
class StringRef;
-class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
+class AMDGPUCommonSubtarget {
+private:
+ Triple TargetTriple;
+
+protected:
+ const FeatureBitset &SubtargetFeatureBits;
+ bool Has16BitInsts;
+ bool HasMadMixInsts;
+ bool FP32Denormals;
+ bool FPExceptions;
+ bool HasSDWA;
+ bool HasVOP3PInsts;
+ bool HasMulI24;
+ bool HasMulU24;
+ bool HasFminFmaxLegacy;
+ bool EnablePromoteAlloca;
+ int LocalMemorySize;
+ unsigned WavefrontSize;
+
+public:
+ AMDGPUCommonSubtarget(const Triple &TT, const FeatureBitset &FeatureBits);
+
+ static const AMDGPUCommonSubtarget &get(const MachineFunction &MF);
+ static const AMDGPUCommonSubtarget &get(const TargetMachine &TM,
+ const Function &F);
+
+ /// \returns Default range flat work group size for a calling convention.
+ std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
+
+ /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
+ /// for function \p F, or minimum/maximum flat work group sizes explicitly
+ /// requested using "amdgpu-flat-work-group-size" attribute attached to
+ /// function \p F.
+ ///
+ /// \returns Subtarget's default values if explicitly requested values cannot
+ /// be converted to integer, or violate subtarget's specifications.
+ std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
+
+ /// \returns Subtarget's default pair of minimum/maximum number of waves per
+ /// execution unit for function \p F, or minimum/maximum number of waves per
+ /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
+ /// attached to function \p F.
+ ///
+ /// \returns Subtarget's default values if explicitly requested values cannot
+ /// be converted to integer, violate subtarget's specifications, or are not
+ /// compatible with minimum/maximum number of waves limited by flat work group
+ /// size, register usage, and/or lds usage.
+ std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
+
+ /// Return the amount of LDS that can be used that will not restrict the
+ /// occupancy lower than WaveCount.
+ unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
+ const Function &) const;
+
+  /// Inverse of getMaxLocalMemSizeWithWaveCount. Return the maximum wavecount if
+ /// the given LDS memory size is the only constraint.
+ unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
+
+ unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
+
+ bool isAmdHsaOS() const {
+ return TargetTriple.getOS() == Triple::AMDHSA;
+ }
+
+ bool isAmdPalOS() const {
+ return TargetTriple.getOS() == Triple::AMDPAL;
+ }
+
+ bool has16BitInsts() const {
+ return Has16BitInsts;
+ }
+
+ bool hasMadMixInsts() const {
+ return HasMadMixInsts;
+ }
+
+ bool hasFP32Denormals() const {
+ return FP32Denormals;
+ }
+
+ bool hasFPExceptions() const {
+ return FPExceptions;
+ }
+
+ bool hasSDWA() const {
+ return HasSDWA;
+ }
+
+ bool hasVOP3PInsts() const {
+ return HasVOP3PInsts;
+ }
+
+ bool hasMulI24() const {
+ return HasMulI24;
+ }
+
+ bool hasMulU24() const {
+ return HasMulU24;
+ }
+
+ bool hasFminFmaxLegacy() const {
+ return HasFminFmaxLegacy;
+ }
+
+ bool isPromoteAllocaEnabled() const {
+ return EnablePromoteAlloca;
+ }
+
+ unsigned getWavefrontSize() const {
+ return WavefrontSize;
+ }
+
+ int getLocalMemorySize() const {
+ return LocalMemorySize;
+ }
+
+ unsigned getAlignmentForImplicitArgPtr() const {
+ return isAmdHsaOS() ? 8 : 4;
+ }
+
+ /// \returns Maximum number of work groups per compute unit supported by the
+ /// subtarget and limited by given \p FlatWorkGroupSize.
+ unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
+ return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(SubtargetFeatureBits,
+ FlatWorkGroupSize);
+ }
+
+ /// \returns Minimum flat work group size supported by the subtarget.
+ unsigned getMinFlatWorkGroupSize() const {
+ return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(SubtargetFeatureBits);
+ }
+
+ /// \returns Maximum flat work group size supported by the subtarget.
+ unsigned getMaxFlatWorkGroupSize() const {
+ return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(SubtargetFeatureBits);
+ }
+
+ /// \returns Maximum number of waves per execution unit supported by the
+ /// subtarget and limited by given \p FlatWorkGroupSize.
+ unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
+ return AMDGPU::IsaInfo::getMaxWavesPerEU(SubtargetFeatureBits,
+ FlatWorkGroupSize);
+ }
+
+ /// \returns Minimum number of waves per execution unit supported by the
+ /// subtarget.
+ unsigned getMinWavesPerEU() const {
+ return AMDGPU::IsaInfo::getMinWavesPerEU(SubtargetFeatureBits);
+ }
+
+ unsigned getMaxWavesPerEU() const { return 10; }
+
+  /// Creates value range metadata on a workitemid.* intrinsic call or load.
+ bool makeLIDRangeMetadata(Instruction *I) const;
+
+ virtual ~AMDGPUCommonSubtarget() {}
+};
+
+class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo,
+ public AMDGPUCommonSubtarget {
public:
enum Generation {
- R600 = 0,
- R700,
- EVERGREEN,
- NORTHERN_ISLANDS,
- SOUTHERN_ISLANDS,
- SEA_ISLANDS,
- VOLCANIC_ISLANDS,
- GFX9,
+    // Gap for R600 generations, so we can do comparisons between
+    // AMDGPUSubtarget and R600Subtarget.
+ SOUTHERN_ISLANDS = 4,
+ SEA_ISLANDS = 5,
+ VOLCANIC_ISLANDS = 6,
+ GFX9 = 7,
};
enum {
@@ -96,13 +255,20 @@ public:
LLVMTrapHandlerRegValue = 1
};
+private:
+ SIFrameLowering FrameLowering;
+
+ /// GlobalISel related APIs.
+ std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
+ std::unique_ptr<InstructionSelector> InstSelector;
+ std::unique_ptr<LegalizerInfo> Legalizer;
+ std::unique_ptr<RegisterBankInfo> RegBankInfo;
+
protected:
// Basic subtarget description.
Triple TargetTriple;
- Generation Gen;
+ unsigned Gen;
unsigned IsaVersion;
- unsigned WavefrontSize;
- int LocalMemorySize;
int LDSBankCount;
unsigned MaxPrivateElementSize;
@@ -111,9 +277,7 @@ protected:
bool HalfRate64Ops;
  // Dynamically set bits that enable features.
- bool FP32Denormals;
bool FP64FP16Denormals;
- bool FPExceptions;
bool DX10Clamp;
bool FlatForGlobal;
bool AutoWaitcntBeforeBarrier;
@@ -129,7 +293,6 @@ protected:
// Used as options.
bool EnableHugePrivateBuffer;
bool EnableVGPRSpilling;
- bool EnablePromoteAlloca;
bool EnableLoadStoreOpt;
bool EnableUnsafeDSOffsetFolding;
bool EnableSIScheduler;
@@ -146,17 +309,13 @@ protected:
bool GFX9Insts;
bool SGPRInitBug;
bool HasSMemRealTime;
- bool Has16BitInsts;
bool HasIntClamp;
- bool HasVOP3PInsts;
- bool HasMadMixInsts;
bool HasFmaMixInsts;
bool HasMovrel;
bool HasVGPRIndexMode;
bool HasScalarStores;
bool HasScalarAtomics;
bool HasInv2PiInlineImm;
- bool HasSDWA;
bool HasSDWAOmod;
bool HasSDWAScalar;
bool HasSDWASdst;
@@ -181,7 +340,6 @@ protected:
// Dummy feature to use for assembler in tablegen.
bool FeatureDisable;
- InstrItineraryData InstrItins;
SelectionDAGTargetInfo TSInfo;
AMDGPUAS AS;
@@ -193,13 +351,30 @@ public:
AMDGPUSubtarget &initializeSubtargetDependencies(const Triple &TT,
StringRef GPU, StringRef FS);
- const AMDGPUInstrInfo *getInstrInfo() const override = 0;
- const AMDGPUFrameLowering *getFrameLowering() const override = 0;
- const AMDGPUTargetLowering *getTargetLowering() const override = 0;
- const AMDGPURegisterInfo *getRegisterInfo() const override = 0;
+ virtual const SIInstrInfo *getInstrInfo() const override = 0;
- const InstrItineraryData *getInstrItineraryData() const override {
- return &InstrItins;
+ const SIFrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+
+ virtual const SITargetLowering *getTargetLowering() const override = 0;
+
+ virtual const SIRegisterInfo *getRegisterInfo() const override = 0;
+
+ const CallLowering *getCallLowering() const override {
+ return CallLoweringInfo.get();
+ }
+
+ const InstructionSelector *getInstructionSelector() const override {
+ return InstSelector.get();
+ }
+
+ const LegalizerInfo *getLegalizerInfo() const override {
+ return Legalizer.get();
+ }
+
+ const RegisterBankInfo *getRegBankInfo() const override {
+ return RegBankInfo.get();
}
// Nothing implemented, just prevent crashes on use.
@@ -209,34 +384,18 @@ public:
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
- bool isAmdHsaOS() const {
- return TargetTriple.getOS() == Triple::AMDHSA;
- }
-
bool isMesa3DOS() const {
return TargetTriple.getOS() == Triple::Mesa3D;
}
- bool isAmdPalOS() const {
- return TargetTriple.getOS() == Triple::AMDPAL;
- }
-
Generation getGeneration() const {
- return Gen;
- }
-
- unsigned getWavefrontSize() const {
- return WavefrontSize;
+ return (Generation)Gen;
}
unsigned getWavefrontSizeLog2() const {
return Log2_32(WavefrontSize);
}
- int getLocalMemorySize() const {
- return LocalMemorySize;
- }
-
int getLDSBankCount() const {
return LDSBankCount;
}
@@ -249,18 +408,10 @@ public:
return AS;
}
- bool has16BitInsts() const {
- return Has16BitInsts;
- }
-
bool hasIntClamp() const {
return HasIntClamp;
}
- bool hasVOP3PInsts() const {
- return HasVOP3PInsts;
- }
-
bool hasFP64() const {
return FP64;
}
@@ -269,6 +420,10 @@ public:
return MIMG_R128;
}
+ bool hasHWFP64() const {
+ return FP64;
+ }
+
bool hasFastFMAF32() const {
return FastFMAF32;
}
@@ -278,15 +433,15 @@ public:
}
bool hasAddr64() const {
- return (getGeneration() < VOLCANIC_ISLANDS);
+ return (getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS);
}
bool hasBFE() const {
- return (getGeneration() >= EVERGREEN);
+ return true;
}
bool hasBFI() const {
- return (getGeneration() >= EVERGREEN);
+ return true;
}
bool hasBFM() const {
@@ -294,42 +449,23 @@ public:
}
bool hasBCNT(unsigned Size) const {
- if (Size == 32)
- return (getGeneration() >= EVERGREEN);
-
- if (Size == 64)
- return (getGeneration() >= SOUTHERN_ISLANDS);
-
- return false;
- }
-
- bool hasMulU24() const {
- return (getGeneration() >= EVERGREEN);
- }
-
- bool hasMulI24() const {
- return (getGeneration() >= SOUTHERN_ISLANDS ||
- hasCaymanISA());
+ return true;
}
bool hasFFBL() const {
- return (getGeneration() >= EVERGREEN);
+ return true;
}
bool hasFFBH() const {
- return (getGeneration() >= EVERGREEN);
+ return true;
}
bool hasMed3_16() const {
- return getGeneration() >= GFX9;
+ return getGeneration() >= AMDGPUSubtarget::GFX9;
}
bool hasMin3Max3_16() const {
- return getGeneration() >= GFX9;
- }
-
- bool hasMadMixInsts() const {
- return HasMadMixInsts;
+ return getGeneration() >= AMDGPUSubtarget::GFX9;
}
bool hasFmaMixInsts() const {
@@ -337,15 +473,7 @@ public:
}
bool hasCARRY() const {
- return (getGeneration() >= EVERGREEN);
- }
-
- bool hasBORROW() const {
- return (getGeneration() >= EVERGREEN);
- }
-
- bool hasCaymanISA() const {
- return CaymanISA;
+ return true;
}
bool hasFMA() const {
@@ -360,10 +488,6 @@ public:
return EnableHugePrivateBuffer;
}
- bool isPromoteAllocaEnabled() const {
- return EnablePromoteAlloca;
- }
-
bool unsafeDSOffsetFoldingEnabled() const {
return EnableUnsafeDSOffsetFolding;
}
@@ -377,20 +501,10 @@ public:
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount,
const Function &) const;
- /// Inverse of getMaxLocalMemWithWaveCount. Return the maximum wavecount if
- /// the given LDS memory size is the only constraint.
- unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const;
-
- unsigned getOccupancyWithLocalMemSize(const MachineFunction &MF) const;
-
bool hasFP16Denormals() const {
return FP64FP16Denormals;
}
- bool hasFP32Denormals() const {
- return FP32Denormals;
- }
-
bool hasFP64Denormals() const {
return FP64FP16Denormals;
}
@@ -399,10 +513,6 @@ public:
return getGeneration() >= AMDGPUSubtarget::GFX9;
}
- bool hasFPExceptions() const {
- return FPExceptions;
- }
-
bool enableDX10Clamp() const {
return DX10Clamp;
}
@@ -444,7 +554,7 @@ public:
}
bool hasApertureRegs() const {
- return HasApertureRegs;
+ return HasApertureRegs;
}
bool isTrapHandlerEnabled() const {
@@ -510,14 +620,6 @@ public:
return getGeneration() >= SEA_ISLANDS;
}
- bool hasFminFmaxLegacy() const {
- return getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS;
- }
-
- bool hasSDWA() const {
- return HasSDWA;
- }
-
bool hasSDWAOmod() const {
return HasSDWAOmod;
}
@@ -556,10 +658,6 @@ public:
return isAmdCodeObjectV2(F) ? 0 : 36;
}
- unsigned getAlignmentForImplicitArgPtr() const {
- return isAmdHsaOS() ? 8 : 4;
- }
-
/// \returns Number of bytes of arguments that are passed to a shader or
/// kernel in addition to the explicit ones declared for the function.
unsigned getImplicitArgNumBytes(const Function &F) const {
@@ -588,134 +686,39 @@ public:
return true;
}
- void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b;}
- bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal;}
+ void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
+ bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; }
/// \returns Number of execution units per compute unit supported by the
/// subtarget.
unsigned getEUsPerCU() const {
- return AMDGPU::IsaInfo::getEUsPerCU(getFeatureBits());
- }
-
- /// \returns Maximum number of work groups per compute unit supported by the
- /// subtarget and limited by given \p FlatWorkGroupSize.
- unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
- return AMDGPU::IsaInfo::getMaxWorkGroupsPerCU(getFeatureBits(),
- FlatWorkGroupSize);
+ return AMDGPU::IsaInfo::getEUsPerCU(MCSubtargetInfo::getFeatureBits());
}
/// \returns Maximum number of waves per compute unit supported by the
/// subtarget without any kind of limitation.
unsigned getMaxWavesPerCU() const {
- return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits());
+ return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits());
}
/// \returns Maximum number of waves per compute unit supported by the
/// subtarget and limited by given \p FlatWorkGroupSize.
unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
- return AMDGPU::IsaInfo::getMaxWavesPerCU(getFeatureBits(),
+ return AMDGPU::IsaInfo::getMaxWavesPerCU(MCSubtargetInfo::getFeatureBits(),
FlatWorkGroupSize);
}
- /// \returns Minimum number of waves per execution unit supported by the
- /// subtarget.
- unsigned getMinWavesPerEU() const {
- return AMDGPU::IsaInfo::getMinWavesPerEU(getFeatureBits());
- }
-
/// \returns Maximum number of waves per execution unit supported by the
/// subtarget without any kind of limitation.
unsigned getMaxWavesPerEU() const {
- return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits());
- }
-
- /// \returns Maximum number of waves per execution unit supported by the
- /// subtarget and limited by given \p FlatWorkGroupSize.
- unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
- return AMDGPU::IsaInfo::getMaxWavesPerEU(getFeatureBits(),
- FlatWorkGroupSize);
- }
-
- /// \returns Minimum flat work group size supported by the subtarget.
- unsigned getMinFlatWorkGroupSize() const {
- return AMDGPU::IsaInfo::getMinFlatWorkGroupSize(getFeatureBits());
- }
-
- /// \returns Maximum flat work group size supported by the subtarget.
- unsigned getMaxFlatWorkGroupSize() const {
- return AMDGPU::IsaInfo::getMaxFlatWorkGroupSize(getFeatureBits());
+ return AMDGPU::IsaInfo::getMaxWavesPerEU();
}
/// \returns Number of waves per work group supported by the subtarget and
/// limited by given \p FlatWorkGroupSize.
unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
- return AMDGPU::IsaInfo::getWavesPerWorkGroup(getFeatureBits(),
- FlatWorkGroupSize);
- }
-
- /// \returns Default range flat work group size for a calling convention.
- std::pair<unsigned, unsigned> getDefaultFlatWorkGroupSize(CallingConv::ID CC) const;
-
- /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
- /// for function \p F, or minimum/maximum flat work group sizes explicitly
- /// requested using "amdgpu-flat-work-group-size" attribute attached to
- /// function \p F.
- ///
- /// \returns Subtarget's default values if explicitly requested values cannot
- /// be converted to integer, or violate subtarget's specifications.
- std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
-
- /// \returns Subtarget's default pair of minimum/maximum number of waves per
- /// execution unit for function \p F, or minimum/maximum number of waves per
- /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
- /// attached to function \p F.
- ///
- /// \returns Subtarget's default values if explicitly requested values cannot
- /// be converted to integer, violate subtarget's specifications, or are not
- /// compatible with minimum/maximum number of waves limited by flat work group
- /// size, register usage, and/or lds usage.
- std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
-
- /// Creates value range metadata on an workitemid.* inrinsic call or load.
- bool makeLIDRangeMetadata(Instruction *I) const;
-};
-
-class R600Subtarget final : public AMDGPUSubtarget {
-private:
- R600InstrInfo InstrInfo;
- R600FrameLowering FrameLowering;
- R600TargetLowering TLInfo;
-
-public:
- R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
- const TargetMachine &TM);
-
- const R600InstrInfo *getInstrInfo() const override {
- return &InstrInfo;
- }
-
- const R600FrameLowering *getFrameLowering() const override {
- return &FrameLowering;
- }
-
- const R600TargetLowering *getTargetLowering() const override {
- return &TLInfo;
- }
-
- const R600RegisterInfo *getRegisterInfo() const override {
- return &InstrInfo.getRegisterInfo();
- }
-
- bool hasCFAluBug() const {
- return CFALUBug;
- }
-
- bool hasVertexCache() const {
- return HasVertexCache;
- }
-
- short getTexVTXClauseSize() const {
- return TexVTXClauseSize;
+ return AMDGPU::IsaInfo::getWavesPerWorkGroup(
+ MCSubtargetInfo::getFeatureBits(), FlatWorkGroupSize);
}
};
@@ -766,6 +769,8 @@ public:
const SIRegisterInfo *getRegisterInfo() const override {
return &InstrInfo.getRegisterInfo();
}
+ // static wrappers
+ static bool hasHalfRate64Ops(const TargetSubtargetInfo &STI);
// XXX - Why is this here if it isn't in the default pass set?
bool enableEarlyIfConversion() const override {
@@ -775,7 +780,7 @@ public:
void overrideSchedPolicy(MachineSchedPolicy &Policy,
unsigned NumRegionInstrs) const override;
- bool isVGPRSpillingEnabled(const Function& F) const;
+ bool isVGPRSpillingEnabled(const Function &F) const;
unsigned getMaxNumUserSGPRs() const {
return 16;
@@ -860,16 +865,18 @@ public:
unsigned getKernArgSegmentSize(const Function &F,
unsigned ExplictArgBytes) const;
- /// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
+ /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
+ /// SGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
- /// Return the maximum number of waves per SIMD for kernels using \p VGPRs VGPRs
+ /// Return the maximum number of waves per SIMD for kernels using \p VGPRs
+ /// VGPRs
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
/// \returns true if the flat_scratch register should be initialized with the
/// pointer to the wave's scratch memory rather than a size and offset.
bool flatScratchIsPointer() const {
- return getGeneration() >= GFX9;
+ return getGeneration() >= AMDGPUSubtarget::GFX9;
}
/// \returns true if the machine has merged shaders in which s0-s7 are
@@ -880,35 +887,39 @@ public:
/// \returns SGPR allocation granularity supported by the subtarget.
unsigned getSGPRAllocGranule() const {
- return AMDGPU::IsaInfo::getSGPRAllocGranule(getFeatureBits());
+ return AMDGPU::IsaInfo::getSGPRAllocGranule(
+ MCSubtargetInfo::getFeatureBits());
}
/// \returns SGPR encoding granularity supported by the subtarget.
unsigned getSGPREncodingGranule() const {
- return AMDGPU::IsaInfo::getSGPREncodingGranule(getFeatureBits());
+ return AMDGPU::IsaInfo::getSGPREncodingGranule(
+ MCSubtargetInfo::getFeatureBits());
}
/// \returns Total number of SGPRs supported by the subtarget.
unsigned getTotalNumSGPRs() const {
- return AMDGPU::IsaInfo::getTotalNumSGPRs(getFeatureBits());
+ return AMDGPU::IsaInfo::getTotalNumSGPRs(MCSubtargetInfo::getFeatureBits());
}
/// \returns Addressable number of SGPRs supported by the subtarget.
unsigned getAddressableNumSGPRs() const {
- return AMDGPU::IsaInfo::getAddressableNumSGPRs(getFeatureBits());
+ return AMDGPU::IsaInfo::getAddressableNumSGPRs(
+ MCSubtargetInfo::getFeatureBits());
}
/// \returns Minimum number of SGPRs that meets the given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMinNumSGPRs(unsigned WavesPerEU) const {
- return AMDGPU::IsaInfo::getMinNumSGPRs(getFeatureBits(), WavesPerEU);
+ return AMDGPU::IsaInfo::getMinNumSGPRs(MCSubtargetInfo::getFeatureBits(),
+ WavesPerEU);
}
/// \returns Maximum number of SGPRs that meets the given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const {
- return AMDGPU::IsaInfo::getMaxNumSGPRs(getFeatureBits(), WavesPerEU,
- Addressable);
+ return AMDGPU::IsaInfo::getMaxNumSGPRs(MCSubtargetInfo::getFeatureBits(),
+ WavesPerEU, Addressable);
}
/// \returns Reserved number of SGPRs for given function \p MF.
@@ -926,34 +937,39 @@ public:
/// \returns VGPR allocation granularity supported by the subtarget.
unsigned getVGPRAllocGranule() const {
- return AMDGPU::IsaInfo::getVGPRAllocGranule(getFeatureBits());
+ return AMDGPU::IsaInfo::getVGPRAllocGranule(
+ MCSubtargetInfo::getFeatureBits());
}
/// \returns VGPR encoding granularity supported by the subtarget.
unsigned getVGPREncodingGranule() const {
- return AMDGPU::IsaInfo::getVGPREncodingGranule(getFeatureBits());
+ return AMDGPU::IsaInfo::getVGPREncodingGranule(
+ MCSubtargetInfo::getFeatureBits());
}
/// \returns Total number of VGPRs supported by the subtarget.
unsigned getTotalNumVGPRs() const {
- return AMDGPU::IsaInfo::getTotalNumVGPRs(getFeatureBits());
+ return AMDGPU::IsaInfo::getTotalNumVGPRs(MCSubtargetInfo::getFeatureBits());
}
/// \returns Addressable number of VGPRs supported by the subtarget.
unsigned getAddressableNumVGPRs() const {
- return AMDGPU::IsaInfo::getAddressableNumVGPRs(getFeatureBits());
+ return AMDGPU::IsaInfo::getAddressableNumVGPRs(
+ MCSubtargetInfo::getFeatureBits());
}
/// \returns Minimum number of VGPRs that meets given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMinNumVGPRs(unsigned WavesPerEU) const {
- return AMDGPU::IsaInfo::getMinNumVGPRs(getFeatureBits(), WavesPerEU);
+ return AMDGPU::IsaInfo::getMinNumVGPRs(MCSubtargetInfo::getFeatureBits(),
+ WavesPerEU);
}
/// \returns Maximum number of VGPRs that meets given number of waves per
/// execution unit requirement supported by the subtarget.
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const {
- return AMDGPU::IsaInfo::getMaxNumVGPRs(getFeatureBits(), WavesPerEU);
+ return AMDGPU::IsaInfo::getMaxNumVGPRs(MCSubtargetInfo::getFeatureBits(),
+ WavesPerEU);
}
/// \returns Maximum number of VGPRs that meets number of waves per execution
@@ -971,6 +987,127 @@ public:
const override;
};
+
+class R600Subtarget final : public R600GenSubtargetInfo,
+ public AMDGPUCommonSubtarget {
+public:
+ enum Generation { R600 = 0, R700 = 1, EVERGREEN = 2, NORTHERN_ISLANDS = 3 };
+
+private:
+ R600InstrInfo InstrInfo;
+ R600FrameLowering FrameLowering;
+ bool FMA;
+ bool CaymanISA;
+ bool CFALUBug;
+ bool DX10Clamp;
+ bool HasVertexCache;
+ bool R600ALUInst;
+ bool FP64;
+ short TexVTXClauseSize;
+ Generation Gen;
+ R600TargetLowering TLInfo;
+ InstrItineraryData InstrItins;
+ SelectionDAGTargetInfo TSInfo;
+ AMDGPUAS AS;
+
+public:
+ R600Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
+ const TargetMachine &TM);
+
+ const R600InstrInfo *getInstrInfo() const override { return &InstrInfo; }
+
+ const R600FrameLowering *getFrameLowering() const override {
+ return &FrameLowering;
+ }
+
+ const R600TargetLowering *getTargetLowering() const override {
+ return &TLInfo;
+ }
+
+ const R600RegisterInfo *getRegisterInfo() const override {
+ return &InstrInfo.getRegisterInfo();
+ }
+
+ const InstrItineraryData *getInstrItineraryData() const override {
+ return &InstrItins;
+ }
+
+ // Nothing implemented, just prevent crashes on use.
+ const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
+
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+
+ Generation getGeneration() const {
+ return Gen;
+ }
+
+ unsigned getStackAlignment() const {
+ return 4;
+ }
+
+ R600Subtarget &initializeSubtargetDependencies(const Triple &TT,
+ StringRef GPU, StringRef FS);
+
+ bool hasBFE() const {
+ return (getGeneration() >= EVERGREEN);
+ }
+
+ bool hasBFI() const {
+ return (getGeneration() >= EVERGREEN);
+ }
+
+ bool hasBCNT(unsigned Size) const {
+ if (Size == 32)
+ return (getGeneration() >= EVERGREEN);
+
+ return false;
+ }
+
+ bool hasBORROW() const {
+ return (getGeneration() >= EVERGREEN);
+ }
+
+ bool hasCARRY() const {
+ return (getGeneration() >= EVERGREEN);
+ }
+
+ bool hasCaymanISA() const {
+ return CaymanISA;
+ }
+
+ bool hasFFBL() const {
+ return (getGeneration() >= EVERGREEN);
+ }
+
+ bool hasFFBH() const {
+ return (getGeneration() >= EVERGREEN);
+ }
+
+ bool hasFMA() const { return FMA; }
+
+ unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const {
+ return 36;
+ }
+
+ bool hasCFAluBug() const { return CFALUBug; }
+
+ bool hasVertexCache() const { return HasVertexCache; }
+
+ short getTexVTXClauseSize() const { return TexVTXClauseSize; }
+
+ AMDGPUAS getAMDGPUAS() const { return AS; }
+
+ bool enableMachineScheduler() const override {
+ return true;
+ }
+
+ bool enableSubRegLiveness() const override {
+ return true;
+ }
+};
+
} // end namespace llvm
#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
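
The structural core of the change is visible above: AMDGPUCommonSubtarget is a plain C++ base class holding the properties both sub-targets share, while AMDGPUSubtarget and R600Subtarget each derive from their own TableGen-generated *GenSubtargetInfo class. The R600 generations move into R600Subtarget::Generation as values 0-3, and the GCN enum starts at SOUTHERN_ISLANDS = 4, so numeric generation comparisons keep their old meaning across the split. Code that is generic over both targets goes through the static AMDGPUCommonSubtarget::get() helpers; a minimal sketch of how that dispatch can look (the real definition lives in AMDGPUSubtarget.cpp, not in this hunk, so treat the exact form as an assumption):

    const AMDGPUCommonSubtarget &
    AMDGPUCommonSubtarget::get(const MachineFunction &MF) {
      // Both concrete subtargets inherit from AMDGPUCommonSubtarget, so
      // either branch converts implicitly to the common base.
      if (MF.getTarget().getTargetTriple().getArch() == Triple::amdgcn)
        return MF.getSubtarget<AMDGPUSubtarget>();
      return MF.getSubtarget<R600Subtarget>();
    }
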
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.h?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.h Thu Jun 28 16:47:12 2018
@@ -34,7 +34,6 @@ namespace llvm {
class AMDGPUTargetMachine : public LLVMTargetMachine {
protected:
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- AMDGPUIntrinsicInfo IntrinsicInfo;
AMDGPUAS AS;
StringRef getGPUName(const Function &F) const;
@@ -49,12 +48,8 @@ public:
CodeGenOpt::Level OL);
~AMDGPUTargetMachine() override;
- const AMDGPUSubtarget *getSubtargetImpl() const;
- const AMDGPUSubtarget *getSubtargetImpl(const Function &) const override = 0;
-
- const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override {
- return &IntrinsicInfo;
- }
+ const TargetSubtargetInfo *getSubtargetImpl() const;
+ const TargetSubtargetInfo *getSubtargetImpl(const Function &) const override = 0;
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
@@ -103,6 +98,7 @@ public:
class GCNTargetMachine final : public AMDGPUTargetMachine {
private:
+ AMDGPUIntrinsicInfo IntrinsicInfo;
mutable StringMap<std::unique_ptr<SISubtarget>> SubtargetMap;
public:
@@ -117,6 +113,10 @@ public:
TargetTransformInfo getTargetTransformInfo(const Function &F) override;
+ const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override {
+ return &IntrinsicInfo;
+ }
+
bool useIPRA() const override {
return true;
}
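
With getSubtargetImpl() loosened to return TargetSubtargetInfo, the intrinsic info moves down into GCNTargetMachine, and callers that need target-specific queries recover the concrete subtarget type themselves. A hypothetical helper showing the pattern (names are illustrative; the static_cast is safe because each target machine only ever constructs one subtarget class):

    static unsigned wavefrontSizeOf(const AMDGPUTargetMachine &TM,
                                    const Function &F) {
      const TargetSubtargetInfo *STI = TM.getSubtargetImpl(F);
      if (TM.getTargetTriple().getArch() == Triple::amdgcn)
        return static_cast<const AMDGPUSubtarget *>(STI)->getWavefrontSize();
      return static_cast<const R600Subtarget *>(STI)->getWavefrontSize();
    }
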
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp Thu Jun 28 16:47:12 2018
@@ -102,7 +102,7 @@ void AMDGPUTTIImpl::getUnrollingPreferen
unsigned ThresholdPrivate = UnrollThresholdPrivate;
unsigned ThresholdLocal = UnrollThresholdLocal;
unsigned MaxBoost = std::max(ThresholdPrivate, ThresholdLocal);
- AMDGPUAS ASST = ST->getAMDGPUAS();
+ const AMDGPUAS &ASST = AMDGPU::getAMDGPUAS(TargetTriple);
for (const BasicBlock *BB : L->getBlocks()) {
const DataLayout &DL = BB->getModule()->getDataLayout();
unsigned LocalGEPsSeen = 0;
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h Thu Jun 28 16:47:12 2018
@@ -45,17 +45,12 @@ class AMDGPUTTIImpl final : public Basic
friend BaseT;
- const AMDGPUSubtarget *ST;
- const AMDGPUTargetLowering *TLI;
+ Triple TargetTriple;
public:
explicit AMDGPUTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()),
- ST(TM->getSubtargetImpl(F)),
- TLI(ST->getTargetLowering()) {}
-
- const AMDGPUSubtarget *getST() const { return ST; }
- const AMDGPUTargetLowering *getTLI() const { return TLI; }
+ TargetTriple(TM->getTargetTriple()) {}
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
@@ -123,7 +118,7 @@ class GCNTTIImpl final : public BasicTTI
public:
explicit GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()),
- ST(TM->getSubtargetImpl(F)),
+ ST(static_cast<const AMDGPUSubtarget*>(TM->getSubtargetImpl(F))),
TLI(ST->getTargetLowering()),
CommonTTI(TM, F),
IsGraphicsShader(AMDGPU::isShader(F.getCallingConv())) {}
@@ -211,18 +206,18 @@ class R600TTIImpl final : public BasicTT
friend BaseT;
- const AMDGPUSubtarget *ST;
+ const R600Subtarget *ST;
const AMDGPUTargetLowering *TLI;
AMDGPUTTIImpl CommonTTI;
public:
explicit R600TTIImpl(const AMDGPUTargetMachine *TM, const Function &F)
: BaseT(TM, F.getParent()->getDataLayout()),
- ST(TM->getSubtargetImpl(F)),
+ ST(static_cast<const R600Subtarget*>(TM->getSubtargetImpl(F))),
TLI(ST->getTargetLowering()),
CommonTTI(TM, F) {}
- const AMDGPUSubtarget *getST() const { return ST; }
+ const R600Subtarget *getST() const { return ST; }
const AMDGPUTargetLowering *getTLI() const { return TLI; }
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
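
getST() and getTLI() look unused here, but BasicTTIImplBase is a CRTP base that reaches back into the derived class for them, which is why R600TTIImpl now has to advertise the R600Subtarget type. Rough shape of the hook, simplified from BasicTTIImpl.h (a sketch, not the verbatim header):

    template <typename T> class BasicTTIImplBaseSketch {
    protected:
      // Delegates to the derived class, which must provide getST(); the
      // derived class's getST() hides this one, so there is no recursion.
      const TargetSubtargetInfo *getST() const {
        return static_cast<const T *>(this)->getST();
      }
    };
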
Modified: llvm/trunk/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp Thu Jun 28 16:47:12 2018
@@ -432,19 +432,19 @@ void AMDGPUCFGStructurizer::reversePredi
for (;; --I) {
if (I == MBB.end())
continue;
- if (I->getOpcode() == AMDGPU::PRED_X) {
+ if (I->getOpcode() == R600::PRED_X) {
switch (I->getOperand(2).getImm()) {
- case AMDGPU::PRED_SETE_INT:
- I->getOperand(2).setImm(AMDGPU::PRED_SETNE_INT);
+ case R600::PRED_SETE_INT:
+ I->getOperand(2).setImm(R600::PRED_SETNE_INT);
return;
- case AMDGPU::PRED_SETNE_INT:
- I->getOperand(2).setImm(AMDGPU::PRED_SETE_INT);
+ case R600::PRED_SETNE_INT:
+ I->getOperand(2).setImm(R600::PRED_SETE_INT);
return;
- case AMDGPU::PRED_SETE:
- I->getOperand(2).setImm(AMDGPU::PRED_SETNE);
+ case R600::PRED_SETE:
+ I->getOperand(2).setImm(R600::PRED_SETNE);
return;
- case AMDGPU::PRED_SETNE:
- I->getOperand(2).setImm(AMDGPU::PRED_SETE);
+ case R600::PRED_SETNE:
+ I->getOperand(2).setImm(R600::PRED_SETE);
return;
default:
llvm_unreachable("PRED_X Opcode invalid!");
@@ -513,10 +513,10 @@ void AMDGPUCFGStructurizer::insertCondBr
int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {
switch(OldOpcode) {
- case AMDGPU::JUMP_COND:
- case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
- case AMDGPU::BRANCH_COND_i32:
- case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
+ case R600::JUMP_COND:
+ case R600::JUMP: return R600::IF_PREDICATE_SET;
+ case R600::BRANCH_COND_i32:
+ case R600::BRANCH_COND_f32: return R600::IF_LOGICALNZ_f32;
default: llvm_unreachable("internal error");
}
return -1;
@@ -524,10 +524,10 @@ int AMDGPUCFGStructurizer::getBranchNzer
int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {
switch(OldOpcode) {
- case AMDGPU::JUMP_COND:
- case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
- case AMDGPU::BRANCH_COND_i32:
- case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
+ case R600::JUMP_COND:
+ case R600::JUMP: return R600::IF_PREDICATE_SET;
+ case R600::BRANCH_COND_i32:
+ case R600::BRANCH_COND_f32: return R600::IF_LOGICALZ_f32;
default: llvm_unreachable("internal error");
}
return -1;
@@ -535,8 +535,8 @@ int AMDGPUCFGStructurizer::getBranchZero
int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
switch(OldOpcode) {
- case AMDGPU::JUMP_COND:
- case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
+ case R600::JUMP_COND:
+ case R600::JUMP: return R600::CONTINUE_LOGICALNZ_i32;
default: llvm_unreachable("internal error");
}
return -1;
@@ -544,8 +544,8 @@ int AMDGPUCFGStructurizer::getContinueNz
int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) {
switch(OldOpcode) {
- case AMDGPU::JUMP_COND:
- case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
+ case R600::JUMP_COND:
+ case R600::JUMP: return R600::CONTINUE_LOGICALZ_i32;
default: llvm_unreachable("internal error");
}
return -1;
@@ -573,9 +573,9 @@ AMDGPUCFGStructurizer::getFalseBranch(Ma
bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {
switch (MI->getOpcode()) {
- case AMDGPU::JUMP_COND:
- case AMDGPU::BRANCH_COND_i32:
- case AMDGPU::BRANCH_COND_f32: return true;
+ case R600::JUMP_COND:
+ case R600::BRANCH_COND_i32:
+ case R600::BRANCH_COND_f32: return true;
default:
return false;
}
@@ -584,8 +584,8 @@ bool AMDGPUCFGStructurizer::isCondBranch
bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) {
switch (MI->getOpcode()) {
- case AMDGPU::JUMP:
- case AMDGPU::BRANCH:
+ case R600::JUMP:
+ case R600::BRANCH:
return true;
default:
return false;
@@ -634,7 +634,7 @@ MachineInstr *AMDGPUCFGStructurizer::get
MachineBasicBlock::reverse_iterator It = MBB->rbegin();
if (It != MBB->rend()) {
MachineInstr *instr = &(*It);
- if (instr->getOpcode() == AMDGPU::RETURN)
+ if (instr->getOpcode() == R600::RETURN)
return instr;
}
return nullptr;
@@ -687,8 +687,8 @@ void AMDGPUCFGStructurizer::wrapup(Machi
MachineBasicBlock::iterator E = MBB->end();
MachineBasicBlock::iterator It = Pre;
while (It != E) {
- if (Pre->getOpcode() == AMDGPU::CONTINUE
- && It->getOpcode() == AMDGPU::ENDLOOP)
+ if (Pre->getOpcode() == R600::CONTINUE
+ && It->getOpcode() == R600::ENDLOOP)
ContInstr.push_back(&*Pre);
Pre = It;
++It;
@@ -1303,15 +1303,15 @@ int AMDGPUCFGStructurizer::improveSimple
bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2);
- //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
- MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, AMDGPU::ENDIF);
+ //insert R600::ENDIF to avoid special case "input landBlk == NULL"
+ MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, R600::ENDIF);
if (LandBlkHasOtherPred) {
report_fatal_error("Extra register needed to handle CFG");
unsigned CmpResReg =
HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
report_fatal_error("Extra compare instruction needed to handle CFG");
- insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET,
+ insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET,
CmpResReg, DebugLoc());
}
@@ -1319,7 +1319,7 @@ int AMDGPUCFGStructurizer::improveSimple
// cause an assertion failure in the PostRA scheduling pass.
unsigned InitReg =
HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
- insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg,
+ insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET, InitReg,
DebugLoc());
if (MigrateTrue) {
@@ -1329,7 +1329,7 @@ int AMDGPUCFGStructurizer::improveSimple
// (initVal != 1).
report_fatal_error("Extra register needed to handle CFG");
}
- insertInstrBefore(I, AMDGPU::ELSE);
+ insertInstrBefore(I, R600::ELSE);
if (MigrateFalse) {
migrateInstruction(FalseMBB, LandBlk, I);
@@ -1341,7 +1341,7 @@ int AMDGPUCFGStructurizer::improveSimple
if (LandBlkHasOtherPred) {
// add endif
- insertInstrBefore(I, AMDGPU::ENDIF);
+ insertInstrBefore(I, R600::ENDIF);
// put initReg = 2 to other predecessors of landBlk
for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(),
@@ -1414,7 +1414,7 @@ void AMDGPUCFGStructurizer::mergeIfthene
}
if (FalseMBB) {
- insertInstrBefore(I, AMDGPU::ELSE);
+ insertInstrBefore(I, R600::ELSE);
MBB->splice(I, FalseMBB, FalseMBB->begin(),
FalseMBB->end());
MBB->removeSuccessor(FalseMBB, true);
@@ -1423,7 +1423,7 @@ void AMDGPUCFGStructurizer::mergeIfthene
retireBlock(FalseMBB);
MLI->removeBlock(FalseMBB);
}
- insertInstrBefore(I, AMDGPU::ENDIF);
+ insertInstrBefore(I, R600::ENDIF);
BranchMI->eraseFromParent();
@@ -1436,8 +1436,8 @@ void AMDGPUCFGStructurizer::mergeLooplan
LLVM_DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber()
<< " land = BB" << LandMBB->getNumber() << "\n";);
- insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DebugLoc());
- insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DebugLoc());
+ insertInstrBefore(DstBlk, R600::WHILELOOP, DebugLoc());
+ insertInstrEnd(DstBlk, R600::ENDLOOP, DebugLoc());
DstBlk->replaceSuccessor(DstBlk, LandMBB);
}
@@ -1453,9 +1453,9 @@ void AMDGPUCFGStructurizer::mergeLoopbre
MachineBasicBlock::iterator I = BranchMI;
if (TrueBranch != LandMBB)
reversePredicateSetter(I, *I->getParent());
- insertCondBranchBefore(ExitingMBB, I, AMDGPU::IF_PREDICATE_SET, AMDGPU::PREDICATE_BIT, DL);
- insertInstrBefore(I, AMDGPU::BREAK);
- insertInstrBefore(I, AMDGPU::ENDIF);
+ insertCondBranchBefore(ExitingMBB, I, R600::IF_PREDICATE_SET, R600::PREDICATE_BIT, DL);
+ insertInstrBefore(I, R600::BREAK);
+ insertInstrBefore(I, R600::ENDIF);
  //now branchInst can be erased safely
BranchMI->eraseFromParent();
//now take care of successors, retire blocks
@@ -1484,8 +1484,8 @@ void AMDGPUCFGStructurizer::settleLoopco
getBranchZeroOpcode(OldOpcode);
insertCondBranchBefore(I, BranchOpcode, DL);
// insertEnd to ensure phi-moves, if exist, go before the continue-instr.
- insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, DL);
- insertInstrEnd(ContingMBB, AMDGPU::ENDIF, DL);
+ insertInstrEnd(ContingMBB, R600::CONTINUE, DL);
+ insertInstrEnd(ContingMBB, R600::ENDIF, DL);
} else {
int BranchOpcode =
TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) :
@@ -1500,7 +1500,7 @@ void AMDGPUCFGStructurizer::settleLoopco
// location we've just inserted that reference here so it should be
// representative insertEnd to ensure phi-moves, if exist, go before the
// continue-instr.
- insertInstrEnd(ContingMBB, AMDGPU::CONTINUE,
+ insertInstrEnd(ContingMBB, R600::CONTINUE,
getLastDebugLocInBB(ContingMBB));
}
}
@@ -1627,7 +1627,7 @@ void AMDGPUCFGStructurizer::addDummyExit
SmallVectorImpl<MachineBasicBlock*> &RetMBB) {
MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
FuncRep->push_back(DummyExitBlk); //insert to function
- insertInstrEnd(DummyExitBlk, AMDGPU::RETURN);
+ insertInstrEnd(DummyExitBlk, R600::RETURN);
for (SmallVectorImpl<MachineBasicBlock *>::iterator It = RetMBB.begin(),
E = RetMBB.end(); It != E; ++It) {
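
Every opcode and register reference in this pass flips from AMDGPU:: to R600::. The rename is mechanical: R600.td (added below) sets let Namespace = "R600", and TableGen emits its enums into that C++ namespace. Illustrative shape of what R600GenInstrInfo.inc now provides (enumerator values are made up for the sketch; the real encodings come from TableGen):

    namespace llvm {
    namespace R600 {
    enum {
      // Hypothetical values for illustration only.
      PRED_X = 330,
      JUMP = 331,
      JUMP_COND = 332,
    };
    } // end namespace R600
    } // end namespace llvm
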
Modified: llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt (original)
+++ llvm/trunk/lib/Target/AMDGPU/CMakeLists.txt Thu Jun 28 16:47:12 2018
@@ -4,7 +4,6 @@ tablegen(LLVM AMDGPUGenAsmMatcher.inc -g
tablegen(LLVM AMDGPUGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM AMDGPUGenCallingConv.inc -gen-callingconv)
tablegen(LLVM AMDGPUGenDAGISel.inc -gen-dag-isel)
-tablegen(LLVM AMDGPUGenDFAPacketizer.inc -gen-dfa-packetizer)
tablegen(LLVM AMDGPUGenDisassemblerTables.inc -gen-disassembler)
tablegen(LLVM AMDGPUGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM AMDGPUGenIntrinsicEnums.inc -gen-tgt-intrinsic-enums)
@@ -19,6 +18,16 @@ tablegen(LLVM AMDGPUGenSubtargetInfo.inc
set(LLVM_TARGET_DEFINITIONS AMDGPUGISel.td)
tablegen(LLVM AMDGPUGenGlobalISel.inc -gen-global-isel)
+set(LLVM_TARGET_DEFINITIONS R600.td)
+tablegen(LLVM R600GenAsmWriter.inc -gen-asm-writer)
+tablegen(LLVM R600GenCallingConv.inc -gen-callingconv)
+tablegen(LLVM R600GenDAGISel.inc -gen-dag-isel)
+tablegen(LLVM R600GenDFAPacketizer.inc -gen-dfa-packetizer)
+tablegen(LLVM R600GenInstrInfo.inc -gen-instr-info)
+tablegen(LLVM R600GenMCCodeEmitter.inc -gen-emitter)
+tablegen(LLVM R600GenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM R600GenSubtargetInfo.inc -gen-subtarget)
+
add_public_tablegen_target(AMDGPUCommonTableGen)
add_llvm_target(AMDGPUCodeGen
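
The R600 generators run under a second LLVM_TARGET_DEFINITIONS block but still feed the single AMDGPUCommonTableGen target, so both sets of .inc files are available to the whole backend. Each consumer then picks its file by defining the matching GET_* macro before the include; a sketch of the standard pattern as it would appear in R600InstrInfo.cpp (assumed, not quoted from this commit):

    #define GET_INSTRINFO_CTOR_DTOR
    #include "R600GenInstrInfo.inc"
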
Modified: llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp Thu Jun 28 16:47:12 2018
@@ -20,6 +20,7 @@
#include "Disassembler/AMDGPUDisassembler.h"
#include "AMDGPU.h"
#include "AMDGPURegisterInfo.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUBaseInfo.h"
Modified: llvm/trunk/lib/Target/AMDGPU/EvergreenInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/EvergreenInstructions.td?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/EvergreenInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/EvergreenInstructions.td Thu Jun 28 16:47:12 2018
@@ -14,14 +14,13 @@
//===----------------------------------------------------------------------===//
def isEG : Predicate<
- "Subtarget->getGeneration() >= AMDGPUSubtarget::EVERGREEN && "
- "Subtarget->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS && "
+ "Subtarget->getGeneration() >= R600Subtarget::EVERGREEN && "
"!Subtarget->hasCaymanISA()"
>;
def isEGorCayman : Predicate<
- "Subtarget->getGeneration() == AMDGPUSubtarget::EVERGREEN ||"
- "Subtarget->getGeneration() == AMDGPUSubtarget::NORTHERN_ISLANDS"
+ "Subtarget->getGeneration() == R600Subtarget::EVERGREEN ||"
+ "Subtarget->getGeneration() == R600Subtarget::NORTHERN_ISLANDS"
>;
class EGPat<dag pattern, dag result> : AMDGPUPat<pattern, result> {
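
The Predicate strings above are spliced verbatim into the generated ISel source, so after the split they must compile against R600Subtarget's interface. Note that the old upper bound against NORTHERN_ISLANDS disappears from isEG: NORTHERN_ISLANDS is the last R600 generation, so the check is vacuous once the enum is R600-only. Roughly equivalent C++ for the new predicate (sketch):

    static bool isEGPredicate(const R600Subtarget *Subtarget) {
      return Subtarget->getGeneration() >= R600Subtarget::EVERGREEN &&
             !Subtarget->hasCaymanISA();
    }
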
Modified: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp Thu Jun 28 16:47:12 2018
@@ -510,11 +510,6 @@ void AMDGPUInstPrinter::printImmediate64
void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- if (!STI.getFeatureBits()[AMDGPU::FeatureGCN]) {
- static_cast<R600InstPrinter*>(this)->printOperand(MI, OpNo, O);
- return;
- }
-
if (OpNo >= MI->getNumOperands()) {
O << "/*Missing OP" << OpNo << "*/";
return;
@@ -965,11 +960,6 @@ void AMDGPUInstPrinter::printVGPRIndexMo
void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
- if (!STI.getFeatureBits()[AMDGPU::FeatureGCN]) {
- static_cast<R600InstPrinter*>(this)->printMemOperand(MI, OpNo, O);
- return;
- }
-
printOperand(MI, OpNo, STI, O);
O << ", ";
printOperand(MI, OpNo + 1, STI, O);
@@ -995,16 +985,6 @@ void AMDGPUInstPrinter::printIfSet(const
O << Asm;
}
-void AMDGPUInstPrinter::printAbs(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O) {
- static_cast<R600InstPrinter*>(this)->printAbs(MI, OpNo, O);
-}
-
-void AMDGPUInstPrinter::printClamp(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O) {
- static_cast<R600InstPrinter*>(this)->printClamp(MI, OpNo, O);
-}
-
void AMDGPUInstPrinter::printHigh(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
@@ -1031,70 +1011,6 @@ void AMDGPUInstPrinter::printOModSI(cons
O << " div:2";
}
-void AMDGPUInstPrinter::printLiteral(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
- static_cast<R600InstPrinter*>(this)->printLiteral(MI, OpNo, O);
-}
-
-void AMDGPUInstPrinter::printLast(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O) {
- static_cast<R600InstPrinter*>(this)->printLast(MI, OpNo, O);
-}
-
-void AMDGPUInstPrinter::printNeg(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O) {
- static_cast<R600InstPrinter*>(this)->printNeg(MI, OpNo, O);
-}
-
-void AMDGPUInstPrinter::printOMOD(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O) {
- static_cast<R600InstPrinter*>(this)->printOMOD(MI, OpNo, O);
-}
-
-void AMDGPUInstPrinter::printRel(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O) {
- static_cast<R600InstPrinter*>(this)->printRel(MI, OpNo, O);
-}
-
-void AMDGPUInstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
- static_cast<R600InstPrinter*>(this)->printUpdateExecMask(MI, OpNo, O);
-}
-
-void AMDGPUInstPrinter::printUpdatePred(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
- static_cast<R600InstPrinter*>(this)->printUpdatePred(MI, OpNo, O);
-}
-
-void AMDGPUInstPrinter::printWrite(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O) {
- static_cast<R600InstPrinter*>(this)->printWrite(MI, OpNo, O);
-}
-
-void AMDGPUInstPrinter::printBankSwizzle(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI,
- raw_ostream &O) {
- static_cast<R600InstPrinter*>(this)->printBankSwizzle(MI, OpNo, O);
-}
-
-void AMDGPUInstPrinter::printRSel(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O) {
- static_cast<R600InstPrinter*>(this)->printRSel(MI, OpNo, O);
-}
-
-void AMDGPUInstPrinter::printCT(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O) {
- static_cast<R600InstPrinter*>(this)->printCT(MI, OpNo, O);
-}
-
-void AMDGPUInstPrinter::printKCache(const MCInst *MI, unsigned OpNo,
- const MCSubtargetInfo &STI, raw_ostream &O) {
- static_cast<R600InstPrinter*>(this)->printKCache(MI, OpNo, O);
-}
-
void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI,
raw_ostream &O) {
@@ -1299,6 +1215,13 @@ void AMDGPUInstPrinter::printHwreg(const
#include "AMDGPUGenAsmWriter.inc"
+void R600InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot, const MCSubtargetInfo &STI) {
+ O.flush();
+ printInstruction(MI, O);
+ printAnnotation(O, Annot);
+}
+
void R600InstPrinter::printAbs(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
AMDGPUInstPrinter::printIfSet(MI, OpNo, O, '|');
@@ -1417,7 +1340,7 @@ void R600InstPrinter::printOperand(const
if (Op.isReg()) {
switch (Op.getReg()) {
// This is the default predicate state, so we don't need to print it.
- case AMDGPU::PRED_SEL_OFF:
+ case R600::PRED_SEL_OFF:
break;
default:
@@ -1493,3 +1416,5 @@ void R600InstPrinter::printWrite(const M
O << " (MASKED)";
}
}
+
+#include "R600GenAsmWriter.inc"
Modified: llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h Thu Jun 28 16:47:12 2018
@@ -218,13 +218,16 @@ protected:
raw_ostream &O);
};
-// FIXME: R600 specific parts of AMDGPUInstrPrinter should be moved here, and
-// MCTargetDesc should be using R600InstPrinter for the R600 target.
-class R600InstPrinter : public AMDGPUInstPrinter {
+class R600InstPrinter : public MCInstPrinter {
public:
R600InstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
const MCRegisterInfo &MRI)
- : AMDGPUInstPrinter(MAI, MII, MRI) {}
+ : MCInstPrinter(MAI, MII, MRI) {}
+
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getRegisterName(unsigned RegNo);
void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printBankSwizzle(const MCInst *MI, unsigned OpNo, raw_ostream &O);
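
R600InstPrinter stops inheriting from AMDGPUInstPrinter and derives directly from MCInstPrinter, picking up its own generated printInstruction() and getRegisterName() from R600GenAsmWriter.inc. A sketch of standalone use, assuming MAI/MII/MRI come from the usual TargetRegistry lookups for the r600 target:

    static void printR600Inst(const MCInst &MI, const MCAsmInfo &MAI,
                              const MCInstrInfo &MII,
                              const MCRegisterInfo &MRI,
                              const MCSubtargetInfo &STI, raw_ostream &OS) {
      R600InstPrinter Printer(MAI, MII, MRI);
      Printer.printInst(&MI, OS, /*Annot=*/"", STI);
    }
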
Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp Thu Jun 28 16:47:12 2018
@@ -38,9 +38,17 @@ using namespace llvm;
#define GET_SUBTARGETINFO_MC_DESC
#include "AMDGPUGenSubtargetInfo.inc"
+#define NoSchedModel NoSchedModelR600
+#define GET_SUBTARGETINFO_MC_DESC
+#include "R600GenSubtargetInfo.inc"
+#undef NoSchedModel
+
#define GET_REGINFO_MC_DESC
#include "AMDGPUGenRegisterInfo.inc"
+#define GET_REGINFO_MC_DESC
+#include "R600GenRegisterInfo.inc"
+
static MCInstrInfo *createAMDGPUMCInstrInfo() {
MCInstrInfo *X = new MCInstrInfo();
InitAMDGPUMCInstrInfo(X);
@@ -49,12 +57,17 @@ static MCInstrInfo *createAMDGPUMCInstrI
static MCRegisterInfo *createAMDGPUMCRegisterInfo(const Triple &TT) {
MCRegisterInfo *X = new MCRegisterInfo();
- InitAMDGPUMCRegisterInfo(X, 0);
+ if (TT.getArch() == Triple::r600)
+ InitR600MCRegisterInfo(X, 0);
+ else
+ InitAMDGPUMCRegisterInfo(X, 0);
return X;
}
static MCSubtargetInfo *
createAMDGPUMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
+ if (TT.getArch() == Triple::r600)
+ return createR600MCSubtargetInfoImpl(TT, CPU, FS);
return createAMDGPUMCSubtargetInfoImpl(TT, CPU, FS);
}
@@ -63,8 +76,10 @@ static MCInstPrinter *createAMDGPUMCInst
const MCAsmInfo &MAI,
const MCInstrInfo &MII,
const MCRegisterInfo &MRI) {
- return T.getArch() == Triple::r600 ? new R600InstPrinter(MAI, MII, MRI) :
- new AMDGPUInstPrinter(MAI, MII, MRI);
+ if (T.getArch() == Triple::r600)
+ return new R600InstPrinter(MAI, MII, MRI);
+ else
+ return new AMDGPUInstPrinter(MAI, MII, MRI);
}
static MCTargetStreamer *createAMDGPUAsmTargetStreamer(MCStreamer &S,
@@ -90,10 +105,12 @@ static MCStreamer *createMCStreamer(cons
}
extern "C" void LLVMInitializeAMDGPUTargetMC() {
+
+ TargetRegistry::RegisterMCInstrInfo(getTheGCNTarget(), createAMDGPUMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(getTheAMDGPUTarget(), createR600MCInstrInfo);
for (Target *T : {&getTheAMDGPUTarget(), &getTheGCNTarget()}) {
RegisterMCAsmInfo<AMDGPUMCAsmInfo> X(*T);
- TargetRegistry::RegisterMCInstrInfo(*T, createAMDGPUMCInstrInfo);
TargetRegistry::RegisterMCRegInfo(*T, createAMDGPUMCRegisterInfo);
TargetRegistry::RegisterMCSubtargetInfo(*T, createAMDGPUMCSubtargetInfo);
TargetRegistry::RegisterMCInstPrinter(*T, createAMDGPUMCInstPrinter);
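
Including two GET_SUBTARGETINFO_MC_DESC expansions in one translation unit would otherwise define the file-static NoSchedModel object twice, hence the #define/#undef dance around the R600 include above. The trick in miniature (self-contained and compilable; the ints stand in for the generated MCSchedModel tables):

    static const int NoSchedModel = 0;  // as if from AMDGPUGenSubtargetInfo.inc
    #define NoSchedModel NoSchedModelR600
    static const int NoSchedModel = 1;  // actually defines NoSchedModelR600
    #undef NoSchedModel                 // scope the rename to this include
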
Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h Thu Jun 28 16:47:12 2018
@@ -40,6 +40,7 @@ Target &getTheGCNTarget();
MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
+MCInstrInfo *createR600MCInstrInfo();
MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
@@ -59,6 +60,10 @@ createAMDGPUELFObjectWriter(bool Is64Bit
#include "AMDGPUGenRegisterInfo.inc"
#undef GET_REGINFO_ENUM
+#define GET_REGINFO_ENUM
+#include "R600GenRegisterInfo.inc"
+#undef GET_REGINFO_ENUM
+
#define GET_INSTRINFO_ENUM
#define GET_INSTRINFO_OPERAND_ENUM
#define GET_INSTRINFO_SCHED_ENUM
@@ -67,9 +72,20 @@ createAMDGPUELFObjectWriter(bool Is64Bit
#undef GET_INSTRINFO_OPERAND_ENUM
#undef GET_INSTRINFO_ENUM
+#define GET_INSTRINFO_ENUM
+#define GET_INSTRINFO_OPERAND_ENUM
+#define GET_INSTRINFO_SCHED_ENUM
+#include "R600GenInstrInfo.inc"
+#undef GET_INSTRINFO_SCHED_ENUM
+#undef GET_INSTRINFO_OPERAND_ENUM
+#undef GET_INSTRINFO_ENUM
#define GET_SUBTARGETINFO_ENUM
#include "AMDGPUGenSubtargetInfo.inc"
#undef GET_SUBTARGETINFO_ENUM
+#define GET_SUBTARGETINFO_ENUM
+#include "R600GenSubtargetInfo.inc"
+#undef GET_SUBTARGETINFO_ENUM
+
#endif
Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/CMakeLists.txt Thu Jun 28 16:47:12 2018
@@ -8,5 +8,6 @@ add_llvm_library(LLVMAMDGPUDesc
AMDGPUMCTargetDesc.cpp
AMDGPUTargetStreamer.cpp
R600MCCodeEmitter.cpp
+ R600MCTargetDesc.cpp
SIMCCodeEmitter.cpp
)
Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp Thu Jun 28 16:47:12 2018
@@ -15,7 +15,6 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/AMDGPUFixupKinds.h"
-#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "R600Defines.h"
#include "llvm/MC/MCCodeEmitter.h"
@@ -36,30 +35,40 @@ using namespace llvm;
namespace {
-class R600MCCodeEmitter : public AMDGPUMCCodeEmitter {
+class R600MCCodeEmitter : public MCCodeEmitter {
const MCRegisterInfo &MRI;
+ const MCInstrInfo &MCII;
public:
R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri)
- : AMDGPUMCCodeEmitter(mcii), MRI(mri) {}
+ : MRI(mri), MCII(mcii) {}
R600MCCodeEmitter(const R600MCCodeEmitter &) = delete;
R600MCCodeEmitter &operator=(const R600MCCodeEmitter &) = delete;
/// Encode the instruction and write it to the OS.
void encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const override;
+ const MCSubtargetInfo &STI) const;
/// \returns the encoding for an MCOperand.
uint64_t getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const override;
+ const MCSubtargetInfo &STI) const;
private:
+
void Emit(uint32_t value, raw_ostream &OS) const;
void Emit(uint64_t value, raw_ostream &OS) const;
unsigned getHWReg(unsigned regNo) const;
+
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+ uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
+ void verifyInstructionPredicates(const MCInst &MI,
+ uint64_t AvailableFeatures) const;
+
};
} // end anonymous namespace
@@ -94,16 +103,16 @@ void R600MCCodeEmitter::encodeInstructio
computeAvailableFeatures(STI.getFeatureBits()));
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
- if (MI.getOpcode() == AMDGPU::RETURN ||
- MI.getOpcode() == AMDGPU::FETCH_CLAUSE ||
- MI.getOpcode() == AMDGPU::ALU_CLAUSE ||
- MI.getOpcode() == AMDGPU::BUNDLE ||
- MI.getOpcode() == AMDGPU::KILL) {
+ if (MI.getOpcode() == R600::RETURN ||
+ MI.getOpcode() == R600::FETCH_CLAUSE ||
+ MI.getOpcode() == R600::ALU_CLAUSE ||
+ MI.getOpcode() == R600::BUNDLE ||
+ MI.getOpcode() == R600::KILL) {
return;
} else if (IS_VTX(Desc)) {
uint64_t InstWord01 = getBinaryCodeForInstr(MI, Fixups, STI);
uint32_t InstWord2 = MI.getOperand(2).getImm(); // Offset
- if (!(STI.getFeatureBits()[AMDGPU::FeatureCaymanISA])) {
+ if (!(STI.getFeatureBits()[R600::FeatureCaymanISA])) {
InstWord2 |= 1 << 19; // Mega-Fetch bit
}
@@ -136,7 +145,7 @@ void R600MCCodeEmitter::encodeInstructio
Emit((uint32_t) 0, OS);
} else {
uint64_t Inst = getBinaryCodeForInstr(MI, Fixups, STI);
- if ((STI.getFeatureBits()[AMDGPU::FeatureR600ALUInst]) &&
+ if ((STI.getFeatureBits()[R600::FeatureR600ALUInst]) &&
((Desc.TSFlags & R600_InstFlag::OP1) ||
Desc.TSFlags & R600_InstFlag::OP2)) {
uint64_t ISAOpCode = Inst & (0x3FFULL << 39);
@@ -186,4 +195,4 @@ uint64_t R600MCCodeEmitter::getMachineOp
}
#define ENABLE_INSTR_PREDICATE_VERIFIER
-#include "AMDGPUGenMCCodeEmitter.inc"
+#include "R600GenMCCodeEmitter.inc"
Added: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp?rev=335942&view=auto
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp (added)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp Thu Jun 28 16:47:12 2018
@@ -0,0 +1,27 @@
+//===-- R600MCTargetDesc.cpp - R600 Target Descriptions -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief This file provides R600 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUMCTargetDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
+
+using namespace llvm;
+
+#define GET_INSTRINFO_MC_DESC
+#include "R600GenInstrInfo.inc"
+
+MCInstrInfo *llvm::createR600MCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitR600MCInstrInfo(X);
+ return X;
+}
Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp Thu Jun 28 16:47:12 2018
@@ -438,3 +438,6 @@ uint64_t SIMCCodeEmitter::getMachineOpVa
llvm_unreachable("Encoding of this operand type is not supported yet.");
return 0;
}
+
+#define ENABLE_INSTR_PREDICATE_VERIFIER
+#include "AMDGPUGenMCCodeEmitter.inc"
Added: llvm/trunk/lib/Target/AMDGPU/R600.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600.td?rev=335942&view=auto
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600.td (added)
+++ llvm/trunk/lib/Target/AMDGPU/R600.td Thu Jun 28 16:47:12 2018
@@ -0,0 +1,59 @@
+//===-- R600.td - R600 Tablegen files ----------------------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+def R600InstrInfo : InstrInfo {
+ let guessInstructionProperties = 1;
+ let noNamedPositionallyEncodedOperands = 1;
+}
+
+def R600 : Target {
+ let InstructionSet = R600InstrInfo;
+ let AllowRegisterRenaming = 1;
+}
+
+let Namespace = "R600" in {
+
+foreach Index = 0-15 in {
+ def sub#Index : SubRegIndex<32, !shl(Index, 5)>;
+}
+
+include "R600RegisterInfo.td"
+
+}
+
+def NullALU : InstrItinClass;
+def ALU_NULL : FuncUnit;
+
+include "AMDGPUFeatures.td"
+include "R600Schedule.td"
+include "R600Processors.td"
+include "AMDGPUInstrInfo.td"
+include "AMDGPUInstructions.td"
+include "R600Instructions.td"
+include "R700Instructions.td"
+include "EvergreenInstructions.td"
+include "CaymanInstructions.td"
+
+// Calling convention for R600
+def CC_R600 : CallingConv<[
+ CCIfInReg<CCIfType<[v4f32, v4i32] , CCAssignToReg<[
+ T0_XYZW, T1_XYZW, T2_XYZW, T3_XYZW, T4_XYZW, T5_XYZW, T6_XYZW, T7_XYZW,
+ T8_XYZW, T9_XYZW, T10_XYZW, T11_XYZW, T12_XYZW, T13_XYZW, T14_XYZW, T15_XYZW,
+ T16_XYZW, T17_XYZW, T18_XYZW, T19_XYZW, T20_XYZW, T21_XYZW, T22_XYZW,
+ T23_XYZW, T24_XYZW, T25_XYZW, T26_XYZW, T27_XYZW, T28_XYZW, T29_XYZW,
+ T30_XYZW, T31_XYZW, T32_XYZW
+ ]>>>
+]>;
+
+// Calling convention for compute kernels
+def CC_R600_Kernel : CallingConv<[
+ CCCustom<"allocateKernArg">
+]>;
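For reference, TableGen lowers each CallingConv record above to a function with the CCAssignFn signature in R600GenCallingConv.inc, which R600ISelLowering.cpp includes further down. A hand-written approximation of the generated CC_R600, abbreviated and offered as an illustration rather than a copy of the real .inc:

  #include "llvm/CodeGen/CallingConvLower.h"

  using namespace llvm;

  // Returning false means the argument was assigned; returning true means
  // no rule matched. The register list is cut down to three entries here.
  static bool CC_R600_sketch(unsigned ValNo, MVT ValVT, MVT LocVT,
                             CCValAssign::LocInfo LocInfo,
                             ISD::ArgFlagsTy ArgFlags, CCState &State) {
    if (ArgFlags.isInReg() && (ValVT == MVT::v4f32 || ValVT == MVT::v4i32)) {
      static const MCPhysReg RegList[] = {
          R600::T0_XYZW, R600::T1_XYZW, R600::T2_XYZW /* ... T32_XYZW */};
      if (unsigned Reg = State.AllocateReg(RegList)) {
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
        return false;
      }
    }
    return true;
  }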
Modified: llvm/trunk/lib/Target/AMDGPU/R600AsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600AsmPrinter.cpp?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600AsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600AsmPrinter.cpp Thu Jun 28 16:47:12 2018
@@ -51,7 +51,7 @@ void R600AsmPrinter::EmitProgramInfoR600
for (const MachineBasicBlock &MBB : MF) {
for (const MachineInstr &MI : MBB) {
- if (MI.getOpcode() == AMDGPU::KILLGT)
+ if (MI.getOpcode() == R600::KILLGT)
killPixel = true;
unsigned numOperands = MI.getNumOperands();
for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
Modified: llvm/trunk/lib/Target/AMDGPU/R600ClauseMergePass.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600ClauseMergePass.cpp?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600ClauseMergePass.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600ClauseMergePass.cpp Thu Jun 28 16:47:12 2018
@@ -34,8 +34,8 @@ namespace {
static bool isCFAlu(const MachineInstr &MI) {
switch (MI.getOpcode()) {
- case AMDGPU::CF_ALU:
- case AMDGPU::CF_ALU_PUSH_BEFORE:
+ case R600::CF_ALU:
+ case R600::CF_ALU_PUSH_BEFORE:
return true;
default:
return false;
@@ -85,20 +85,20 @@ char &llvm::R600ClauseMergePassID = R600
unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr &MI) const {
assert(isCFAlu(MI));
return MI
- .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::COUNT))
+ .getOperand(TII->getOperandIdx(MI.getOpcode(), R600::OpName::COUNT))
.getImm();
}
bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr &MI) const {
assert(isCFAlu(MI));
return MI
- .getOperand(TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::Enabled))
+ .getOperand(TII->getOperandIdx(MI.getOpcode(), R600::OpName::Enabled))
.getImm();
}
void R600ClauseMergePass::cleanPotentialDisabledCFAlu(
MachineInstr &CFAlu) const {
- int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
+ int CntIdx = TII->getOperandIdx(R600::CF_ALU, R600::OpName::COUNT);
MachineBasicBlock::iterator I = CFAlu, E = CFAlu.getParent()->end();
I++;
do {
@@ -117,7 +117,7 @@ void R600ClauseMergePass::cleanPotential
bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu,
const MachineInstr &LatrCFAlu) const {
assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
- int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT);
+ int CntIdx = TII->getOperandIdx(R600::CF_ALU, R600::OpName::COUNT);
unsigned RootInstCount = getCFAluSize(RootCFAlu),
LaterInstCount = getCFAluSize(LatrCFAlu);
unsigned CumuledInsts = RootInstCount + LaterInstCount;
@@ -125,15 +125,15 @@ bool R600ClauseMergePass::mergeIfPossibl
LLVM_DEBUG(dbgs() << "Excess inst counts\n");
return false;
}
- if (RootCFAlu.getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
+ if (RootCFAlu.getOpcode() == R600::CF_ALU_PUSH_BEFORE)
return false;
// Is KCache Bank 0 compatible?
int Mode0Idx =
- TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0);
+ TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_MODE0);
int KBank0Idx =
- TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0);
+ TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_BANK0);
int KBank0LineIdx =
- TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0);
+ TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_ADDR0);
if (LatrCFAlu.getOperand(Mode0Idx).getImm() &&
RootCFAlu.getOperand(Mode0Idx).getImm() &&
(LatrCFAlu.getOperand(KBank0Idx).getImm() !=
@@ -145,11 +145,11 @@ bool R600ClauseMergePass::mergeIfPossibl
}
// Is KCache Bank 1 compatible?
int Mode1Idx =
- TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1);
+ TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_MODE1);
int KBank1Idx =
- TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1);
+ TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_BANK1);
int KBank1LineIdx =
- TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1);
+ TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_ADDR1);
if (LatrCFAlu.getOperand(Mode1Idx).getImm() &&
RootCFAlu.getOperand(Mode1Idx).getImm() &&
(LatrCFAlu.getOperand(KBank1Idx).getImm() !=
Modified: llvm/trunk/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp Thu Jun 28 16:47:12 2018
@@ -94,7 +94,7 @@ bool CFStack::branchStackContains(CFStac
}
bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
- if (Opcode == AMDGPU::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
+ if (Opcode == R600::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
getLoopDepth() > 1)
return true;
@@ -103,10 +103,10 @@ bool CFStack::requiresWorkAroundForInst(
switch(Opcode) {
default: return false;
- case AMDGPU::CF_ALU_PUSH_BEFORE:
- case AMDGPU::CF_ALU_ELSE_AFTER:
- case AMDGPU::CF_ALU_BREAK:
- case AMDGPU::CF_ALU_CONTINUE:
+ case R600::CF_ALU_PUSH_BEFORE:
+ case R600::CF_ALU_ELSE_AFTER:
+ case R600::CF_ALU_BREAK:
+ case R600::CF_ALU_CONTINUE:
if (CurrentSubEntries == 0)
return false;
if (ST->getWavefrontSize() == 64) {
@@ -168,8 +168,8 @@ void CFStack::updateMaxStackSize() {
void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
CFStack::StackItem Item = CFStack::ENTRY;
switch(Opcode) {
- case AMDGPU::CF_PUSH_EG:
- case AMDGPU::CF_ALU_PUSH_BEFORE:
+ case R600::CF_PUSH_EG:
+ case R600::CF_ALU_PUSH_BEFORE:
if (!isWQM) {
if (!ST->hasCaymanISA() &&
!branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
@@ -240,8 +240,8 @@ private:
bool IsTrivialInst(MachineInstr &MI) const {
switch (MI.getOpcode()) {
- case AMDGPU::KILL:
- case AMDGPU::RETURN:
+ case R600::KILL:
+ case R600::RETURN:
return true;
default:
return false;
@@ -253,41 +253,41 @@ private:
bool isEg = (ST->getGeneration() >= R600Subtarget::EVERGREEN);
switch (CFI) {
case CF_TC:
- Opcode = isEg ? AMDGPU::CF_TC_EG : AMDGPU::CF_TC_R600;
+ Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600;
break;
case CF_VC:
- Opcode = isEg ? AMDGPU::CF_VC_EG : AMDGPU::CF_VC_R600;
+ Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600;
break;
case CF_CALL_FS:
- Opcode = isEg ? AMDGPU::CF_CALL_FS_EG : AMDGPU::CF_CALL_FS_R600;
+ Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600;
break;
case CF_WHILE_LOOP:
- Opcode = isEg ? AMDGPU::WHILE_LOOP_EG : AMDGPU::WHILE_LOOP_R600;
+ Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600;
break;
case CF_END_LOOP:
- Opcode = isEg ? AMDGPU::END_LOOP_EG : AMDGPU::END_LOOP_R600;
+ Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600;
break;
case CF_LOOP_BREAK:
- Opcode = isEg ? AMDGPU::LOOP_BREAK_EG : AMDGPU::LOOP_BREAK_R600;
+ Opcode = isEg ? R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600;
break;
case CF_LOOP_CONTINUE:
- Opcode = isEg ? AMDGPU::CF_CONTINUE_EG : AMDGPU::CF_CONTINUE_R600;
+ Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600;
break;
case CF_JUMP:
- Opcode = isEg ? AMDGPU::CF_JUMP_EG : AMDGPU::CF_JUMP_R600;
+ Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600;
break;
case CF_ELSE:
- Opcode = isEg ? AMDGPU::CF_ELSE_EG : AMDGPU::CF_ELSE_R600;
+ Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600;
break;
case CF_POP:
- Opcode = isEg ? AMDGPU::POP_EG : AMDGPU::POP_R600;
+ Opcode = isEg ? R600::POP_EG : R600::POP_R600;
break;
case CF_END:
if (ST->hasCaymanISA()) {
- Opcode = AMDGPU::CF_END_CM;
+ Opcode = R600::CF_END_CM;
break;
}
- Opcode = isEg ? AMDGPU::CF_END_EG : AMDGPU::CF_END_R600;
+ Opcode = isEg ? R600::CF_END_EG : R600::CF_END_R600;
break;
}
assert (Opcode && "No opcode selected");
@@ -305,21 +305,21 @@ private:
continue;
if (MO.isDef()) {
unsigned Reg = MO.getReg();
- if (AMDGPU::R600_Reg128RegClass.contains(Reg))
+ if (R600::R600_Reg128RegClass.contains(Reg))
DstMI = Reg;
else
DstMI = TRI->getMatchingSuperReg(Reg,
AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
- &AMDGPU::R600_Reg128RegClass);
+ &R600::R600_Reg128RegClass);
}
if (MO.isUse()) {
unsigned Reg = MO.getReg();
- if (AMDGPU::R600_Reg128RegClass.contains(Reg))
+ if (R600::R600_Reg128RegClass.contains(Reg))
SrcMI = Reg;
else
SrcMI = TRI->getMatchingSuperReg(Reg,
AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
- &AMDGPU::R600_Reg128RegClass);
+ &R600::R600_Reg128RegClass);
}
}
if ((DstRegs.find(SrcMI) == DstRegs.end())) {
@@ -359,15 +359,15 @@ private:
void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const {
static const unsigned LiteralRegs[] = {
- AMDGPU::ALU_LITERAL_X,
- AMDGPU::ALU_LITERAL_Y,
- AMDGPU::ALU_LITERAL_Z,
- AMDGPU::ALU_LITERAL_W
+ R600::ALU_LITERAL_X,
+ R600::ALU_LITERAL_Y,
+ R600::ALU_LITERAL_Z,
+ R600::ALU_LITERAL_W
};
const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs =
TII->getSrcs(MI);
for (const auto &Src:Srcs) {
- if (Src.first->getReg() != AMDGPU::ALU_LITERAL_X)
+ if (Src.first->getReg() != R600::ALU_LITERAL_X)
continue;
int64_t Imm = Src.second;
std::vector<MachineOperand *>::iterator It =
@@ -377,7 +377,7 @@ private:
// Get corresponding Operand
MachineOperand &Operand = MI.getOperand(
- TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::literal));
+ TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal));
if (It != Lits.end()) {
// Reuse existing literal reg
@@ -400,7 +400,7 @@ private:
unsigned LiteralPair0 = Literals[i];
unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0;
InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
- TII->get(AMDGPU::LITERALS))
+ TII->get(R600::LITERALS))
.addImm(LiteralPair0)
.addImm(LiteralPair1);
}
@@ -442,7 +442,7 @@ private:
}
for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(),
- TII->get(AMDGPU::LITERALS));
+ TII->get(R600::LITERALS));
if (Literals[i]->isImm()) {
MILit.addImm(Literals[i]->getImm());
} else {
@@ -471,7 +471,7 @@ private:
unsigned &CfCount) {
CounterPropagateAddr(*Clause.first, CfCount);
MachineBasicBlock *BB = Clause.first->getParent();
- BuildMI(BB, DL, TII->get(AMDGPU::FETCH_CLAUSE)).addImm(CfCount);
+ BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount);
for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
BB->splice(InsertPos, BB, Clause.second[i]);
}
@@ -483,7 +483,7 @@ private:
Clause.first->getOperand(0).setImm(0);
CounterPropagateAddr(*Clause.first, CfCount);
MachineBasicBlock *BB = Clause.first->getParent();
- BuildMI(BB, DL, TII->get(AMDGPU::ALU_CLAUSE)).addImm(CfCount);
+ BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount);
for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
BB->splice(InsertPos, BB, Clause.second[i]);
}
@@ -540,34 +540,34 @@ public:
}
MachineBasicBlock::iterator MI = I;
- if (MI->getOpcode() != AMDGPU::ENDIF)
+ if (MI->getOpcode() != R600::ENDIF)
LastAlu.back() = nullptr;
- if (MI->getOpcode() == AMDGPU::CF_ALU)
+ if (MI->getOpcode() == R600::CF_ALU)
LastAlu.back() = &*MI;
I++;
bool RequiresWorkAround =
CFStack.requiresWorkAroundForInst(MI->getOpcode());
switch (MI->getOpcode()) {
- case AMDGPU::CF_ALU_PUSH_BEFORE:
+ case R600::CF_ALU_PUSH_BEFORE:
if (RequiresWorkAround) {
LLVM_DEBUG(dbgs()
<< "Applying bug work-around for ALU_PUSH_BEFORE\n");
- BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_EG))
+ BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG))
.addImm(CfCount + 1)
.addImm(1);
- MI->setDesc(TII->get(AMDGPU::CF_ALU));
+ MI->setDesc(TII->get(R600::CF_ALU));
CfCount++;
- CFStack.pushBranch(AMDGPU::CF_PUSH_EG);
+ CFStack.pushBranch(R600::CF_PUSH_EG);
} else
- CFStack.pushBranch(AMDGPU::CF_ALU_PUSH_BEFORE);
+ CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE);
LLVM_FALLTHROUGH;
- case AMDGPU::CF_ALU:
+ case R600::CF_ALU:
I = MI;
AluClauses.push_back(MakeALUClause(MBB, I));
LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
CfCount++;
break;
- case AMDGPU::WHILELOOP: {
+ case R600::WHILELOOP: {
CFStack.pushLoop();
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_WHILE_LOOP))
@@ -580,7 +580,7 @@ public:
CfCount++;
break;
}
- case AMDGPU::ENDLOOP: {
+ case R600::ENDLOOP: {
CFStack.popLoop();
std::pair<unsigned, std::set<MachineInstr *>> Pair =
std::move(LoopStack.back());
@@ -592,7 +592,7 @@ public:
CfCount++;
break;
}
- case AMDGPU::IF_PREDICATE_SET: {
+ case R600::IF_PREDICATE_SET: {
LastAlu.push_back(nullptr);
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_JUMP))
@@ -604,7 +604,7 @@ public:
CfCount++;
break;
}
- case AMDGPU::ELSE: {
+ case R600::ELSE: {
MachineInstr * JumpInst = IfThenElseStack.back();
IfThenElseStack.pop_back();
CounterPropagateAddr(*JumpInst, CfCount);
@@ -618,7 +618,7 @@ public:
CfCount++;
break;
}
- case AMDGPU::ENDIF: {
+ case R600::ENDIF: {
CFStack.popBranch();
if (LastAlu.back()) {
ToPopAfter.push_back(LastAlu.back());
@@ -640,7 +640,7 @@ public:
MI->eraseFromParent();
break;
}
- case AMDGPU::BREAK: {
+ case R600::BREAK: {
CfCount ++;
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_LOOP_BREAK))
@@ -649,7 +649,7 @@ public:
MI->eraseFromParent();
break;
}
- case AMDGPU::CONTINUE: {
+ case R600::CONTINUE: {
MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
getHWInstrDesc(CF_LOOP_CONTINUE))
.addImm(0);
@@ -658,12 +658,12 @@ public:
CfCount++;
break;
}
- case AMDGPU::RETURN: {
+ case R600::RETURN: {
DebugLoc DL = MBB.findDebugLoc(MI);
BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END));
CfCount++;
if (CfCount % 2) {
- BuildMI(MBB, I, DL, TII->get(AMDGPU::PAD));
+ BuildMI(MBB, I, DL, TII->get(R600::PAD));
CfCount++;
}
MI->eraseFromParent();
@@ -684,7 +684,7 @@ public:
for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
MachineInstr *Alu = ToPopAfter[i];
BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
- TII->get(AMDGPU::CF_ALU_POP_AFTER))
+ TII->get(R600::CF_ALU_POP_AFTER))
.addImm(Alu->getOperand(0).getImm())
.addImm(Alu->getOperand(1).getImm())
.addImm(Alu->getOperand(2).getImm())
Modified: llvm/trunk/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp Thu Jun 28 16:47:12 2018
@@ -52,12 +52,12 @@ private:
unsigned OccupiedDwords(MachineInstr &MI) const {
switch (MI.getOpcode()) {
- case AMDGPU::INTERP_PAIR_XY:
- case AMDGPU::INTERP_PAIR_ZW:
- case AMDGPU::INTERP_VEC_LOAD:
- case AMDGPU::DOT_4:
+ case R600::INTERP_PAIR_XY:
+ case R600::INTERP_PAIR_ZW:
+ case R600::INTERP_VEC_LOAD:
+ case R600::DOT_4:
return 4;
- case AMDGPU::KILL:
+ case R600::KILL:
return 0;
default:
break;
@@ -77,7 +77,7 @@ private:
E = MI.operands_end();
It != E; ++It) {
MachineOperand &MO = *It;
- if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
+ if (MO.isReg() && MO.getReg() == R600::ALU_LITERAL_X)
++NumLiteral;
}
return 1 + NumLiteral;
@@ -89,12 +89,12 @@ private:
if (TII->isVector(MI) || TII->isCubeOp(MI.getOpcode()))
return true;
switch (MI.getOpcode()) {
- case AMDGPU::PRED_X:
- case AMDGPU::INTERP_PAIR_XY:
- case AMDGPU::INTERP_PAIR_ZW:
- case AMDGPU::INTERP_VEC_LOAD:
- case AMDGPU::COPY:
- case AMDGPU::DOT_4:
+ case R600::PRED_X:
+ case R600::INTERP_PAIR_XY:
+ case R600::INTERP_PAIR_ZW:
+ case R600::INTERP_VEC_LOAD:
+ case R600::COPY:
+ case R600::DOT_4:
return true;
default:
return false;
@@ -103,9 +103,9 @@ private:
bool IsTrivialInst(MachineInstr &MI) const {
switch (MI.getOpcode()) {
- case AMDGPU::KILL:
- case AMDGPU::RETURN:
- case AMDGPU::IMPLICIT_DEF:
+ case R600::KILL:
+ case R600::RETURN:
+ case R600::IMPLICIT_DEF:
return true;
default:
return false;
@@ -132,16 +132,16 @@ private:
bool UpdateInstr = true) const {
std::vector<std::pair<unsigned, unsigned>> UsedKCache;
- if (!TII->isALUInstr(MI.getOpcode()) && MI.getOpcode() != AMDGPU::DOT_4)
+ if (!TII->isALUInstr(MI.getOpcode()) && MI.getOpcode() != R600::DOT_4)
return true;
const SmallVectorImpl<std::pair<MachineOperand *, int64_t>> &Consts =
TII->getSrcs(MI);
assert(
- (TII->isALUInstr(MI.getOpcode()) || MI.getOpcode() == AMDGPU::DOT_4) &&
+ (TII->isALUInstr(MI.getOpcode()) || MI.getOpcode() == R600::DOT_4) &&
"Can't assign Const");
for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
- if (Consts[i].first->getReg() != AMDGPU::ALU_CONST)
+ if (Consts[i].first->getReg() != R600::ALU_CONST)
continue;
unsigned Sel = Consts[i].second;
unsigned Chan = Sel & 3, Index = ((Sel >> 2) - 512) & 31;
@@ -172,16 +172,16 @@ private:
return true;
for (unsigned i = 0, j = 0, n = Consts.size(); i < n; ++i) {
- if (Consts[i].first->getReg() != AMDGPU::ALU_CONST)
+ if (Consts[i].first->getReg() != R600::ALU_CONST)
continue;
switch(UsedKCache[j].first) {
case 0:
Consts[i].first->setReg(
- AMDGPU::R600_KC0RegClass.getRegister(UsedKCache[j].second));
+ R600::R600_KC0RegClass.getRegister(UsedKCache[j].second));
break;
case 1:
Consts[i].first->setReg(
- AMDGPU::R600_KC1RegClass.getRegister(UsedKCache[j].second));
+ R600::R600_KC1RegClass.getRegister(UsedKCache[j].second));
break;
default:
llvm_unreachable("Wrong Cache Line");
@@ -253,7 +253,7 @@ private:
break;
if (AluInstCount > TII->getMaxAlusPerClause())
break;
- if (I->getOpcode() == AMDGPU::PRED_X) {
+ if (I->getOpcode() == R600::PRED_X) {
// We put PRED_X in its own clause to ensure that ifcvt won't create
// clauses with more than 128 insts.
// IfCvt is indeed checking that "then" and "else" branches of an if
@@ -289,7 +289,7 @@ private:
AluInstCount += OccupiedDwords(*I);
}
unsigned Opcode = PushBeforeModifier ?
- AMDGPU::CF_ALU_PUSH_BEFORE : AMDGPU::CF_ALU;
+ R600::CF_ALU_PUSH_BEFORE : R600::CF_ALU;
BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), TII->get(Opcode))
// We don't use the ADDR field until the R600ControlFlowFinalizer pass, where
// it is safe to assume it is 0. However, if we always put 0 here, the ifcvt
@@ -322,7 +322,7 @@ public:
BB != BB_E; ++BB) {
MachineBasicBlock &MBB = *BB;
MachineBasicBlock::iterator I = MBB.begin();
- if (I != MBB.end() && I->getOpcode() == AMDGPU::CF_ALU)
+ if (I != MBB.end() && I->getOpcode() == R600::CF_ALU)
continue; // BB was already parsed
for (MachineBasicBlock::iterator E = MBB.end(); I != E;) {
if (isALU(*I)) {
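A quick worked example of the kcache selector decoding used in the hunks above, where Sel & 3 picks the channel and ((Sel >> 2) - 512) & 31 picks the constant-buffer line; the selector value is arbitrary:

  #include <cassert>
  #include <cstdio>

  int main() {
    unsigned Sel = 2050;                      // an ALU_CONST selector
    unsigned Chan = Sel & 3;                  // low two bits: channel X..W
    unsigned Index = ((Sel >> 2) - 512) & 31; // constant-buffer line
    assert(Chan == 2 && Index == 0);
    std::printf("chan=%u line=%u\n", Chan, Index);
    return 0;
  }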
Modified: llvm/trunk/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp Thu Jun 28 16:47:12 2018
@@ -96,16 +96,16 @@ bool R600ExpandSpecialInstrsPass::runOnM
// Expand LDS_*_RET instructions
if (TII->isLDSRetInstr(MI.getOpcode())) {
- int DstIdx = TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
+ int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
assert(DstIdx != -1);
MachineOperand &DstOp = MI.getOperand(DstIdx);
MachineInstr *Mov = TII->buildMovInstr(&MBB, I,
- DstOp.getReg(), AMDGPU::OQAP);
- DstOp.setReg(AMDGPU::OQAP);
+ DstOp.getReg(), R600::OQAP);
+ DstOp.setReg(R600::OQAP);
int LDSPredSelIdx = TII->getOperandIdx(MI.getOpcode(),
- AMDGPU::OpName::pred_sel);
+ R600::OpName::pred_sel);
int MovPredSelIdx = TII->getOperandIdx(Mov->getOpcode(),
- AMDGPU::OpName::pred_sel);
+ R600::OpName::pred_sel);
// Copy the pred_sel bit
Mov->getOperand(MovPredSelIdx).setReg(
MI.getOperand(LDSPredSelIdx).getReg());
@@ -114,7 +114,7 @@ bool R600ExpandSpecialInstrsPass::runOnM
switch (MI.getOpcode()) {
default: break;
// Expand PRED_X to one of the PRED_SET instructions.
- case AMDGPU::PRED_X: {
+ case R600::PRED_X: {
uint64_t Flags = MI.getOperand(3).getImm();
// The native opcode used by PRED_X is stored as an immediate in the
// third operand.
@@ -122,17 +122,18 @@ bool R600ExpandSpecialInstrsPass::runOnM
MI.getOperand(2).getImm(), // opcode
MI.getOperand(0).getReg(), // dst
MI.getOperand(1).getReg(), // src0
- AMDGPU::ZERO); // src1
+ R600::ZERO); // src1
TII->addFlag(*PredSet, 0, MO_FLAG_MASK);
if (Flags & MO_FLAG_PUSH) {
- TII->setImmOperand(*PredSet, AMDGPU::OpName::update_exec_mask, 1);
+ TII->setImmOperand(*PredSet, R600::OpName::update_exec_mask, 1);
} else {
- TII->setImmOperand(*PredSet, AMDGPU::OpName::update_pred, 1);
+ TII->setImmOperand(*PredSet, R600::OpName::update_pred, 1);
}
MI.eraseFromParent();
continue;
}
- case AMDGPU::DOT_4: {
+ case R600::DOT_4: {
+
const R600RegisterInfo &TRI = TII->getRegisterInfo();
unsigned DstReg = MI.getOperand(0).getReg();
@@ -141,7 +142,7 @@ bool R600ExpandSpecialInstrsPass::runOnM
for (unsigned Chan = 0; Chan < 4; ++Chan) {
bool Mask = (Chan != TRI.getHWRegChan(DstReg));
unsigned SubDstReg =
- AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
+ R600::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
MachineInstr *BMI =
TII->buildSlotOfVectorInstruction(MBB, &MI, Chan, SubDstReg);
if (Chan > 0) {
@@ -156,10 +157,10 @@ bool R600ExpandSpecialInstrsPass::runOnM
// While not strictly necessary from a hw point of view, we force
// all src operands of a dot4 inst to belong to the same slot.
unsigned Src0 = BMI->getOperand(
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0))
+ TII->getOperandIdx(Opcode, R600::OpName::src0))
.getReg();
unsigned Src1 = BMI->getOperand(
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1))
+ TII->getOperandIdx(Opcode, R600::OpName::src1))
.getReg();
(void) Src0;
(void) Src1;
@@ -206,14 +207,14 @@ bool R600ExpandSpecialInstrsPass::runOnM
// T0_W = CUBE T1_Y, T1_Z
for (unsigned Chan = 0; Chan < 4; Chan++) {
unsigned DstReg = MI.getOperand(
- TII->getOperandIdx(MI, AMDGPU::OpName::dst)).getReg();
+ TII->getOperandIdx(MI, R600::OpName::dst)).getReg();
unsigned Src0 = MI.getOperand(
- TII->getOperandIdx(MI, AMDGPU::OpName::src0)).getReg();
+ TII->getOperandIdx(MI, R600::OpName::src0)).getReg();
unsigned Src1 = 0;
// Determine the correct source registers
if (!IsCube) {
- int Src1Idx = TII->getOperandIdx(MI, AMDGPU::OpName::src1);
+ int Src1Idx = TII->getOperandIdx(MI, R600::OpName::src1);
if (Src1Idx != -1) {
Src1 = MI.getOperand(Src1Idx).getReg();
}
@@ -241,7 +242,7 @@ bool R600ExpandSpecialInstrsPass::runOnM
// the current Channel.
Mask = (Chan != TRI.getHWRegChan(DstReg));
unsigned DstBase = TRI.getEncodingValue(DstReg) & HW_REG_MASK;
- DstReg = AMDGPU::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
+ DstReg = R600::R600_TReg32RegClass.getRegister((DstBase * 4) + Chan);
}
// Set the IsLast bit
@@ -250,11 +251,11 @@ bool R600ExpandSpecialInstrsPass::runOnM
// Add the new instruction
unsigned Opcode = MI.getOpcode();
switch (Opcode) {
- case AMDGPU::CUBE_r600_pseudo:
- Opcode = AMDGPU::CUBE_r600_real;
+ case R600::CUBE_r600_pseudo:
+ Opcode = R600::CUBE_r600_real;
break;
- case AMDGPU::CUBE_eg_pseudo:
- Opcode = AMDGPU::CUBE_eg_real;
+ case R600::CUBE_eg_pseudo:
+ Opcode = R600::CUBE_eg_real;
break;
default:
break;
@@ -271,12 +272,12 @@ bool R600ExpandSpecialInstrsPass::runOnM
if (NotLast) {
TII->addFlag(*NewMI, 0, MO_FLAG_NOT_LAST);
}
- SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::clamp);
- SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::literal);
- SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src0_abs);
- SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src1_abs);
- SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src0_neg);
- SetFlagInNewMI(NewMI, &MI, AMDGPU::OpName::src1_neg);
+ SetFlagInNewMI(NewMI, &MI, R600::OpName::clamp);
+ SetFlagInNewMI(NewMI, &MI, R600::OpName::literal);
+ SetFlagInNewMI(NewMI, &MI, R600::OpName::src0_abs);
+ SetFlagInNewMI(NewMI, &MI, R600::OpName::src1_abs);
+ SetFlagInNewMI(NewMI, &MI, R600::OpName::src0_neg);
+ SetFlagInNewMI(NewMI, &MI, R600::OpName::src1_neg);
}
MI.eraseFromParent();
}
Modified: llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp Thu Jun 28 16:47:12 2018
@@ -14,7 +14,6 @@
#include "R600ISelLowering.h"
#include "AMDGPUFrameLowering.h"
-#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600FrameLowering.h"
@@ -51,17 +50,31 @@
using namespace llvm;
+static bool allocateKernArg(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ MachineFunction &MF = State.getMachineFunction();
+ AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
+
+ uint64_t Offset = MFI->allocateKernArg(LocVT.getStoreSize(),
+ ArgFlags.getOrigAlign());
+ State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return true;
+}
+
+#include "R600GenCallingConv.inc"
+
R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
const R600Subtarget &STI)
- : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
- addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
- addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
- addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
- addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
- addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
- addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
+ : AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) {
+ addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass);
+ addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass);
+ addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass);
+ addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass);
+ addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
+ addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);
- computeRegisterProperties(STI.getRegisterInfo());
+ computeRegisterProperties(Subtarget->getRegisterInfo());
// Legalize loads and stores to the private address space.
setOperationAction(ISD::LOAD, MVT::i32, Custom);
@@ -148,6 +161,11 @@ R600TargetLowering::R600TargetLowering(c
setOperationAction(ISD::FSUB, MVT::f32, Expand);
+ setOperationAction(ISD::FCEIL, MVT::f64, Custom);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Custom);
+ setOperationAction(ISD::FRINT, MVT::f64, Custom);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Custom);
+
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
@@ -216,6 +234,34 @@ R600TargetLowering::R600TargetLowering(c
setOperationAction(ISD::FMA, MVT::f32, Expand);
setOperationAction(ISD::FMA, MVT::f64, Expand);
}
+
+ // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
+ // need it for R600.
+ if (!Subtarget->hasFP32Denormals())
+ setOperationAction(ISD::FMAD, MVT::f32, Legal);
+
+ if (!Subtarget->hasBFI()) {
+ // fcopysign can be done in a single instruction with BFI.
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ }
+
+ if (!Subtarget->hasBCNT(32))
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+
+ if (!Subtarget->hasBCNT(64))
+ setOperationAction(ISD::CTPOP, MVT::i64, Expand);
+
+ if (Subtarget->hasFFBH())
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
+
+ if (Subtarget->hasFFBL())
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
+
+ // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
+ // need it for R600.
+ if (Subtarget->hasBFE())
+ setHasExtractBitsInsn(true);
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
@@ -246,14 +292,10 @@ R600TargetLowering::R600TargetLowering(c
setTargetDAGCombine(ISD::LOAD);
}
-const R600Subtarget *R600TargetLowering::getSubtarget() const {
- return static_cast<const R600Subtarget *>(Subtarget);
-}
-
static inline bool isEOP(MachineBasicBlock::iterator I) {
if (std::next(I) == I->getParent()->end())
return false;
- return std::next(I)->getOpcode() == AMDGPU::RETURN;
+ return std::next(I)->getOpcode() == R600::RETURN;
}
MachineBasicBlock *
@@ -262,24 +304,24 @@ R600TargetLowering::EmitInstrWithCustomI
MachineFunction *MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
MachineBasicBlock::iterator I = MI;
- const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
+ const R600InstrInfo *TII = Subtarget->getInstrInfo();
switch (MI.getOpcode()) {
default:
// Replace LDS_*_RET instructions that don't have any uses with the
// equivalent LDS_*_NORET instruction.
if (TII->isLDSRetInstr(MI.getOpcode())) {
- int DstIdx = TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
+ int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
assert(DstIdx != -1);
MachineInstrBuilder NewMI;
// FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
// LDS_1A2D support and remove this special case.
if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
- MI.getOpcode() == AMDGPU::LDS_CMPST_RET)
+ MI.getOpcode() == R600::LDS_CMPST_RET)
return BB;
NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
- TII->get(AMDGPU::getLDSNoRetOp(MI.getOpcode())));
+ TII->get(R600::getLDSNoRetOp(MI.getOpcode())));
for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) {
NewMI.add(MI.getOperand(i));
}
@@ -288,23 +330,23 @@ R600TargetLowering::EmitInstrWithCustomI
}
break;
- case AMDGPU::FABS_R600: {
+ case R600::FABS_R600: {
MachineInstr *NewMI = TII->buildDefaultInstruction(
- *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
+ *BB, I, R600::MOV, MI.getOperand(0).getReg(),
MI.getOperand(1).getReg());
TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
break;
}
- case AMDGPU::FNEG_R600: {
+ case R600::FNEG_R600: {
MachineInstr *NewMI = TII->buildDefaultInstruction(
- *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
+ *BB, I, R600::MOV, MI.getOperand(0).getReg(),
MI.getOperand(1).getReg());
TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
break;
}
- case AMDGPU::MASK_WRITE: {
+ case R600::MASK_WRITE: {
unsigned maskedRegister = MI.getOperand(0).getReg();
assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
@@ -312,7 +354,7 @@ R600TargetLowering::EmitInstrWithCustomI
break;
}
- case AMDGPU::MOV_IMM_F32:
+ case R600::MOV_IMM_F32:
TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
.getFPImm()
->getValueAPF()
@@ -320,39 +362,39 @@ R600TargetLowering::EmitInstrWithCustomI
.getZExtValue());
break;
- case AMDGPU::MOV_IMM_I32:
+ case R600::MOV_IMM_I32:
TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
MI.getOperand(1).getImm());
break;
- case AMDGPU::MOV_IMM_GLOBAL_ADDR: {
+ case R600::MOV_IMM_GLOBAL_ADDR: {
//TODO: Perhaps combine this instruction with the next if possible
auto MIB = TII->buildDefaultInstruction(
- *BB, MI, AMDGPU::MOV, MI.getOperand(0).getReg(), AMDGPU::ALU_LITERAL_X);
- int Idx = TII->getOperandIdx(*MIB, AMDGPU::OpName::literal);
+ *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X);
+ int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);
//TODO: Ugh this is rather ugly
MIB->getOperand(Idx) = MI.getOperand(1);
break;
}
- case AMDGPU::CONST_COPY: {
+ case R600::CONST_COPY: {
MachineInstr *NewMI = TII->buildDefaultInstruction(
- *BB, MI, AMDGPU::MOV, MI.getOperand(0).getReg(), AMDGPU::ALU_CONST);
- TII->setImmOperand(*NewMI, AMDGPU::OpName::src0_sel,
+ *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST);
+ TII->setImmOperand(*NewMI, R600::OpName::src0_sel,
MI.getOperand(1).getImm());
break;
}
- case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
- case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
- case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
+ case R600::RAT_WRITE_CACHELESS_32_eg:
+ case R600::RAT_WRITE_CACHELESS_64_eg:
+ case R600::RAT_WRITE_CACHELESS_128_eg:
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
.add(MI.getOperand(0))
.add(MI.getOperand(1))
.addImm(isEOP(I)); // Set End of program bit
break;
- case AMDGPU::RAT_STORE_TYPED_eg:
+ case R600::RAT_STORE_TYPED_eg:
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
.add(MI.getOperand(0))
.add(MI.getOperand(1))
@@ -360,49 +402,49 @@ R600TargetLowering::EmitInstrWithCustomI
.addImm(isEOP(I)); // Set End of program bit
break;
- case AMDGPU::BRANCH:
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
+ case R600::BRANCH:
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP))
.add(MI.getOperand(0));
break;
- case AMDGPU::BRANCH_COND_f32: {
+ case R600::BRANCH_COND_f32: {
MachineInstr *NewMI =
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
- AMDGPU::PREDICATE_BIT)
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
+ R600::PREDICATE_BIT)
.add(MI.getOperand(1))
- .addImm(AMDGPU::PRED_SETNE)
+ .addImm(R600::PRED_SETNE)
.addImm(0); // Flags
TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
.add(MI.getOperand(0))
- .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ .addReg(R600::PREDICATE_BIT, RegState::Kill);
break;
}
- case AMDGPU::BRANCH_COND_i32: {
+ case R600::BRANCH_COND_i32: {
MachineInstr *NewMI =
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
- AMDGPU::PREDICATE_BIT)
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
+ R600::PREDICATE_BIT)
.add(MI.getOperand(1))
- .addImm(AMDGPU::PRED_SETNE_INT)
+ .addImm(R600::PRED_SETNE_INT)
.addImm(0); // Flags
TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
+ BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
.add(MI.getOperand(0))
- .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ .addReg(R600::PREDICATE_BIT, RegState::Kill);
break;
}
- case AMDGPU::EG_ExportSwz:
- case AMDGPU::R600_ExportSwz: {
+ case R600::EG_ExportSwz:
+ case R600::R600_ExportSwz: {
// Instruction is left unmodified if it's not the last one of its type
bool isLastInstructionOfItsType = true;
unsigned InstExportType = MI.getOperand(1).getImm();
for (MachineBasicBlock::iterator NextExportInst = std::next(I),
EndBlock = BB->end(); NextExportInst != EndBlock;
NextExportInst = std::next(NextExportInst)) {
- if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
- NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
+ if (NextExportInst->getOpcode() == R600::EG_ExportSwz ||
+ NextExportInst->getOpcode() == R600::R600_ExportSwz) {
unsigned CurrentInstExportType = NextExportInst->getOperand(1)
.getImm();
if (CurrentInstExportType == InstExportType) {
@@ -414,7 +456,7 @@ R600TargetLowering::EmitInstrWithCustomI
bool EOP = isEOP(I);
if (!EOP && !isLastInstructionOfItsType)
return BB;
- unsigned CfInst = (MI.getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
+ unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40;
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
.add(MI.getOperand(0))
.add(MI.getOperand(1))
@@ -427,7 +469,7 @@ R600TargetLowering::EmitInstrWithCustomI
.addImm(EOP);
break;
}
- case AMDGPU::RETURN: {
+ case R600::RETURN: {
return BB;
}
}
@@ -583,23 +625,23 @@ SDValue R600TargetLowering::LowerOperati
return LowerImplicitParameter(DAG, VT, DL, 8);
case Intrinsic::r600_read_tgid_x:
- return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
- AMDGPU::T1_X, VT);
+ return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
+ R600::T1_X, VT);
case Intrinsic::r600_read_tgid_y:
- return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
- AMDGPU::T1_Y, VT);
+ return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
+ R600::T1_Y, VT);
case Intrinsic::r600_read_tgid_z:
- return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
- AMDGPU::T1_Z, VT);
+ return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
+ R600::T1_Z, VT);
case Intrinsic::r600_read_tidig_x:
- return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
- AMDGPU::T0_X, VT);
+ return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
+ R600::T0_X, VT);
case Intrinsic::r600_read_tidig_y:
- return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
- AMDGPU::T0_Y, VT);
+ return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
+ R600::T0_Y, VT);
case Intrinsic::r600_read_tidig_z:
- return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
- AMDGPU::T0_Z, VT);
+ return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
+ R600::T0_Z, VT);
case Intrinsic::r600_recipsqrt_ieee:
return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
@@ -1521,7 +1563,7 @@ SDValue R600TargetLowering::LowerBRCOND(
SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- const R600FrameLowering *TFL = getSubtarget()->getFrameLowering();
+ const R600FrameLowering *TFL = Subtarget->getFrameLowering();
FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
@@ -1533,6 +1575,28 @@ SDValue R600TargetLowering::lowerFrameIn
Op.getValueType());
}
+CCAssignFn *R600TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
+ bool IsVarArg) const {
+ switch (CC) {
+ case CallingConv::AMDGPU_KERNEL:
+ case CallingConv::SPIR_KERNEL:
+ case CallingConv::C:
+ case CallingConv::Fast:
+ case CallingConv::Cold:
+ return CC_R600_Kernel;
+ case CallingConv::AMDGPU_VS:
+ case CallingConv::AMDGPU_GS:
+ case CallingConv::AMDGPU_PS:
+ case CallingConv::AMDGPU_CS:
+ case CallingConv::AMDGPU_HS:
+ case CallingConv::AMDGPU_ES:
+ case CallingConv::AMDGPU_LS:
+ return CC_R600;
+ default:
+ report_fatal_error("Unsupported calling convention.");
+ }
+}
+
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
@@ -1563,7 +1627,7 @@ SDValue R600TargetLowering::LowerFormalA
}
if (AMDGPU::isShader(CallConv)) {
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
InVals.push_back(Register);
continue;
@@ -1594,7 +1658,7 @@ SDValue R600TargetLowering::LowerFormalA
unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
unsigned PartOffset = VA.getLocMemOffset();
- unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF.getFunction()) +
+ unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) +
VA.getLocMemOffset();
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
@@ -1981,26 +2045,26 @@ bool R600TargetLowering::FoldOperand(SDN
SDValue &Src, SDValue &Neg, SDValue &Abs,
SDValue &Sel, SDValue &Imm,
SelectionDAG &DAG) const {
- const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
+ const R600InstrInfo *TII = Subtarget->getInstrInfo();
if (!Src.isMachineOpcode())
return false;
switch (Src.getMachineOpcode()) {
- case AMDGPU::FNEG_R600:
+ case R600::FNEG_R600:
if (!Neg.getNode())
return false;
Src = Src.getOperand(0);
Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
return true;
- case AMDGPU::FABS_R600:
+ case R600::FABS_R600:
if (!Abs.getNode())
return false;
Src = Src.getOperand(0);
Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
return true;
- case AMDGPU::CONST_COPY: {
+ case R600::CONST_COPY: {
unsigned Opcode = ParentNode->getMachineOpcode();
- bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
+ bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
if (!Sel.getNode())
return false;
@@ -2011,17 +2075,17 @@ bool R600TargetLowering::FoldOperand(SDN
// Gather constants values
int SrcIndices[] = {
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
+ TII->getOperandIdx(Opcode, R600::OpName::src0),
+ TII->getOperandIdx(Opcode, R600::OpName::src1),
+ TII->getOperandIdx(Opcode, R600::OpName::src2),
+ TII->getOperandIdx(Opcode, R600::OpName::src0_X),
+ TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
+ TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
+ TII->getOperandIdx(Opcode, R600::OpName::src0_W),
+ TII->getOperandIdx(Opcode, R600::OpName::src1_X),
+ TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
+ TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
+ TII->getOperandIdx(Opcode, R600::OpName::src1_W)
};
std::vector<unsigned> Consts;
for (int OtherSrcIdx : SrcIndices) {
@@ -2034,7 +2098,7 @@ bool R600TargetLowering::FoldOperand(SDN
}
if (RegisterSDNode *Reg =
dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
- if (Reg->getReg() == AMDGPU::ALU_CONST) {
+ if (Reg->getReg() == R600::ALU_CONST) {
ConstantSDNode *Cst
= cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
Consts.push_back(Cst->getZExtValue());
@@ -2049,30 +2113,30 @@ bool R600TargetLowering::FoldOperand(SDN
}
Sel = CstOffset;
- Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
+ Src = DAG.getRegister(R600::ALU_CONST, MVT::f32);
return true;
}
- case AMDGPU::MOV_IMM_GLOBAL_ADDR:
+ case R600::MOV_IMM_GLOBAL_ADDR:
// Check if the Imm slot is used. Taken from below.
if (cast<ConstantSDNode>(Imm)->getZExtValue())
return false;
Imm = Src.getOperand(0);
- Src = DAG.getRegister(AMDGPU::ALU_LITERAL_X, MVT::i32);
+ Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32);
return true;
- case AMDGPU::MOV_IMM_I32:
- case AMDGPU::MOV_IMM_F32: {
- unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
+ case R600::MOV_IMM_I32:
+ case R600::MOV_IMM_F32: {
+ unsigned ImmReg = R600::ALU_LITERAL_X;
uint64_t ImmValue = 0;
- if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
+ if (Src.getMachineOpcode() == R600::MOV_IMM_F32) {
ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
float FloatValue = FPC->getValueAPF().convertToFloat();
if (FloatValue == 0.0) {
- ImmReg = AMDGPU::ZERO;
+ ImmReg = R600::ZERO;
} else if (FloatValue == 0.5) {
- ImmReg = AMDGPU::HALF;
+ ImmReg = R600::HALF;
} else if (FloatValue == 1.0) {
- ImmReg = AMDGPU::ONE;
+ ImmReg = R600::ONE;
} else {
ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
}
@@ -2080,9 +2144,9 @@ bool R600TargetLowering::FoldOperand(SDN
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
uint64_t Value = C->getZExtValue();
if (Value == 0) {
- ImmReg = AMDGPU::ZERO;
+ ImmReg = R600::ZERO;
} else if (Value == 1) {
- ImmReg = AMDGPU::ONE_INT;
+ ImmReg = R600::ONE_INT;
} else {
ImmValue = Value;
}
@@ -2091,7 +2155,7 @@ bool R600TargetLowering::FoldOperand(SDN
// Check that we aren't already using an immediate.
// XXX: It's possible for an instruction to have more than one
// immediate operand, but this is not supported yet.
- if (ImmReg == AMDGPU::ALU_LITERAL_X) {
+ if (ImmReg == R600::ALU_LITERAL_X) {
if (!Imm.getNode())
return false;
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
@@ -2111,7 +2175,7 @@ bool R600TargetLowering::FoldOperand(SDN
/// Fold the instructions after selecting them
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
SelectionDAG &DAG) const {
- const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
+ const R600InstrInfo *TII = Subtarget->getInstrInfo();
if (!Node->isMachineOpcode())
return Node;
@@ -2120,36 +2184,36 @@ SDNode *R600TargetLowering::PostISelFold
std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
- if (Opcode == AMDGPU::DOT_4) {
+ if (Opcode == R600::DOT_4) {
int OperandIdx[] = {
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
+ TII->getOperandIdx(Opcode, R600::OpName::src0_X),
+ TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
+ TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
+ TII->getOperandIdx(Opcode, R600::OpName::src0_W),
+ TII->getOperandIdx(Opcode, R600::OpName::src1_X),
+ TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
+ TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
+ TII->getOperandIdx(Opcode, R600::OpName::src1_W)
};
int NegIdx[] = {
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
+ TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X),
+ TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y),
+ TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z),
+ TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W),
+ TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X),
+ TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y),
+ TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z),
+ TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)
};
int AbsIdx[] = {
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
+ TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X),
+ TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y),
+ TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z),
+ TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W),
+ TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X),
+ TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y),
+ TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z),
+ TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)
};
for (unsigned i = 0; i < 8; i++) {
if (OperandIdx[i] < 0)
@@ -2157,7 +2221,7 @@ SDNode *R600TargetLowering::PostISelFold
SDValue &Src = Ops[OperandIdx[i] - 1];
SDValue &Neg = Ops[NegIdx[i] - 1];
SDValue &Abs = Ops[AbsIdx[i] - 1];
- bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
+ bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
if (HasDst)
SelIdx--;
@@ -2165,7 +2229,7 @@ SDNode *R600TargetLowering::PostISelFold
if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
}
- } else if (Opcode == AMDGPU::REG_SEQUENCE) {
+ } else if (Opcode == R600::REG_SEQUENCE) {
for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
SDValue &Src = Ops[i];
if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
@@ -2175,18 +2239,18 @@ SDNode *R600TargetLowering::PostISelFold
if (!TII->hasInstrModifiers(Opcode))
return Node;
int OperandIdx[] = {
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
+ TII->getOperandIdx(Opcode, R600::OpName::src0),
+ TII->getOperandIdx(Opcode, R600::OpName::src1),
+ TII->getOperandIdx(Opcode, R600::OpName::src2)
};
int NegIdx[] = {
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
+ TII->getOperandIdx(Opcode, R600::OpName::src0_neg),
+ TII->getOperandIdx(Opcode, R600::OpName::src1_neg),
+ TII->getOperandIdx(Opcode, R600::OpName::src2_neg)
};
int AbsIdx[] = {
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
- TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
+ TII->getOperandIdx(Opcode, R600::OpName::src0_abs),
+ TII->getOperandIdx(Opcode, R600::OpName::src1_abs),
-1
};
for (unsigned i = 0; i < 3; i++) {
@@ -2196,9 +2260,9 @@ SDNode *R600TargetLowering::PostISelFold
SDValue &Neg = Ops[NegIdx[i] - 1];
SDValue FakeAbs;
SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
- bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
+ bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
- int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
+ int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);
if (HasDst) {
SelIdx--;
ImmIdx--;
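Tying the calling-convention pieces together: the CCCustom<"allocateKernArg"> record in R600.td makes the generated CC_R600_Kernel call the static allocateKernArg defined near the top of this file, and the new CCAssignFnForCall hook picks between the two generated functions. A hedged sketch of the standard consumption pattern in argument lowering (not literal code from this commit):

  #include "R600ISelLowering.h"
  #include "llvm/CodeGen/CallingConvLower.h"
  #include "llvm/CodeGen/SelectionDAG.h"

  using namespace llvm;

  // Analyze the incoming arguments with whichever generated assign function
  // CCAssignFnForCall() selects; for kernels this runs CC_R600_Kernel, whose
  // CCCustom rule calls allocateKernArg() to record a byte offset per arg.
  static void analyzeIncomingArgs(SelectionDAG &DAG, CallingConv::ID CC,
                                  bool IsVarArg,
                                  const SmallVectorImpl<ISD::InputArg> &Ins,
                                  const R600TargetLowering &TLI,
                                  SmallVectorImpl<CCValAssign> &ArgLocs) {
    CCState CCInfo(CC, IsVarArg, DAG.getMachineFunction(), ArgLocs,
                   *DAG.getContext());
    CCInfo.AnalyzeFormalArguments(Ins, TLI.CCAssignFnForCall(CC, IsVarArg));
  }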
Modified: llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.h?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.h Thu Jun 28 16:47:12 2018
@@ -23,6 +23,8 @@ class R600InstrInfo;
class R600Subtarget;
class R600TargetLowering final : public AMDGPUTargetLowering {
+
+ const R600Subtarget *Subtarget;
public:
R600TargetLowering(const TargetMachine &TM, const R600Subtarget &STI);
@@ -36,6 +38,7 @@ public:
void ReplaceNodeResults(SDNode * N,
SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG) const override;
+ CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
Modified: llvm/trunk/lib/Target/AMDGPU/R600InstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600InstrFormats.td?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600InstrFormats.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600InstrFormats.td Thu Jun 28 16:47:12 2018
@@ -41,7 +41,7 @@ class InstR600 <dag outs, dag ins, strin
bit LDS_1A2D = 0;
let SubtargetPredicate = isR600toCayman;
- let Namespace = "AMDGPU";
+ let Namespace = "R600";
let OutOperandList = outs;
let InOperandList = ins;
let AsmString = asm;
Modified: llvm/trunk/lib/Target/AMDGPU/R600InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600InstrInfo.cpp?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600InstrInfo.cpp Thu Jun 28 16:47:12 2018
@@ -45,10 +45,15 @@
using namespace llvm;
#define GET_INSTRINFO_CTOR_DTOR
-#include "AMDGPUGenDFAPacketizer.inc"
+#include "R600GenDFAPacketizer.inc"
+
+#define GET_INSTRINFO_CTOR_DTOR
+#define GET_INSTRMAP_INFO
+#define GET_INSTRINFO_NAMED_OPS
+#include "R600GenInstrInfo.inc"
R600InstrInfo::R600InstrInfo(const R600Subtarget &ST)
- : AMDGPUInstrInfo(ST), RI(), ST(ST) {}
+ : R600GenInstrInfo(-1, -1), RI(), ST(ST) {}
bool R600InstrInfo::isVector(const MachineInstr &MI) const {
return get(MI.getOpcode()).TSFlags & R600_InstFlag::VECTOR;
@@ -59,31 +64,31 @@ void R600InstrInfo::copyPhysReg(MachineB
const DebugLoc &DL, unsigned DestReg,
unsigned SrcReg, bool KillSrc) const {
unsigned VectorComponents = 0;
- if ((AMDGPU::R600_Reg128RegClass.contains(DestReg) ||
- AMDGPU::R600_Reg128VerticalRegClass.contains(DestReg)) &&
- (AMDGPU::R600_Reg128RegClass.contains(SrcReg) ||
- AMDGPU::R600_Reg128VerticalRegClass.contains(SrcReg))) {
+ if ((R600::R600_Reg128RegClass.contains(DestReg) ||
+ R600::R600_Reg128VerticalRegClass.contains(DestReg)) &&
+ (R600::R600_Reg128RegClass.contains(SrcReg) ||
+ R600::R600_Reg128VerticalRegClass.contains(SrcReg))) {
VectorComponents = 4;
- } else if((AMDGPU::R600_Reg64RegClass.contains(DestReg) ||
- AMDGPU::R600_Reg64VerticalRegClass.contains(DestReg)) &&
- (AMDGPU::R600_Reg64RegClass.contains(SrcReg) ||
- AMDGPU::R600_Reg64VerticalRegClass.contains(SrcReg))) {
+ } else if((R600::R600_Reg64RegClass.contains(DestReg) ||
+ R600::R600_Reg64VerticalRegClass.contains(DestReg)) &&
+ (R600::R600_Reg64RegClass.contains(SrcReg) ||
+ R600::R600_Reg64VerticalRegClass.contains(SrcReg))) {
VectorComponents = 2;
}
if (VectorComponents > 0) {
for (unsigned I = 0; I < VectorComponents; I++) {
unsigned SubRegIndex = AMDGPURegisterInfo::getSubRegFromChannel(I);
- buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
+ buildDefaultInstruction(MBB, MI, R600::MOV,
RI.getSubReg(DestReg, SubRegIndex),
RI.getSubReg(SrcReg, SubRegIndex))
.addReg(DestReg,
RegState::Define | RegState::Implicit);
}
} else {
- MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, AMDGPU::MOV,
+ MachineInstr *NewMI = buildDefaultInstruction(MBB, MI, R600::MOV,
DestReg, SrcReg);
- NewMI->getOperand(getOperandIdx(*NewMI, AMDGPU::OpName::src0))
+ NewMI->getOperand(getOperandIdx(*NewMI, R600::OpName::src0))
.setIsKill(KillSrc);
}
}
@@ -104,9 +109,9 @@ bool R600InstrInfo::isMov(unsigned Opcod
switch(Opcode) {
default:
return false;
- case AMDGPU::MOV:
- case AMDGPU::MOV_IMM_F32:
- case AMDGPU::MOV_IMM_I32:
+ case R600::MOV:
+ case R600::MOV_IMM_F32:
+ case R600::MOV_IMM_I32:
return true;
}
}
@@ -118,10 +123,10 @@ bool R600InstrInfo::isReductionOp(unsign
bool R600InstrInfo::isCubeOp(unsigned Opcode) const {
switch(Opcode) {
default: return false;
- case AMDGPU::CUBE_r600_pseudo:
- case AMDGPU::CUBE_r600_real:
- case AMDGPU::CUBE_eg_pseudo:
- case AMDGPU::CUBE_eg_real:
+ case R600::CUBE_r600_pseudo:
+ case R600::CUBE_r600_real:
+ case R600::CUBE_eg_pseudo:
+ case R600::CUBE_eg_real:
return true;
}
}
@@ -149,7 +154,7 @@ bool R600InstrInfo::isLDSInstr(unsigned
}
bool R600InstrInfo::isLDSRetInstr(unsigned Opcode) const {
- return isLDSInstr(Opcode) && getOperandIdx(Opcode, AMDGPU::OpName::dst) != -1;
+ return isLDSInstr(Opcode) && getOperandIdx(Opcode, R600::OpName::dst) != -1;
}
bool R600InstrInfo::canBeConsideredALU(const MachineInstr &MI) const {
@@ -158,12 +163,12 @@ bool R600InstrInfo::canBeConsideredALU(c
if (isVector(MI) || isCubeOp(MI.getOpcode()))
return true;
switch (MI.getOpcode()) {
- case AMDGPU::PRED_X:
- case AMDGPU::INTERP_PAIR_XY:
- case AMDGPU::INTERP_PAIR_ZW:
- case AMDGPU::INTERP_VEC_LOAD:
- case AMDGPU::COPY:
- case AMDGPU::DOT_4:
+ case R600::PRED_X:
+ case R600::INTERP_PAIR_XY:
+ case R600::INTERP_PAIR_ZW:
+ case R600::INTERP_VEC_LOAD:
+ case R600::COPY:
+ case R600::DOT_4:
return true;
default:
return false;
@@ -173,7 +178,7 @@ bool R600InstrInfo::canBeConsideredALU(c
bool R600InstrInfo::isTransOnly(unsigned Opcode) const {
if (ST.hasCaymanISA())
return false;
- return (get(Opcode).getSchedClass() == AMDGPU::Sched::TransALU);
+ return (get(Opcode).getSchedClass() == R600::Sched::TransALU);
}
bool R600InstrInfo::isTransOnly(const MachineInstr &MI) const {
@@ -181,7 +186,7 @@ bool R600InstrInfo::isTransOnly(const Ma
}
bool R600InstrInfo::isVectorOnly(unsigned Opcode) const {
- return (get(Opcode).getSchedClass() == AMDGPU::Sched::VecALU);
+ return (get(Opcode).getSchedClass() == R600::Sched::VecALU);
}
bool R600InstrInfo::isVectorOnly(const MachineInstr &MI) const {
@@ -215,8 +220,8 @@ bool R600InstrInfo::usesTextureCache(con
bool R600InstrInfo::mustBeLastInClause(unsigned Opcode) const {
switch (Opcode) {
- case AMDGPU::KILLGT:
- case AMDGPU::GROUP_BARRIER:
+ case R600::KILLGT:
+ case R600::GROUP_BARRIER:
return true;
default:
return false;
@@ -224,11 +229,11 @@ bool R600InstrInfo::mustBeLastInClause(u
}
bool R600InstrInfo::usesAddressRegister(MachineInstr &MI) const {
- return MI.findRegisterUseOperandIdx(AMDGPU::AR_X) != -1;
+ return MI.findRegisterUseOperandIdx(R600::AR_X) != -1;
}
bool R600InstrInfo::definesAddressRegister(MachineInstr &MI) const {
- return MI.findRegisterDefOperandIdx(AMDGPU::AR_X) != -1;
+ return MI.findRegisterDefOperandIdx(R600::AR_X) != -1;
}
bool R600InstrInfo::readsLDSSrcReg(const MachineInstr &MI) const {
@@ -242,7 +247,7 @@ bool R600InstrInfo::readsLDSSrcReg(const
TargetRegisterInfo::isVirtualRegister(I->getReg()))
continue;
- if (AMDGPU::R600_LDS_SRC_REGRegClass.contains(I->getReg()))
+ if (R600::R600_LDS_SRC_REGRegClass.contains(I->getReg()))
return true;
}
return false;
@@ -250,17 +255,17 @@ bool R600InstrInfo::readsLDSSrcReg(const
int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
static const unsigned SrcSelTable[][2] = {
- {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
- {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
- {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
- {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
- {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
- {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
- {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
- {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
- {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
- {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
- {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}
+ {R600::OpName::src0, R600::OpName::src0_sel},
+ {R600::OpName::src1, R600::OpName::src1_sel},
+ {R600::OpName::src2, R600::OpName::src2_sel},
+ {R600::OpName::src0_X, R600::OpName::src0_sel_X},
+ {R600::OpName::src0_Y, R600::OpName::src0_sel_Y},
+ {R600::OpName::src0_Z, R600::OpName::src0_sel_Z},
+ {R600::OpName::src0_W, R600::OpName::src0_sel_W},
+ {R600::OpName::src1_X, R600::OpName::src1_sel_X},
+ {R600::OpName::src1_Y, R600::OpName::src1_sel_Y},
+ {R600::OpName::src1_Z, R600::OpName::src1_sel_Z},
+ {R600::OpName::src1_W, R600::OpName::src1_sel_W}
};
for (const auto &Row : SrcSelTable) {
@@ -275,23 +280,23 @@ SmallVector<std::pair<MachineOperand *,
R600InstrInfo::getSrcs(MachineInstr &MI) const {
SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;
- if (MI.getOpcode() == AMDGPU::DOT_4) {
+ if (MI.getOpcode() == R600::DOT_4) {
static const unsigned OpTable[8][2] = {
- {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
- {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
- {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
- {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
- {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
- {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
- {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
- {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W},
+ {R600::OpName::src0_X, R600::OpName::src0_sel_X},
+ {R600::OpName::src0_Y, R600::OpName::src0_sel_Y},
+ {R600::OpName::src0_Z, R600::OpName::src0_sel_Z},
+ {R600::OpName::src0_W, R600::OpName::src0_sel_W},
+ {R600::OpName::src1_X, R600::OpName::src1_sel_X},
+ {R600::OpName::src1_Y, R600::OpName::src1_sel_Y},
+ {R600::OpName::src1_Z, R600::OpName::src1_sel_Z},
+ {R600::OpName::src1_W, R600::OpName::src1_sel_W},
};
for (unsigned j = 0; j < 8; j++) {
MachineOperand &MO =
MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][0]));
unsigned Reg = MO.getReg();
- if (Reg == AMDGPU::ALU_CONST) {
+ if (Reg == R600::ALU_CONST) {
MachineOperand &Sel =
MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1]));
Result.push_back(std::make_pair(&MO, Sel.getImm()));
@@ -303,9 +308,9 @@ R600InstrInfo::getSrcs(MachineInstr &MI)
}
static const unsigned OpTable[3][2] = {
- {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
- {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
- {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
+ {R600::OpName::src0, R600::OpName::src0_sel},
+ {R600::OpName::src1, R600::OpName::src1_sel},
+ {R600::OpName::src2, R600::OpName::src2_sel},
};
for (unsigned j = 0; j < 3; j++) {
@@ -314,15 +319,15 @@ R600InstrInfo::getSrcs(MachineInstr &MI)
break;
MachineOperand &MO = MI.getOperand(SrcIdx);
unsigned Reg = MO.getReg();
- if (Reg == AMDGPU::ALU_CONST) {
+ if (Reg == R600::ALU_CONST) {
MachineOperand &Sel =
MI.getOperand(getOperandIdx(MI.getOpcode(), OpTable[j][1]));
Result.push_back(std::make_pair(&MO, Sel.getImm()));
continue;
}
- if (Reg == AMDGPU::ALU_LITERAL_X) {
+ if (Reg == R600::ALU_LITERAL_X) {
MachineOperand &Operand =
- MI.getOperand(getOperandIdx(MI.getOpcode(), AMDGPU::OpName::literal));
+ MI.getOperand(getOperandIdx(MI.getOpcode(), R600::OpName::literal));
if (Operand.isImm()) {
Result.push_back(std::make_pair(&MO, Operand.getImm()));
continue;
@@ -346,7 +351,7 @@ R600InstrInfo::ExtractSrcs(MachineInstr
++i;
unsigned Reg = Src.first->getReg();
int Index = RI.getEncodingValue(Reg) & 0xff;
- if (Reg == AMDGPU::OQAP) {
+ if (Reg == R600::OQAP) {
Result.push_back(std::make_pair(Index, 0U));
}
if (PV.find(Reg) != PV.end()) {
@@ -436,7 +441,7 @@ unsigned R600InstrInfo::isLegalUpTo(
const std::pair<int, unsigned> &Src = Srcs[j];
if (Src.first < 0 || Src.first == 255)
continue;
- if (Src.first == GET_REG_INDEX(RI.getEncodingValue(AMDGPU::OQAP))) {
+ if (Src.first == GET_REG_INDEX(RI.getEncodingValue(R600::OQAP))) {
if (Swz[i] != R600InstrInfo::ALU_VEC_012_SCL_210 &&
Swz[i] != R600InstrInfo::ALU_VEC_021_SCL_122) {
// The value from output queue A (denoted by register OQAP) can
@@ -542,7 +547,7 @@ R600InstrInfo::fitsReadPortLimitations(c
for (unsigned i = 0, e = IG.size(); i < e; ++i) {
IGSrcs.push_back(ExtractSrcs(*IG[i], PV, ConstCount));
unsigned Op = getOperandIdx(IG[i]->getOpcode(),
- AMDGPU::OpName::bank_swizzle);
+ R600::OpName::bank_swizzle);
ValidSwizzle.push_back( (R600InstrInfo::BankSwizzle)
IG[i]->getOperand(Op).getImm());
}
@@ -611,14 +616,14 @@ R600InstrInfo::fitsConstReadLimitations(
continue;
for (const auto &Src : getSrcs(MI)) {
- if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X)
+ if (Src.first->getReg() == R600::ALU_LITERAL_X)
Literals.insert(Src.second);
if (Literals.size() > 4)
return false;
- if (Src.first->getReg() == AMDGPU::ALU_CONST)
+ if (Src.first->getReg() == R600::ALU_CONST)
Consts.push_back(Src.second);
- if (AMDGPU::R600_KC0RegClass.contains(Src.first->getReg()) ||
- AMDGPU::R600_KC1RegClass.contains(Src.first->getReg())) {
+ if (R600::R600_KC0RegClass.contains(Src.first->getReg()) ||
+ R600::R600_KC1RegClass.contains(Src.first->getReg())) {
unsigned Index = RI.getEncodingValue(Src.first->getReg()) & 0xff;
unsigned Chan = RI.getHWRegChan(Src.first->getReg());
Consts.push_back((Index << 2) | Chan);
@@ -637,7 +642,7 @@ R600InstrInfo::CreateTargetScheduleState
static bool
isPredicateSetter(unsigned Opcode) {
switch (Opcode) {
- case AMDGPU::PRED_X:
+ case R600::PRED_X:
return true;
default:
return false;
@@ -659,12 +664,12 @@ findFirstPredicateSetterFrom(MachineBasi
static
bool isJump(unsigned Opcode) {
- return Opcode == AMDGPU::JUMP || Opcode == AMDGPU::JUMP_COND;
+ return Opcode == R600::JUMP || Opcode == R600::JUMP_COND;
}
static bool isBranch(unsigned Opcode) {
- return Opcode == AMDGPU::BRANCH || Opcode == AMDGPU::BRANCH_COND_i32 ||
- Opcode == AMDGPU::BRANCH_COND_f32;
+ return Opcode == R600::BRANCH || Opcode == R600::BRANCH_COND_i32 ||
+ Opcode == R600::BRANCH_COND_f32;
}
bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
@@ -679,7 +684,7 @@ bool R600InstrInfo::analyzeBranch(Machin
if (I == MBB.end())
return false;
- // AMDGPU::BRANCH* instructions are only available after isel and are not
+ // R600::BRANCH* instructions are only available after isel and are not
// handled
if (isBranch(I->getOpcode()))
return true;
@@ -688,7 +693,7 @@ bool R600InstrInfo::analyzeBranch(Machin
}
// Remove successive JUMP
- while (I != MBB.begin() && std::prev(I)->getOpcode() == AMDGPU::JUMP) {
+ while (I != MBB.begin() && std::prev(I)->getOpcode() == R600::JUMP) {
MachineBasicBlock::iterator PriorI = std::prev(I);
if (AllowModify)
I->removeFromParent();
@@ -699,10 +704,10 @@ bool R600InstrInfo::analyzeBranch(Machin
// If there is only one terminator instruction, process it.
unsigned LastOpc = LastInst.getOpcode();
if (I == MBB.begin() || !isJump((--I)->getOpcode())) {
- if (LastOpc == AMDGPU::JUMP) {
+ if (LastOpc == R600::JUMP) {
TBB = LastInst.getOperand(0).getMBB();
return false;
- } else if (LastOpc == AMDGPU::JUMP_COND) {
+ } else if (LastOpc == R600::JUMP_COND) {
auto predSet = I;
while (!isPredicateSetter(predSet->getOpcode())) {
predSet = --I;
@@ -710,7 +715,7 @@ bool R600InstrInfo::analyzeBranch(Machin
TBB = LastInst.getOperand(0).getMBB();
Cond.push_back(predSet->getOperand(1));
Cond.push_back(predSet->getOperand(2));
- Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
+ Cond.push_back(MachineOperand::CreateReg(R600::PRED_SEL_ONE, false));
return false;
}
return true; // Can't handle indirect branch.
@@ -721,7 +726,7 @@ bool R600InstrInfo::analyzeBranch(Machin
unsigned SecondLastOpc = SecondLastInst.getOpcode();
// If the block ends with a B and a Bcc, handle it.
- if (SecondLastOpc == AMDGPU::JUMP_COND && LastOpc == AMDGPU::JUMP) {
+ if (SecondLastOpc == R600::JUMP_COND && LastOpc == R600::JUMP) {
auto predSet = --I;
while (!isPredicateSetter(predSet->getOpcode())) {
predSet = --I;
@@ -730,7 +735,7 @@ bool R600InstrInfo::analyzeBranch(Machin
FBB = LastInst.getOperand(0).getMBB();
Cond.push_back(predSet->getOperand(1));
Cond.push_back(predSet->getOperand(2));
- Cond.push_back(MachineOperand::CreateReg(AMDGPU::PRED_SEL_ONE, false));
+ Cond.push_back(MachineOperand::CreateReg(R600::PRED_SEL_ONE, false));
return false;
}
@@ -742,8 +747,8 @@ static
MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) {
for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend();
It != E; ++It) {
- if (It->getOpcode() == AMDGPU::CF_ALU ||
- It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
+ if (It->getOpcode() == R600::CF_ALU ||
+ It->getOpcode() == R600::CF_ALU_PUSH_BEFORE)
return It.getReverse();
}
return MBB.end();
@@ -760,7 +765,7 @@ unsigned R600InstrInfo::insertBranch(Mac
if (!FBB) {
if (Cond.empty()) {
- BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(TBB);
+ BuildMI(&MBB, DL, get(R600::JUMP)).addMBB(TBB);
return 1;
} else {
MachineInstr *PredSet = findFirstPredicateSetterFrom(MBB, MBB.end());
@@ -768,14 +773,14 @@ unsigned R600InstrInfo::insertBranch(Mac
addFlag(*PredSet, 0, MO_FLAG_PUSH);
PredSet->getOperand(2).setImm(Cond[1].getImm());
- BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
+ BuildMI(&MBB, DL, get(R600::JUMP_COND))
.addMBB(TBB)
- .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
+ .addReg(R600::PREDICATE_BIT, RegState::Kill);
MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
if (CfAlu == MBB.end())
return 1;
- assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
- CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
+ assert (CfAlu->getOpcode() == R600::CF_ALU);
+ CfAlu->setDesc(get(R600::CF_ALU_PUSH_BEFORE));
return 1;
}
} else {
@@ -783,15 +788,15 @@ unsigned R600InstrInfo::insertBranch(Mac
assert(PredSet && "No previous predicate !");
addFlag(*PredSet, 0, MO_FLAG_PUSH);
PredSet->getOperand(2).setImm(Cond[1].getImm());
- BuildMI(&MBB, DL, get(AMDGPU::JUMP_COND))
+ BuildMI(&MBB, DL, get(R600::JUMP_COND))
.addMBB(TBB)
- .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
- BuildMI(&MBB, DL, get(AMDGPU::JUMP)).addMBB(FBB);
+ .addReg(R600::PREDICATE_BIT, RegState::Kill);
+ BuildMI(&MBB, DL, get(R600::JUMP)).addMBB(FBB);
MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
if (CfAlu == MBB.end())
return 2;
- assert (CfAlu->getOpcode() == AMDGPU::CF_ALU);
- CfAlu->setDesc(get(AMDGPU::CF_ALU_PUSH_BEFORE));
+ assert (CfAlu->getOpcode() == R600::CF_ALU);
+ CfAlu->setDesc(get(R600::CF_ALU_PUSH_BEFORE));
return 2;
}
}
@@ -812,18 +817,18 @@ unsigned R600InstrInfo::removeBranch(Mac
switch (I->getOpcode()) {
default:
return 0;
- case AMDGPU::JUMP_COND: {
+ case R600::JUMP_COND: {
MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
clearFlag(*predSet, 0, MO_FLAG_PUSH);
I->eraseFromParent();
MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
if (CfAlu == MBB.end())
break;
- assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
- CfAlu->setDesc(get(AMDGPU::CF_ALU));
+ assert (CfAlu->getOpcode() == R600::CF_ALU_PUSH_BEFORE);
+ CfAlu->setDesc(get(R600::CF_ALU));
break;
}
- case AMDGPU::JUMP:
+ case R600::JUMP:
I->eraseFromParent();
break;
}
@@ -837,18 +842,18 @@ unsigned R600InstrInfo::removeBranch(Mac
// FIXME: only one case??
default:
return 1;
- case AMDGPU::JUMP_COND: {
+ case R600::JUMP_COND: {
MachineInstr *predSet = findFirstPredicateSetterFrom(MBB, I);
clearFlag(*predSet, 0, MO_FLAG_PUSH);
I->eraseFromParent();
MachineBasicBlock::iterator CfAlu = FindLastAluClause(MBB);
if (CfAlu == MBB.end())
break;
- assert (CfAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE);
- CfAlu->setDesc(get(AMDGPU::CF_ALU));
+ assert (CfAlu->getOpcode() == R600::CF_ALU_PUSH_BEFORE);
+ CfAlu->setDesc(get(R600::CF_ALU));
break;
}
- case AMDGPU::JUMP:
+ case R600::JUMP:
I->eraseFromParent();
break;
}
@@ -863,9 +868,9 @@ bool R600InstrInfo::isPredicated(const M
unsigned Reg = MI.getOperand(idx).getReg();
switch (Reg) {
default: return false;
- case AMDGPU::PRED_SEL_ONE:
- case AMDGPU::PRED_SEL_ZERO:
- case AMDGPU::PREDICATE_BIT:
+ case R600::PRED_SEL_ONE:
+ case R600::PRED_SEL_ZERO:
+ case R600::PREDICATE_BIT:
return true;
}
}
@@ -876,9 +881,9 @@ bool R600InstrInfo::isPredicable(const M
// be predicated. Until we have proper support for instruction clauses in the
// backend, we will mark KILL* instructions as unpredicable.
- if (MI.getOpcode() == AMDGPU::KILLGT) {
+ if (MI.getOpcode() == R600::KILLGT) {
return false;
- } else if (MI.getOpcode() == AMDGPU::CF_ALU) {
+ } else if (MI.getOpcode() == R600::CF_ALU) {
// If the clause starts in the middle of the MBB then the MBB has more
// than a single clause; we are unable to predicate several clauses.
if (MI.getParent()->begin() != MachineBasicBlock::const_iterator(MI))
@@ -888,7 +893,7 @@ bool R600InstrInfo::isPredicable(const M
} else if (isVector(MI)) {
return false;
} else {
- return AMDGPUInstrInfo::isPredicable(MI);
+ return TargetInstrInfo::isPredicable(MI);
}
}
@@ -929,17 +934,17 @@ bool
R600InstrInfo::reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
MachineOperand &MO = Cond[1];
switch (MO.getImm()) {
- case AMDGPU::PRED_SETE_INT:
- MO.setImm(AMDGPU::PRED_SETNE_INT);
+ case R600::PRED_SETE_INT:
+ MO.setImm(R600::PRED_SETNE_INT);
break;
- case AMDGPU::PRED_SETNE_INT:
- MO.setImm(AMDGPU::PRED_SETE_INT);
+ case R600::PRED_SETNE_INT:
+ MO.setImm(R600::PRED_SETE_INT);
break;
- case AMDGPU::PRED_SETE:
- MO.setImm(AMDGPU::PRED_SETNE);
+ case R600::PRED_SETE:
+ MO.setImm(R600::PRED_SETNE);
break;
- case AMDGPU::PRED_SETNE:
- MO.setImm(AMDGPU::PRED_SETE);
+ case R600::PRED_SETNE:
+ MO.setImm(R600::PRED_SETE);
break;
default:
return true;
@@ -947,11 +952,11 @@ R600InstrInfo::reverseBranchCondition(Sm
MachineOperand &MO2 = Cond[2];
switch (MO2.getReg()) {
- case AMDGPU::PRED_SEL_ZERO:
- MO2.setReg(AMDGPU::PRED_SEL_ONE);
+ case R600::PRED_SEL_ZERO:
+ MO2.setReg(R600::PRED_SEL_ONE);
break;
- case AMDGPU::PRED_SEL_ONE:
- MO2.setReg(AMDGPU::PRED_SEL_ZERO);
+ case R600::PRED_SEL_ONE:
+ MO2.setReg(R600::PRED_SEL_ZERO);
break;
default:
return true;
@@ -968,22 +973,22 @@ bool R600InstrInfo::PredicateInstruction
ArrayRef<MachineOperand> Pred) const {
int PIdx = MI.findFirstPredOperandIdx();
- if (MI.getOpcode() == AMDGPU::CF_ALU) {
+ if (MI.getOpcode() == R600::CF_ALU) {
MI.getOperand(8).setImm(0);
return true;
}
- if (MI.getOpcode() == AMDGPU::DOT_4) {
- MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_X))
+ if (MI.getOpcode() == R600::DOT_4) {
+ MI.getOperand(getOperandIdx(MI, R600::OpName::pred_sel_X))
.setReg(Pred[2].getReg());
- MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_Y))
+ MI.getOperand(getOperandIdx(MI, R600::OpName::pred_sel_Y))
.setReg(Pred[2].getReg());
- MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_Z))
+ MI.getOperand(getOperandIdx(MI, R600::OpName::pred_sel_Z))
.setReg(Pred[2].getReg());
- MI.getOperand(getOperandIdx(MI, AMDGPU::OpName::pred_sel_W))
+ MI.getOperand(getOperandIdx(MI, R600::OpName::pred_sel_W))
.setReg(Pred[2].getReg());
MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
- MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
+ MIB.addReg(R600::PREDICATE_BIT, RegState::Implicit);
return true;
}
@@ -991,7 +996,7 @@ bool R600InstrInfo::PredicateInstruction
MachineOperand &PMO = MI.getOperand(PIdx);
PMO.setReg(Pred[2].getReg());
MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
- MIB.addReg(AMDGPU::PREDICATE_BIT, RegState::Implicit);
+ MIB.addReg(R600::PREDICATE_BIT, RegState::Implicit);
return true;
}
@@ -1021,20 +1026,20 @@ bool R600InstrInfo::expandPostRAPseudo(M
default: {
MachineBasicBlock *MBB = MI.getParent();
int OffsetOpIdx =
- AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::addr);
+ R600::getNamedOperandIdx(MI.getOpcode(), R600::OpName::addr);
// addr is a custom operand with multiple MI operands, and only the
// first MI operand is given a name.
int RegOpIdx = OffsetOpIdx + 1;
int ChanOpIdx =
- AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::chan);
+ R600::getNamedOperandIdx(MI.getOpcode(), R600::OpName::chan);
if (isRegisterLoad(MI)) {
int DstOpIdx =
- AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
+ R600::getNamedOperandIdx(MI.getOpcode(), R600::OpName::dst);
unsigned RegIndex = MI.getOperand(RegOpIdx).getImm();
unsigned Channel = MI.getOperand(ChanOpIdx).getImm();
unsigned Address = calculateIndirectAddress(RegIndex, Channel);
unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg();
- if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
+ if (OffsetReg == R600::INDIRECT_BASE_ADDR) {
buildMovInstr(MBB, MI, MI.getOperand(DstOpIdx).getReg(),
getIndirectAddrRegClass()->getRegister(Address));
} else {
@@ -1043,12 +1048,12 @@ bool R600InstrInfo::expandPostRAPseudo(M
}
} else if (isRegisterStore(MI)) {
int ValOpIdx =
- AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::val);
+ R600::getNamedOperandIdx(MI.getOpcode(), R600::OpName::val);
unsigned RegIndex = MI.getOperand(RegOpIdx).getImm();
unsigned Channel = MI.getOperand(ChanOpIdx).getImm();
unsigned Address = calculateIndirectAddress(RegIndex, Channel);
unsigned OffsetReg = MI.getOperand(OffsetOpIdx).getReg();
- if (OffsetReg == AMDGPU::INDIRECT_BASE_ADDR) {
+ if (OffsetReg == R600::INDIRECT_BASE_ADDR) {
buildMovInstr(MBB, MI, getIndirectAddrRegClass()->getRegister(Address),
MI.getOperand(ValOpIdx).getReg());
} else {
@@ -1063,15 +1068,15 @@ bool R600InstrInfo::expandPostRAPseudo(M
MBB->erase(MI);
return true;
}
- case AMDGPU::R600_EXTRACT_ELT_V2:
- case AMDGPU::R600_EXTRACT_ELT_V4:
+ case R600::R600_EXTRACT_ELT_V2:
+ case R600::R600_EXTRACT_ELT_V4:
buildIndirectRead(MI.getParent(), MI, MI.getOperand(0).getReg(),
RI.getHWRegIndex(MI.getOperand(1).getReg()), // Address
MI.getOperand(2).getReg(),
RI.getHWRegChan(MI.getOperand(1).getReg()));
break;
- case AMDGPU::R600_INSERT_ELT_V2:
- case AMDGPU::R600_INSERT_ELT_V4:
+ case R600::R600_INSERT_ELT_V2:
+ case R600::R600_INSERT_ELT_V4:
buildIndirectWrite(MI.getParent(), MI, MI.getOperand(2).getReg(), // Value
RI.getHWRegIndex(MI.getOperand(1).getReg()), // Address
MI.getOperand(3).getReg(), // Offset
@@ -1096,14 +1101,14 @@ void R600InstrInfo::reserveIndirectRegis
for (int Index = getIndirectIndexBegin(MF); Index <= End; ++Index) {
for (unsigned Chan = 0; Chan < StackWidth; ++Chan) {
- unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
+ unsigned Reg = R600::R600_TReg32RegClass.getRegister((4 * Index) + Chan);
TRI.reserveRegisterTuples(Reserved, Reg);
}
}
}
const TargetRegisterClass *R600InstrInfo::getIndirectAddrRegClass() const {
- return &AMDGPU::R600_TReg32_XRegClass;
+ return &R600::R600_TReg32_XRegClass;
}
MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB,
@@ -1121,20 +1126,20 @@ MachineInstrBuilder R600InstrInfo::build
unsigned AddrReg;
switch (AddrChan) {
default: llvm_unreachable("Invalid Channel");
- case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
- case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
- case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
- case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
- }
- MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
- AMDGPU::AR_X, OffsetReg);
- setImmOperand(*MOVA, AMDGPU::OpName::write, 0);
+ case 0: AddrReg = R600::R600_AddrRegClass.getRegister(Address); break;
+ case 1: AddrReg = R600::R600_Addr_YRegClass.getRegister(Address); break;
+ case 2: AddrReg = R600::R600_Addr_ZRegClass.getRegister(Address); break;
+ case 3: AddrReg = R600::R600_Addr_WRegClass.getRegister(Address); break;
+ }
+ MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, R600::MOVA_INT_eg,
+ R600::AR_X, OffsetReg);
+ setImmOperand(*MOVA, R600::OpName::write, 0);
- MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
+ MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, R600::MOV,
AddrReg, ValueReg)
- .addReg(AMDGPU::AR_X,
+ .addReg(R600::AR_X,
RegState::Implicit | RegState::Kill);
- setImmOperand(*Mov, AMDGPU::OpName::dst_rel, 1);
+ setImmOperand(*Mov, R600::OpName::dst_rel, 1);
return Mov;
}
@@ -1153,21 +1158,21 @@ MachineInstrBuilder R600InstrInfo::build
unsigned AddrReg;
switch (AddrChan) {
default: llvm_unreachable("Invalid Channel");
- case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break;
- case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break;
- case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break;
- case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break;
+ case 0: AddrReg = R600::R600_AddrRegClass.getRegister(Address); break;
+ case 1: AddrReg = R600::R600_Addr_YRegClass.getRegister(Address); break;
+ case 2: AddrReg = R600::R600_Addr_ZRegClass.getRegister(Address); break;
+ case 3: AddrReg = R600::R600_Addr_WRegClass.getRegister(Address); break;
}
- MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg,
- AMDGPU::AR_X,
+ MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, R600::MOVA_INT_eg,
+ R600::AR_X,
OffsetReg);
- setImmOperand(*MOVA, AMDGPU::OpName::write, 0);
- MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, AMDGPU::MOV,
+ setImmOperand(*MOVA, R600::OpName::write, 0);
+ MachineInstrBuilder Mov = buildDefaultInstruction(*MBB, I, R600::MOV,
ValueReg,
AddrReg)
- .addReg(AMDGPU::AR_X,
+ .addReg(R600::AR_X,
RegState::Implicit | RegState::Kill);
- setImmOperand(*Mov, AMDGPU::OpName::src0_rel, 1);
+ setImmOperand(*Mov, R600::OpName::src0_rel, 1);
return Mov;
}
@@ -1265,7 +1270,7 @@ MachineInstrBuilder R600InstrInfo::build
//XXX: The r600g finalizer expects this to be 1; once we've moved the
//scheduling to the backend, we can change the default to 0.
MIB.addImm(1) // $last
- .addReg(AMDGPU::PRED_SEL_OFF) // $pred_sel
+ .addReg(R600::PRED_SEL_OFF) // $pred_sel
.addImm(0) // $literal
.addImm(0); // $bank_swizzle
@@ -1286,23 +1291,23 @@ MachineInstrBuilder R600InstrInfo::build
static unsigned getSlotedOps(unsigned Op, unsigned Slot) {
switch (Op) {
- OPERAND_CASE(AMDGPU::OpName::update_exec_mask)
- OPERAND_CASE(AMDGPU::OpName::update_pred)
- OPERAND_CASE(AMDGPU::OpName::write)
- OPERAND_CASE(AMDGPU::OpName::omod)
- OPERAND_CASE(AMDGPU::OpName::dst_rel)
- OPERAND_CASE(AMDGPU::OpName::clamp)
- OPERAND_CASE(AMDGPU::OpName::src0)
- OPERAND_CASE(AMDGPU::OpName::src0_neg)
- OPERAND_CASE(AMDGPU::OpName::src0_rel)
- OPERAND_CASE(AMDGPU::OpName::src0_abs)
- OPERAND_CASE(AMDGPU::OpName::src0_sel)
- OPERAND_CASE(AMDGPU::OpName::src1)
- OPERAND_CASE(AMDGPU::OpName::src1_neg)
- OPERAND_CASE(AMDGPU::OpName::src1_rel)
- OPERAND_CASE(AMDGPU::OpName::src1_abs)
- OPERAND_CASE(AMDGPU::OpName::src1_sel)
- OPERAND_CASE(AMDGPU::OpName::pred_sel)
+ OPERAND_CASE(R600::OpName::update_exec_mask)
+ OPERAND_CASE(R600::OpName::update_pred)
+ OPERAND_CASE(R600::OpName::write)
+ OPERAND_CASE(R600::OpName::omod)
+ OPERAND_CASE(R600::OpName::dst_rel)
+ OPERAND_CASE(R600::OpName::clamp)
+ OPERAND_CASE(R600::OpName::src0)
+ OPERAND_CASE(R600::OpName::src0_neg)
+ OPERAND_CASE(R600::OpName::src0_rel)
+ OPERAND_CASE(R600::OpName::src0_abs)
+ OPERAND_CASE(R600::OpName::src0_sel)
+ OPERAND_CASE(R600::OpName::src1)
+ OPERAND_CASE(R600::OpName::src1_neg)
+ OPERAND_CASE(R600::OpName::src1_rel)
+ OPERAND_CASE(R600::OpName::src1_abs)
+ OPERAND_CASE(R600::OpName::src1_sel)
+ OPERAND_CASE(R600::OpName::pred_sel)
default:
llvm_unreachable("Wrong Operand");
}
@@ -1313,39 +1318,39 @@ static unsigned getSlotedOps(unsigned O
MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction(
MachineBasicBlock &MBB, MachineInstr *MI, unsigned Slot, unsigned DstReg)
const {
- assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented");
+ assert (MI->getOpcode() == R600::DOT_4 && "Not Implemented");
unsigned Opcode;
if (ST.getGeneration() <= R600Subtarget::R700)
- Opcode = AMDGPU::DOT4_r600;
+ Opcode = R600::DOT4_r600;
else
- Opcode = AMDGPU::DOT4_eg;
+ Opcode = R600::DOT4_eg;
MachineBasicBlock::iterator I = MI;
MachineOperand &Src0 = MI->getOperand(
- getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src0, Slot)));
+ getOperandIdx(MI->getOpcode(), getSlotedOps(R600::OpName::src0, Slot)));
MachineOperand &Src1 = MI->getOperand(
- getOperandIdx(MI->getOpcode(), getSlotedOps(AMDGPU::OpName::src1, Slot)));
+ getOperandIdx(MI->getOpcode(), getSlotedOps(R600::OpName::src1, Slot)));
MachineInstr *MIB = buildDefaultInstruction(
MBB, I, Opcode, DstReg, Src0.getReg(), Src1.getReg());
static const unsigned Operands[14] = {
- AMDGPU::OpName::update_exec_mask,
- AMDGPU::OpName::update_pred,
- AMDGPU::OpName::write,
- AMDGPU::OpName::omod,
- AMDGPU::OpName::dst_rel,
- AMDGPU::OpName::clamp,
- AMDGPU::OpName::src0_neg,
- AMDGPU::OpName::src0_rel,
- AMDGPU::OpName::src0_abs,
- AMDGPU::OpName::src0_sel,
- AMDGPU::OpName::src1_neg,
- AMDGPU::OpName::src1_rel,
- AMDGPU::OpName::src1_abs,
- AMDGPU::OpName::src1_sel,
+ R600::OpName::update_exec_mask,
+ R600::OpName::update_pred,
+ R600::OpName::write,
+ R600::OpName::omod,
+ R600::OpName::dst_rel,
+ R600::OpName::clamp,
+ R600::OpName::src0_neg,
+ R600::OpName::src0_rel,
+ R600::OpName::src0_abs,
+ R600::OpName::src0_sel,
+ R600::OpName::src1_neg,
+ R600::OpName::src1_rel,
+ R600::OpName::src1_abs,
+ R600::OpName::src1_sel,
};
MachineOperand &MO = MI->getOperand(getOperandIdx(MI->getOpcode(),
- getSlotedOps(AMDGPU::OpName::pred_sel, Slot)));
- MIB->getOperand(getOperandIdx(Opcode, AMDGPU::OpName::pred_sel))
+ getSlotedOps(R600::OpName::pred_sel, Slot)));
+ MIB->getOperand(getOperandIdx(Opcode, R600::OpName::pred_sel))
.setReg(MO.getReg());
for (unsigned i = 0; i < 14; i++) {
@@ -1362,16 +1367,16 @@ MachineInstr *R600InstrInfo::buildMovImm
MachineBasicBlock::iterator I,
unsigned DstReg,
uint64_t Imm) const {
- MachineInstr *MovImm = buildDefaultInstruction(BB, I, AMDGPU::MOV, DstReg,
- AMDGPU::ALU_LITERAL_X);
- setImmOperand(*MovImm, AMDGPU::OpName::literal, Imm);
+ MachineInstr *MovImm = buildDefaultInstruction(BB, I, R600::MOV, DstReg,
+ R600::ALU_LITERAL_X);
+ setImmOperand(*MovImm, R600::OpName::literal, Imm);
return MovImm;
}
MachineInstr *R600InstrInfo::buildMovInstr(MachineBasicBlock *MBB,
MachineBasicBlock::iterator I,
unsigned DstReg, unsigned SrcReg) const {
- return buildDefaultInstruction(*MBB, I, AMDGPU::MOV, DstReg, SrcReg);
+ return buildDefaultInstruction(*MBB, I, R600::MOV, DstReg, SrcReg);
}
int R600InstrInfo::getOperandIdx(const MachineInstr &MI, unsigned Op) const {
@@ -1379,7 +1384,7 @@ int R600InstrInfo::getOperandIdx(const M
}
int R600InstrInfo::getOperandIdx(unsigned Opcode, unsigned Op) const {
- return AMDGPU::getNamedOperandIdx(Opcode, Op);
+ return R600::getNamedOperandIdx(Opcode, Op);
}
void R600InstrInfo::setImmOperand(MachineInstr &MI, unsigned Op,
@@ -1406,25 +1411,25 @@ MachineOperand &R600InstrInfo::getFlagOp
bool IsOP3 = (TargetFlags & R600_InstFlag::OP3) == R600_InstFlag::OP3;
switch (Flag) {
case MO_FLAG_CLAMP:
- FlagIndex = getOperandIdx(MI, AMDGPU::OpName::clamp);
+ FlagIndex = getOperandIdx(MI, R600::OpName::clamp);
break;
case MO_FLAG_MASK:
- FlagIndex = getOperandIdx(MI, AMDGPU::OpName::write);
+ FlagIndex = getOperandIdx(MI, R600::OpName::write);
break;
case MO_FLAG_NOT_LAST:
case MO_FLAG_LAST:
- FlagIndex = getOperandIdx(MI, AMDGPU::OpName::last);
+ FlagIndex = getOperandIdx(MI, R600::OpName::last);
break;
case MO_FLAG_NEG:
switch (SrcIdx) {
case 0:
- FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src0_neg);
+ FlagIndex = getOperandIdx(MI, R600::OpName::src0_neg);
break;
case 1:
- FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src1_neg);
+ FlagIndex = getOperandIdx(MI, R600::OpName::src1_neg);
break;
case 2:
- FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src2_neg);
+ FlagIndex = getOperandIdx(MI, R600::OpName::src2_neg);
break;
}
break;
@@ -1435,10 +1440,10 @@ MachineOperand &R600InstrInfo::getFlagOp
(void)IsOP3;
switch (SrcIdx) {
case 0:
- FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src0_abs);
+ FlagIndex = getOperandIdx(MI, R600::OpName::src0_abs);
break;
case 1:
- FlagIndex = getOperandIdx(MI, AMDGPU::OpName::src1_abs);
+ FlagIndex = getOperandIdx(MI, R600::OpName::src1_abs);
break;
}
break;
@@ -1499,15 +1504,15 @@ unsigned R600InstrInfo::getAddressSpaceF
switch (Kind) {
case PseudoSourceValue::Stack:
case PseudoSourceValue::FixedStack:
- return AMDGPUASI.PRIVATE_ADDRESS;
+ return ST.getAMDGPUAS().PRIVATE_ADDRESS;
case PseudoSourceValue::ConstantPool:
case PseudoSourceValue::GOT:
case PseudoSourceValue::JumpTable:
case PseudoSourceValue::GlobalValueCallEntry:
case PseudoSourceValue::ExternalSymbolCallEntry:
case PseudoSourceValue::TargetCustom:
- return AMDGPUASI.CONSTANT_ADDRESS;
+ return ST.getAMDGPUAS().CONSTANT_ADDRESS;
}
llvm_unreachable("Invalid pseudo source kind");
- return AMDGPUASI.PRIVATE_ADDRESS;
+ return ST.getAMDGPUAS().PRIVATE_ADDRESS;
}
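The hunk above drops the cached AMDGPUASI member in favor of querying the subtarget on each lookup. A minimal sketch of the resulting pattern, using only the getAMDGPUAS() accessor shown in this hunk; the helper name is hypothetical:

  static unsigned privateAddrSpace(const R600Subtarget &ST) {
    // Address-space numbering now comes from the subtarget on demand,
    // not from a member cached at construction time.
    return ST.getAMDGPUAS().PRIVATE_ADDRESS;
  }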
Modified: llvm/trunk/lib/Target/AMDGPU/R600InstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600InstrInfo.h?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600InstrInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600InstrInfo.h Thu Jun 28 16:47:12 2018
@@ -15,8 +15,11 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_R600INSTRINFO_H
#define LLVM_LIB_TARGET_AMDGPU_R600INSTRINFO_H
-#include "AMDGPUInstrInfo.h"
#include "R600RegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+
+#define GET_INSTRINFO_HEADER
+#include "R600GenInstrInfo.inc"
namespace llvm {
@@ -34,7 +37,7 @@ class MachineInstr;
class MachineInstrBuilder;
class R600Subtarget;
-class R600InstrInfo final : public AMDGPUInstrInfo {
+class R600InstrInfo final : public R600GenInstrInfo {
private:
const R600RegisterInfo RI;
const R600Subtarget &ST;
@@ -324,7 +327,7 @@ public:
PseudoSourceValue::PSVKind Kind) const override;
};
-namespace AMDGPU {
+namespace R600 {
int getLDSNoRetOp(uint16_t Opcode);
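A consequence of the header changes above: R600 passes now resolve named operands through the R600-specific generated tables rather than the shared AMDGPU ones. An illustrative query, using only names that appear in this patch:

  // Look up the index of a named operand on an R600 opcode; the generated
  // R600::getNamedOperandIdx returns -1 when the opcode lacks that operand.
  int SrcIdx = R600::getNamedOperandIdx(R600::DOT_4, R600::OpName::src0_X);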
Modified: llvm/trunk/lib/Target/AMDGPU/R600Instructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600Instructions.td?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600Instructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600Instructions.td Thu Jun 28 16:47:12 2018
@@ -18,13 +18,13 @@ include "R600InstrFormats.td"
class R600WrapperInst <dag outs, dag ins, string asm = "", list<dag> pattern = []> :
AMDGPUInst<outs, ins, asm, pattern>, PredicateControl {
let SubtargetPredicate = isR600toCayman;
+ let Namespace = "R600";
}
class InstR600ISA <dag outs, dag ins, string asm, list<dag> pattern = []> :
InstR600 <outs, ins, asm, pattern, NullALU> {
- let Namespace = "AMDGPU";
}
def MEMxi : Operand<iPTR> {
@@ -86,6 +86,12 @@ def ADDRIndirect : ComplexPattern<iPTR,
def R600_Pred : PredicateOperand<i32, (ops R600_Predicate),
(ops PRED_SEL_OFF)>;
+let isTerminator = 1, isReturn = 1, hasCtrlDep = 1,
+ usesCustomInserter = 1, Namespace = "R600" in {
+ def RETURN : ILFormat<(outs), (ins variable_ops),
+ "RETURN", [(AMDGPUendpgm)]
+ >;
+}
let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
@@ -219,34 +225,6 @@ class R600_REDUCTION <bits<11> inst, dag
} // End mayLoad = 1, mayStore = 0, hasSideEffects = 0
-def TEX_SHADOW : PatLeaf<
- (imm),
- [{uint32_t TType = (uint32_t)N->getZExtValue();
- return (TType >= 6 && TType <= 8) || TType == 13;
- }]
->;
-
-def TEX_RECT : PatLeaf<
- (imm),
- [{uint32_t TType = (uint32_t)N->getZExtValue();
- return TType == 5;
- }]
->;
-
-def TEX_ARRAY : PatLeaf<
- (imm),
- [{uint32_t TType = (uint32_t)N->getZExtValue();
- return TType == 9 || TType == 10 || TType == 16;
- }]
->;
-
-def TEX_SHADOW_ARRAY : PatLeaf<
- (imm),
- [{uint32_t TType = (uint32_t)N->getZExtValue();
- return TType == 11 || TType == 12 || TType == 17;
- }]
->;
-
class EG_CF_RAT <bits <8> cfinst, bits <6> ratinst, bits<4> ratid, bits<4> mask,
dag outs, dag ins, string asm, list<dag> pattern> :
InstR600ISA <outs, ins, asm, pattern>,
@@ -357,6 +335,8 @@ def vtx_id2_load : LoadVtxId2 <load>;
// R600 SDNodes
//===----------------------------------------------------------------------===//
+let Namespace = "R600" in {
+
def INTERP_PAIR_XY : AMDGPUShaderInst <
(outs R600_TReg32_X:$dst0, R600_TReg32_Y:$dst1),
(ins i32imm:$src0, R600_TReg32_Y:$src1, R600_TReg32_X:$src2),
@@ -369,6 +349,8 @@ def INTERP_PAIR_ZW : AMDGPUShaderInst <
"INTERP_PAIR_ZW $src0 $src1 $src2 : $dst0 dst1",
[]>;
+}
+
def CONST_ADDRESS: SDNode<"AMDGPUISD::CONST_ADDRESS",
SDTypeProfile<1, -1, [SDTCisInt<0>, SDTCisPtrTy<1>]>,
[SDNPVariadic]
@@ -416,11 +398,15 @@ def : R600Pat<(TEXTURE_FETCH (i32 Textur
// Interpolation Instructions
//===----------------------------------------------------------------------===//
+let Namespace = "R600" in {
+
def INTERP_VEC_LOAD : AMDGPUShaderInst <
(outs R600_Reg128:$dst),
(ins i32imm:$src0),
"INTERP_LOAD $src0 : $dst">;
+}
+
def INTERP_XY : R600_2OP <0xD6, "INTERP_XY", []> {
let bank_swizzle = 5;
}
@@ -660,7 +646,7 @@ def PAD : R600WrapperInst <(outs), (ins)
let isCodeGenOnly = 1, isPseudo = 1 in {
-let usesCustomInserter = 1 in {
+let Namespace = "R600", usesCustomInserter = 1 in {
class FABS <RegisterClass rc> : AMDGPUShaderInst <
(outs rc:$dst),
@@ -792,7 +778,9 @@ class MOV_IMM <ValueType vt, Operand imm
(ins immType:$imm),
"",
[]
->;
+> {
+ let Namespace = "R600";
+}
} // end let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1
@@ -1007,7 +995,7 @@ class CNDGE_Common <bits<5> inst> : R600
}
-let isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in {
+let isCodeGenOnly = 1, isPseudo = 1, Namespace = "R600" in {
class R600_VEC2OP<list<dag> pattern> : InstR600 <(outs R600_Reg32:$dst), (ins
// Slot X
UEM:$update_exec_mask_X, UP:$update_pred_X, WRITE:$write_X,
@@ -1326,7 +1314,9 @@ let Predicates = [isR600] in {
// Register loads and stores - for indirect addressing
//===----------------------------------------------------------------------===//
+let Namespace = "R600" in {
defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>;
+}
// Hardcode channel to 0
// NOTE: LSHR is not available here. LSHR is per family instruction
@@ -1378,11 +1368,12 @@ let usesCustomInserter = 1 in {
let mayLoad = 0, mayStore = 0, hasSideEffects = 1 in {
-def MASK_WRITE : AMDGPUShaderInst <
+def MASK_WRITE : InstR600 <
(outs),
(ins R600_Reg32:$src),
"MASK_WRITE $src",
- []
+ [],
+ NullALU
>;
} // End mayLoad = 0, mayStore = 0, hasSideEffects = 1
@@ -1413,7 +1404,7 @@ def TXD_SHADOW: InstR600 <
// Constant Buffer Addressing Support
//===----------------------------------------------------------------------===//
-let usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU" in {
+let usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "R600" in {
def CONST_COPY : Instruction {
let OutOperandList = (outs R600_Reg32:$dst);
let InOperandList = (ins i32imm:$src);
@@ -1536,23 +1527,6 @@ let Inst{63-32} = Word1;
//===---------------------------------------------------------------------===//
// Flow and Program control Instructions
//===---------------------------------------------------------------------===//
-class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
-: Instruction {
-
- let Namespace = "AMDGPU";
- dag OutOperandList = outs;
- dag InOperandList = ins;
- let Pattern = pattern;
- let AsmString = !strconcat(asmstr, "\n");
- let isPseudo = 1;
- let Itinerary = NullALU;
- bit hasIEEEFlag = 0;
- bit hasZeroOpFlag = 0;
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let isCodeGenOnly = 1;
-}
multiclass BranchConditional<SDNode Op, RegisterClass rci, RegisterClass rcf> {
def _i32 : ILFormat<(outs),
@@ -1584,23 +1558,14 @@ multiclass BranchInstr2<string name> {
// Custom Inserter for Branches and returns, this eventually will be a
// separate pass
//===---------------------------------------------------------------------===//
-let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in {
+let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1,
+ Namespace = "R600" in {
def BRANCH : ILFormat<(outs), (ins brtarget:$target),
"; Pseudo unconditional branch instruction",
[(br bb:$target)]>;
defm BRANCH_COND : BranchConditional<IL_brcond, R600_Reg32, R600_Reg32>;
}
-//===---------------------------------------------------------------------===//
-// Return instruction
-//===---------------------------------------------------------------------===//
-let isTerminator = 1, isReturn = 1, hasCtrlDep = 1,
- usesCustomInserter = 1 in {
- def RETURN : ILFormat<(outs), (ins variable_ops),
- "RETURN", [(AMDGPUendpgm)]
- >;
-}
-
//===----------------------------------------------------------------------===//
// Branch Instructions
//===----------------------------------------------------------------------===//
@@ -1731,7 +1696,7 @@ def : R600Pat <
// KIL Patterns
def KIL : R600Pat <
- (int_AMDGPU_kill f32:$src0),
+ (int_r600_kill f32:$src0),
(MASK_WRITE (KILLGT (f32 ZERO), $src0))
>;
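The Namespace = "R600" changes in this file are what move the generated instruction enums out of llvm::AMDGPU and into llvm::R600; they are the TableGen side of the mechanical AMDGPU:: to R600:: renames in the C++ hunks above. Illustrative effect on C++ users of these opcodes:

  unsigned Opc = R600::RETURN; // previously spelled AMDGPU::RETURN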
Modified: llvm/trunk/lib/Target/AMDGPU/R600MachineScheduler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600MachineScheduler.cpp?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600MachineScheduler.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600MachineScheduler.cpp Thu Jun 28 16:47:12 2018
@@ -162,7 +162,7 @@ void R600SchedStrategy::schedNode(SUnit
for (MachineInstr::mop_iterator It = SU->getInstr()->operands_begin(),
E = SU->getInstr()->operands_end(); It != E; ++It) {
MachineOperand &MO = *It;
- if (MO.isReg() && MO.getReg() == AMDGPU::ALU_LITERAL_X)
+ if (MO.isReg() && MO.getReg() == R600::ALU_LITERAL_X)
++CurEmitted;
}
}
@@ -181,7 +181,7 @@ void R600SchedStrategy::schedNode(SUnit
static bool
isPhysicalRegCopy(MachineInstr *MI) {
- if (MI->getOpcode() != AMDGPU::COPY)
+ if (MI->getOpcode() != R600::COPY)
return false;
return !TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg());
@@ -224,14 +224,14 @@ R600SchedStrategy::AluKind R600SchedStra
return AluTrans;
switch (MI->getOpcode()) {
- case AMDGPU::PRED_X:
+ case R600::PRED_X:
return AluPredX;
- case AMDGPU::INTERP_PAIR_XY:
- case AMDGPU::INTERP_PAIR_ZW:
- case AMDGPU::INTERP_VEC_LOAD:
- case AMDGPU::DOT_4:
+ case R600::INTERP_PAIR_XY:
+ case R600::INTERP_PAIR_ZW:
+ case R600::INTERP_VEC_LOAD:
+ case R600::DOT_4:
return AluT_XYZW;
- case AMDGPU::COPY:
+ case R600::COPY:
if (MI->getOperand(1).isUndef()) {
// MI will become a KILL; don't consider it in scheduling
return AluDiscarded;
@@ -246,7 +246,7 @@ R600SchedStrategy::AluKind R600SchedStra
if(TII->isVector(*MI) ||
TII->isCubeOp(MI->getOpcode()) ||
TII->isReductionOp(MI->getOpcode()) ||
- MI->getOpcode() == AMDGPU::GROUP_BARRIER) {
+ MI->getOpcode() == R600::GROUP_BARRIER) {
return AluT_XYZW;
}
@@ -257,13 +257,13 @@ R600SchedStrategy::AluKind R600SchedStra
// Is the result already assigned to a channel?
unsigned DestSubReg = MI->getOperand(0).getSubReg();
switch (DestSubReg) {
- case AMDGPU::sub0:
+ case R600::sub0:
return AluT_X;
- case AMDGPU::sub1:
+ case R600::sub1:
return AluT_Y;
- case AMDGPU::sub2:
+ case R600::sub2:
return AluT_Z;
- case AMDGPU::sub3:
+ case R600::sub3:
return AluT_W;
default:
break;
@@ -271,16 +271,16 @@ R600SchedStrategy::AluKind R600SchedStra
// Is the result already a member of an X/Y/Z/W class?
unsigned DestReg = MI->getOperand(0).getReg();
- if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_XRegClass) ||
- regBelongsToClass(DestReg, &AMDGPU::R600_AddrRegClass))
+ if (regBelongsToClass(DestReg, &R600::R600_TReg32_XRegClass) ||
+ regBelongsToClass(DestReg, &R600::R600_AddrRegClass))
return AluT_X;
- if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_YRegClass))
+ if (regBelongsToClass(DestReg, &R600::R600_TReg32_YRegClass))
return AluT_Y;
- if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass))
+ if (regBelongsToClass(DestReg, &R600::R600_TReg32_ZRegClass))
return AluT_Z;
- if (regBelongsToClass(DestReg, &AMDGPU::R600_TReg32_WRegClass))
+ if (regBelongsToClass(DestReg, &R600::R600_TReg32_WRegClass))
return AluT_W;
- if (regBelongsToClass(DestReg, &AMDGPU::R600_Reg128RegClass))
+ if (regBelongsToClass(DestReg, &R600::R600_Reg128RegClass))
return AluT_XYZW;
// LDS src registers cannot be used in the Trans slot.
@@ -301,13 +301,13 @@ int R600SchedStrategy::getInstKind(SUnit
}
switch (Opcode) {
- case AMDGPU::PRED_X:
- case AMDGPU::COPY:
- case AMDGPU::CONST_COPY:
- case AMDGPU::INTERP_PAIR_XY:
- case AMDGPU::INTERP_PAIR_ZW:
- case AMDGPU::INTERP_VEC_LOAD:
- case AMDGPU::DOT_4:
+ case R600::PRED_X:
+ case R600::COPY:
+ case R600::CONST_COPY:
+ case R600::INTERP_PAIR_XY:
+ case R600::INTERP_PAIR_ZW:
+ case R600::INTERP_VEC_LOAD:
+ case R600::DOT_4:
return IDAlu;
default:
return IDOther;
@@ -353,7 +353,7 @@ void R600SchedStrategy::PrepareNextSlot(
}
void R600SchedStrategy::AssignSlot(MachineInstr* MI, unsigned Slot) {
- int DstIndex = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
+ int DstIndex = TII->getOperandIdx(MI->getOpcode(), R600::OpName::dst);
if (DstIndex == -1) {
return;
}
@@ -370,16 +370,16 @@ void R600SchedStrategy::AssignSlot(Machi
// Constrains the regclass of DestReg to assign it to Slot
switch (Slot) {
case 0:
- MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_XRegClass);
+ MRI->constrainRegClass(DestReg, &R600::R600_TReg32_XRegClass);
break;
case 1:
- MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_YRegClass);
+ MRI->constrainRegClass(DestReg, &R600::R600_TReg32_YRegClass);
break;
case 2:
- MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_ZRegClass);
+ MRI->constrainRegClass(DestReg, &R600::R600_TReg32_ZRegClass);
break;
case 3:
- MRI->constrainRegClass(DestReg, &AMDGPU::R600_TReg32_WRegClass);
+ MRI->constrainRegClass(DestReg, &R600::R600_TReg32_WRegClass);
break;
}
}
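AssignSlot above pins an instruction's result to one of the four ALU channels by constraining its register class. The same slot-to-class mapping, factored into a standalone helper for illustration (not part of the patch):

  // Map an ALU slot (0..3 = X,Y,Z,W) to the register class that confines a
  // virtual register to that channel; nullptr for an out-of-range slot.
  static const TargetRegisterClass *slotRegClass(unsigned Slot) {
    switch (Slot) {
    case 0: return &R600::R600_TReg32_XRegClass;
    case 1: return &R600::R600_TReg32_YRegClass;
    case 2: return &R600::R600_TReg32_ZRegClass;
    case 3: return &R600::R600_TReg32_WRegClass;
    default: return nullptr;
    }
  }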
Modified: llvm/trunk/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp Thu Jun 28 16:47:12 2018
@@ -79,7 +79,7 @@ public:
std::vector<unsigned> UndefReg;
RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) {
- assert(MI->getOpcode() == AMDGPU::REG_SEQUENCE);
+ assert(MI->getOpcode() == R600::REG_SEQUENCE);
for (unsigned i = 1, e = Instr->getNumOperands(); i < e; i+=2) {
MachineOperand &MO = Instr->getOperand(i);
unsigned Chan = Instr->getOperand(i + 1).getImm();
@@ -159,8 +159,8 @@ bool R600VectorRegMerger::canSwizzle(con
if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST)
return true;
switch (MI.getOpcode()) {
- case AMDGPU::R600_ExportSwz:
- case AMDGPU::EG_ExportSwz:
+ case R600::R600_ExportSwz:
+ case R600::EG_ExportSwz:
return true;
default:
return false;
@@ -213,12 +213,12 @@ MachineInstr *R600VectorRegMerger::Rebui
std::vector<unsigned> UpdatedUndef = BaseRSI->UndefReg;
for (DenseMap<unsigned, unsigned>::iterator It = RSI->RegToChan.begin(),
E = RSI->RegToChan.end(); It != E; ++It) {
- unsigned DstReg = MRI->createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
+ unsigned DstReg = MRI->createVirtualRegister(&R600::R600_Reg128RegClass);
unsigned SubReg = (*It).first;
unsigned Swizzle = (*It).second;
unsigned Chan = getReassignedChan(RemapChan, Swizzle);
- MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(AMDGPU::INSERT_SUBREG),
+ MachineInstr *Tmp = BuildMI(MBB, Pos, DL, TII->get(R600::INSERT_SUBREG),
DstReg)
.addReg(SrcVec)
.addReg(SubReg)
@@ -234,7 +234,7 @@ MachineInstr *R600VectorRegMerger::Rebui
SrcVec = DstReg;
}
MachineInstr *NewMI =
- BuildMI(MBB, Pos, DL, TII->get(AMDGPU::COPY), Reg).addReg(SrcVec);
+ BuildMI(MBB, Pos, DL, TII->get(R600::COPY), Reg).addReg(SrcVec);
LLVM_DEBUG(dbgs() << " ->"; NewMI->dump(););
LLVM_DEBUG(dbgs() << " Updating Swizzle:\n");
@@ -354,7 +354,7 @@ bool R600VectorRegMerger::runOnMachineFu
for (MachineBasicBlock::iterator MII = MB->begin(), MIIE = MB->end();
MII != MIIE; ++MII) {
MachineInstr &MI = *MII;
- if (MI.getOpcode() != AMDGPU::REG_SEQUENCE) {
+ if (MI.getOpcode() != R600::REG_SEQUENCE) {
if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST) {
unsigned Reg = MI.getOperand(1).getReg();
for (MachineRegisterInfo::def_instr_iterator
Modified: llvm/trunk/lib/Target/AMDGPU/R600Packetizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600Packetizer.cpp?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600Packetizer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600Packetizer.cpp Thu Jun 28 16:47:12 2018
@@ -84,39 +84,39 @@ private:
LastDstChan = BISlot;
if (TII->isPredicated(*BI))
continue;
- int OperandIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::write);
+ int OperandIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::write);
if (OperandIdx > -1 && BI->getOperand(OperandIdx).getImm() == 0)
continue;
- int DstIdx = TII->getOperandIdx(BI->getOpcode(), AMDGPU::OpName::dst);
+ int DstIdx = TII->getOperandIdx(BI->getOpcode(), R600::OpName::dst);
if (DstIdx == -1) {
continue;
}
unsigned Dst = BI->getOperand(DstIdx).getReg();
if (isTrans || TII->isTransOnly(*BI)) {
- Result[Dst] = AMDGPU::PS;
+ Result[Dst] = R600::PS;
continue;
}
- if (BI->getOpcode() == AMDGPU::DOT4_r600 ||
- BI->getOpcode() == AMDGPU::DOT4_eg) {
- Result[Dst] = AMDGPU::PV_X;
+ if (BI->getOpcode() == R600::DOT4_r600 ||
+ BI->getOpcode() == R600::DOT4_eg) {
+ Result[Dst] = R600::PV_X;
continue;
}
- if (Dst == AMDGPU::OQAP) {
+ if (Dst == R600::OQAP) {
continue;
}
unsigned PVReg = 0;
switch (TRI.getHWRegChan(Dst)) {
case 0:
- PVReg = AMDGPU::PV_X;
+ PVReg = R600::PV_X;
break;
case 1:
- PVReg = AMDGPU::PV_Y;
+ PVReg = R600::PV_Y;
break;
case 2:
- PVReg = AMDGPU::PV_Z;
+ PVReg = R600::PV_Z;
break;
case 3:
- PVReg = AMDGPU::PV_W;
+ PVReg = R600::PV_W;
break;
default:
llvm_unreachable("Invalid Chan");
@@ -129,9 +129,9 @@ private:
void substitutePV(MachineInstr &MI, const DenseMap<unsigned, unsigned> &PVs)
const {
unsigned Ops[] = {
- AMDGPU::OpName::src0,
- AMDGPU::OpName::src1,
- AMDGPU::OpName::src2
+ R600::OpName::src0,
+ R600::OpName::src1,
+ R600::OpName::src2
};
for (unsigned i = 0; i < 3; i++) {
int OperandIdx = TII->getOperandIdx(MI.getOpcode(), Ops[i]);
@@ -171,7 +171,7 @@ public:
return true;
if (!TII->isALUInstr(MI.getOpcode()))
return true;
- if (MI.getOpcode() == AMDGPU::GROUP_BARRIER)
+ if (MI.getOpcode() == R600::GROUP_BARRIER)
return true;
// XXX: This can be removed once the packetizer properly handles all the
// LDS instruction group restrictions.
@@ -185,8 +185,8 @@ public:
if (getSlot(*MII) == getSlot(*MIJ))
ConsideredInstUsesAlreadyWrittenVectorElement = true;
// Does MII and MIJ share the same pred_sel ?
- int OpI = TII->getOperandIdx(MII->getOpcode(), AMDGPU::OpName::pred_sel),
- OpJ = TII->getOperandIdx(MIJ->getOpcode(), AMDGPU::OpName::pred_sel);
+ int OpI = TII->getOperandIdx(MII->getOpcode(), R600::OpName::pred_sel),
+ OpJ = TII->getOperandIdx(MIJ->getOpcode(), R600::OpName::pred_sel);
unsigned PredI = (OpI > -1)?MII->getOperand(OpI).getReg():0,
PredJ = (OpJ > -1)?MIJ->getOperand(OpJ).getReg():0;
if (PredI != PredJ)
@@ -220,7 +220,7 @@ public:
}
void setIsLastBit(MachineInstr *MI, unsigned Bit) const {
- unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::last);
+ unsigned LastOp = TII->getOperandIdx(MI->getOpcode(), R600::OpName::last);
MI->getOperand(LastOp).setImm(Bit);
}
@@ -301,11 +301,11 @@ public:
for (unsigned i = 0, e = CurrentPacketMIs.size(); i < e; i++) {
MachineInstr *MI = CurrentPacketMIs[i];
unsigned Op = TII->getOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::bank_swizzle);
+ R600::OpName::bank_swizzle);
MI->getOperand(Op).setImm(BS[i]);
}
unsigned Op =
- TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::bank_swizzle);
+ TII->getOperandIdx(MI.getOpcode(), R600::OpName::bank_swizzle);
MI.getOperand(Op).setImm(BS.back());
if (!CurrentPacketMIs.empty())
setIsLastBit(CurrentPacketMIs.back(), 0);
@@ -334,6 +334,7 @@ bool R600Packetizer::runOnMachineFunctio
// DFA state table should not be empty.
assert(Packetizer.getResourceTracker() && "Empty DFA table!");
+ assert(Packetizer.getResourceTracker()->getInstrItins());
if (Packetizer.getResourceTracker()->getInstrItins()->isEmpty())
return false;
@@ -353,8 +354,8 @@ bool R600Packetizer::runOnMachineFunctio
MachineBasicBlock::iterator End = MBB->end();
MachineBasicBlock::iterator MI = MBB->begin();
while (MI != End) {
- if (MI->isKill() || MI->getOpcode() == AMDGPU::IMPLICIT_DEF ||
- (MI->getOpcode() == AMDGPU::CF_ALU && !MI->getOperand(8).getImm())) {
+ if (MI->isKill() || MI->getOpcode() == R600::IMPLICIT_DEF ||
+ (MI->getOpcode() == R600::CF_ALU && !MI->getOperand(8).getImm())) {
MachineBasicBlock::iterator DeleteMI = MI;
++MI;
MBB->erase(DeleteMI);
Modified: llvm/trunk/lib/Target/AMDGPU/R600Processors.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600Processors.td?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600Processors.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600Processors.td Thu Jun 28 16:47:12 2018
@@ -7,6 +7,62 @@
//
//===----------------------------------------------------------------------===//
+class SubtargetFeatureFetchLimit <string Value> :
+ SubtargetFeature <"fetch"#Value,
+ "TexVTXClauseSize",
+ Value,
+ "Limit the maximum number of fetches in a clause to "#Value
+>;
+
+def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst",
+ "R600ALUInst",
+ "false",
+ "Older version of ALU instructions encoding"
+>;
+
+def FeatureFetchLimit8 : SubtargetFeatureFetchLimit <"8">;
+def FeatureFetchLimit16 : SubtargetFeatureFetchLimit <"16">;
+
+def FeatureVertexCache : SubtargetFeature<"HasVertexCache",
+ "HasVertexCache",
+ "true",
+ "Specify use of dedicated vertex cache"
+>;
+
+def FeatureCaymanISA : SubtargetFeature<"caymanISA",
+ "CaymanISA",
+ "true",
+ "Use Cayman ISA"
+>;
+
+def FeatureCFALUBug : SubtargetFeature<"cfalubug",
+ "CFALUBug",
+ "true",
+ "GPU has CF_ALU bug"
+>;
+
+class R600SubtargetFeatureGeneration <string Value,
+ list<SubtargetFeature> Implies> :
+ SubtargetFeatureGeneration <Value, "R600Subtarget", Implies>;
+
+def FeatureR600 : R600SubtargetFeatureGeneration<"R600",
+ [FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0]
+>;
+
+def FeatureR700 : R600SubtargetFeatureGeneration<"R700",
+ [FeatureFetchLimit16, FeatureLocalMemorySize0]
+>;
+
+def FeatureEvergreen : R600SubtargetFeatureGeneration<"EVERGREEN",
+ [FeatureFetchLimit16, FeatureLocalMemorySize32768]
+>;
+
+def FeatureNorthernIslands : R600SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
+ [FeatureFetchLimit16, FeatureWavefrontSize64,
+ FeatureLocalMemorySize32768]
+>;
+
+
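Each feature above names the R600Subtarget field it sets, and target code branches on the matching accessor. A sketch of the usage pattern; hasCaymanISA() appears elsewhere in this patch, so only its pairing with FeatureCaymanISA is assumed here:

  // FeatureCaymanISA sets R600Subtarget::CaymanISA, surfaced as an accessor.
  if (ST.hasCaymanISA())
    return false; // e.g. isTransOnly() bails out early on Cayman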
//===----------------------------------------------------------------------===//
// Radeon HD 2000/3000 Series (R600).
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/lib/Target/AMDGPU/R600RegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600RegisterInfo.cpp?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600RegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600RegisterInfo.cpp Thu Jun 28 16:47:12 2018
@@ -21,34 +21,37 @@
using namespace llvm;
-R600RegisterInfo::R600RegisterInfo() : AMDGPURegisterInfo() {
+R600RegisterInfo::R600RegisterInfo() : R600GenRegisterInfo(0) {
RCW.RegWeight = 0;
RCW.WeightLimit = 0;
}
+#define GET_REGINFO_TARGET_DESC
+#include "R600GenRegisterInfo.inc"
+
BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
const R600InstrInfo *TII = ST.getInstrInfo();
- reserveRegisterTuples(Reserved, AMDGPU::ZERO);
- reserveRegisterTuples(Reserved, AMDGPU::HALF);
- reserveRegisterTuples(Reserved, AMDGPU::ONE);
- reserveRegisterTuples(Reserved, AMDGPU::ONE_INT);
- reserveRegisterTuples(Reserved, AMDGPU::NEG_HALF);
- reserveRegisterTuples(Reserved, AMDGPU::NEG_ONE);
- reserveRegisterTuples(Reserved, AMDGPU::PV_X);
- reserveRegisterTuples(Reserved, AMDGPU::ALU_LITERAL_X);
- reserveRegisterTuples(Reserved, AMDGPU::ALU_CONST);
- reserveRegisterTuples(Reserved, AMDGPU::PREDICATE_BIT);
- reserveRegisterTuples(Reserved, AMDGPU::PRED_SEL_OFF);
- reserveRegisterTuples(Reserved, AMDGPU::PRED_SEL_ZERO);
- reserveRegisterTuples(Reserved, AMDGPU::PRED_SEL_ONE);
- reserveRegisterTuples(Reserved, AMDGPU::INDIRECT_BASE_ADDR);
+ reserveRegisterTuples(Reserved, R600::ZERO);
+ reserveRegisterTuples(Reserved, R600::HALF);
+ reserveRegisterTuples(Reserved, R600::ONE);
+ reserveRegisterTuples(Reserved, R600::ONE_INT);
+ reserveRegisterTuples(Reserved, R600::NEG_HALF);
+ reserveRegisterTuples(Reserved, R600::NEG_ONE);
+ reserveRegisterTuples(Reserved, R600::PV_X);
+ reserveRegisterTuples(Reserved, R600::ALU_LITERAL_X);
+ reserveRegisterTuples(Reserved, R600::ALU_CONST);
+ reserveRegisterTuples(Reserved, R600::PREDICATE_BIT);
+ reserveRegisterTuples(Reserved, R600::PRED_SEL_OFF);
+ reserveRegisterTuples(Reserved, R600::PRED_SEL_ZERO);
+ reserveRegisterTuples(Reserved, R600::PRED_SEL_ONE);
+ reserveRegisterTuples(Reserved, R600::INDIRECT_BASE_ADDR);
- for (TargetRegisterClass::iterator I = AMDGPU::R600_AddrRegClass.begin(),
- E = AMDGPU::R600_AddrRegClass.end(); I != E; ++I) {
+ for (TargetRegisterClass::iterator I = R600::R600_AddrRegClass.begin(),
+ E = R600::R600_AddrRegClass.end(); I != E; ++I) {
reserveRegisterTuples(Reserved, *I);
}
@@ -58,7 +61,7 @@ BitVector R600RegisterInfo::getReservedR
}
// Dummy to not crash RegisterClassInfo.
-static const MCPhysReg CalleeSavedReg = AMDGPU::NoRegister;
+static const MCPhysReg CalleeSavedReg = R600::NoRegister;
const MCPhysReg *R600RegisterInfo::getCalleeSavedRegs(
const MachineFunction *) const {
@@ -66,7 +69,7 @@ const MCPhysReg *R600RegisterInfo::getCa
}
unsigned R600RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
- return AMDGPU::NoRegister;
+ return R600::NoRegister;
}
unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const {
@@ -81,7 +84,7 @@ const TargetRegisterClass * R600Register
MVT VT) const {
switch(VT.SimpleTy) {
default:
- case MVT::i32: return &AMDGPU::R600_TReg32RegClass;
+ case MVT::i32: return &R600::R600_TReg32RegClass;
}
}
@@ -94,9 +97,9 @@ bool R600RegisterInfo::isPhysRegLiveAcro
assert(!TargetRegisterInfo::isVirtualRegister(Reg));
switch (Reg) {
- case AMDGPU::OQAP:
- case AMDGPU::OQBP:
- case AMDGPU::AR_X:
+ case R600::OQAP:
+ case R600::OQBP:
+ case R600::AR_X:
return false;
default:
return true;
@@ -109,3 +112,10 @@ void R600RegisterInfo::eliminateFrameInd
RegScavenger *RS) const {
llvm_unreachable("Subroutines not supported yet");
}
+
+void R600RegisterInfo::reserveRegisterTuples(BitVector &Reserved,
+                                             unsigned Reg) const {
+ MCRegAliasIterator R(Reg, this, true);
+
+ for (; R.isValid(); ++R)
+ Reserved.set(*R);
+}
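
The new reserveRegisterTuples() helper above reserves a register together
with everything that aliases it; passing true to MCRegAliasIterator makes
the walk include the register itself. A minimal standalone sketch of the
same alias walk, with an invented alias table standing in for the
TableGen-generated register info (the register numbers are made up for
illustration):

#include <bitset>
#include <cstdio>
#include <vector>

// Hypothetical alias sets: each register's list starts with itself, the
// way MCRegAliasIterator(Reg, TRI, /*IncludeSelf=*/true) iterates.
static const std::vector<unsigned> &aliasesOf(unsigned Reg) {
  static const std::vector<unsigned> Table[] = {
      {0, 4, 5}, {1, 4}, {2, 5}, {3}, {4, 0, 1}, {5, 0, 2}};
  return Table[Reg];
}

int main() {
  std::bitset<6> Reserved;
  // Reserve register 0 and every register overlapping it -- the same
  // shape as the reserveRegisterTuples() loop above.
  for (unsigned A : aliasesOf(0))
    Reserved.set(A);
  std::printf("reserved mask: %s\n", Reserved.to_string().c_str());
  return 0;
}
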
Modified: llvm/trunk/lib/Target/AMDGPU/R600RegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600RegisterInfo.h?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600RegisterInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600RegisterInfo.h Thu Jun 28 16:47:12 2018
@@ -15,13 +15,14 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_R600REGISTERINFO_H
#define LLVM_LIB_TARGET_AMDGPU_R600REGISTERINFO_H
-#include "AMDGPURegisterInfo.h"
+#define GET_REGINFO_HEADER
+#include "R600GenRegisterInfo.inc"
namespace llvm {
class AMDGPUSubtarget;
-struct R600RegisterInfo final : public AMDGPURegisterInfo {
+struct R600RegisterInfo final : public R600GenRegisterInfo {
RegClassWeight RCW;
R600RegisterInfo();
@@ -49,6 +50,8 @@ struct R600RegisterInfo final : public A
void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
unsigned FIOperandNum,
RegScavenger *RS = nullptr) const override;
+
+ void reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const;
};
} // End namespace llvm
Modified: llvm/trunk/lib/Target/AMDGPU/R600RegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600RegisterInfo.td?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600RegisterInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600RegisterInfo.td Thu Jun 28 16:47:12 2018
@@ -245,7 +245,7 @@ def R600_Reg128Vertical : RegisterClass<
(add V0123_W, V0123_Z, V0123_Y, V0123_X)
>;
-def R600_Reg64 : RegisterClass<"AMDGPU", [v2f32, v2i32], 64,
+def R600_Reg64 : RegisterClass<"AMDGPU", [v2f32, v2i32, i64, f64], 64,
(add (sequence "T%u_XY", 0, 63))>;
def R600_Reg64Vertical : RegisterClass<"AMDGPU", [v2f32, v2i32], 64,
Modified: llvm/trunk/lib/Target/AMDGPU/R700Instructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R700Instructions.td?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R700Instructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/R700Instructions.td Thu Jun 28 16:47:12 2018
@@ -13,7 +13,7 @@
//
//===----------------------------------------------------------------------===//
-def isR700 : Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::R700">;
+def isR700 : Predicate<"Subtarget->getGeneration() == R600Subtarget::R700">;
let Predicates = [isR700] in {
def SIN_r700 : SIN_Common<0x6E>;
Modified: llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFoldOperands.cpp Thu Jun 28 16:47:12 2018
@@ -76,7 +76,7 @@ public:
MachineRegisterInfo *MRI;
const SIInstrInfo *TII;
const SIRegisterInfo *TRI;
- const SISubtarget *ST;
+ const AMDGPUSubtarget *ST;
void foldOperand(MachineOperand &OpToFold,
MachineInstr *UseMI,
@@ -972,7 +972,7 @@ bool SIFoldOperands::runOnMachineFunctio
return false;
MRI = &MF.getRegInfo();
- ST = &MF.getSubtarget<SISubtarget>();
+ ST = &MF.getSubtarget<AMDGPUSubtarget>();
TII = ST->getInstrInfo();
TRI = &TII->getRegisterInfo();
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Thu Jun 28 16:47:12 2018
@@ -113,7 +113,8 @@ static unsigned findFirstFreeSGPR(CCStat
SITargetLowering::SITargetLowering(const TargetMachine &TM,
const SISubtarget &STI)
- : AMDGPUTargetLowering(TM, STI) {
+ : AMDGPUTargetLowering(TM, STI),
+ Subtarget(&STI) {
addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass);
addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
@@ -147,7 +148,7 @@ SITargetLowering::SITargetLowering(const
addRegisterClass(MVT::v4f16, &AMDGPU::SReg_64RegClass);
}
- computeRegisterProperties(STI.getRegisterInfo());
+ computeRegisterProperties(Subtarget->getRegisterInfo());
// We need to custom lower vector stores from local memory
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
@@ -323,7 +324,7 @@ SITargetLowering::SITargetLowering(const
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Expand);
setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Expand);
- if (getSubtarget()->hasFlatAddressSpace()) {
+ if (Subtarget->hasFlatAddressSpace()) {
setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
}
@@ -336,6 +337,44 @@ SITargetLowering::SITargetLowering(const
setOperationAction(ISD::TRAP, MVT::Other, Custom);
setOperationAction(ISD::DEBUGTRAP, MVT::Other, Custom);
+ if (Subtarget->has16BitInsts()) {
+ setOperationAction(ISD::FLOG, MVT::f16, Custom);
+ setOperationAction(ISD::FLOG10, MVT::f16, Custom);
+ }
+
+ // v_mad_f32 does not support denormals according to some sources.
+ if (!Subtarget->hasFP32Denormals())
+ setOperationAction(ISD::FMAD, MVT::f32, Legal);
+
+ if (!Subtarget->hasBFI()) {
+ // fcopysign can be done in a single instruction with BFI.
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ }
+
+ if (!Subtarget->hasBCNT(32))
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+
+ if (!Subtarget->hasBCNT(64))
+ setOperationAction(ISD::CTPOP, MVT::i64, Expand);
+
+ if (Subtarget->hasFFBH())
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
+
+ if (Subtarget->hasFFBL())
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
+
+ // We only really have 32-bit BFE instructions (and 16-bit on VI).
+ //
+  // On SI+ there are 64-bit BFEs, but they are scalar only and no effort has
+  // been made to match them yet. We want this to be false for i64 cases when
+  // the extraction isn't restricted to the upper or lower half. Ideally we
+  // would have a pass that reduces 64-bit extracts to 32-bit when possible.
+  // Extracts that span the midpoint are probably relatively rare, so don't
+  // worry about them for now.
+ if (Subtarget->hasBFE())
+ setHasExtractBitsInsn(true);
+
setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
@@ -343,6 +382,11 @@ SITargetLowering::SITargetLowering(const
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FRINT, MVT::f64, Legal);
+ } else {
+ setOperationAction(ISD::FCEIL, MVT::f64, Custom);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Custom);
+ setOperationAction(ISD::FRINT, MVT::f64, Custom);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Custom);
}
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
@@ -616,10 +660,15 @@ SITargetLowering::SITargetLowering(const
setTargetDAGCombine(ISD::ATOMIC_LOAD_UMAX);
setSchedulingPreference(Sched::RegPressure);
+
+ // SI at least has hardware support for floating point exceptions, but no way
+ // of using or handling them is implemented. They are also optional in OpenCL
+  // (Section 7.3).
+ setHasFloatingPointExceptions(Subtarget->hasFPExceptions());
}
const SISubtarget *SITargetLowering::getSubtarget() const {
- return static_cast<const SISubtarget *>(Subtarget);
+ return Subtarget;
}
//===----------------------------------------------------------------------===//
@@ -2012,8 +2061,7 @@ SITargetLowering::LowerReturn(SDValue Ch
// FIXME: Does sret work properly?
if (!Info->isEntryFunction()) {
- const SIRegisterInfo *TRI
- = static_cast<const SISubtarget *>(Subtarget)->getRegisterInfo();
+ const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
const MCPhysReg *I =
TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
if (I) {
@@ -2115,8 +2163,7 @@ void SITargetLowering::passSpecialInputs
SelectionDAG &DAG = CLI.DAG;
const SDLoc &DL = CLI.DL;
- const SISubtarget *ST = getSubtarget();
- const SIRegisterInfo *TRI = ST->getRegisterInfo();
+ const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
auto &ArgUsageInfo =
DAG.getPass()->getAnalysis<AMDGPUArgumentUsageInfo>();
@@ -2561,7 +2608,7 @@ SDValue SITargetLowering::LowerCall(Call
// Add a register mask operand representing the call-preserved registers.
- const AMDGPURegisterInfo *TRI = Subtarget->getRegisterInfo();
+ auto *TRI = static_cast<const SIRegisterInfo*>(Subtarget->getRegisterInfo());
const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
assert(Mask && "Missing call preserved mask for calling convention");
Ops.push_back(DAG.getRegisterMask(Mask));
@@ -8179,8 +8226,7 @@ void SITargetLowering::finalizeLowering(
MachineRegisterInfo &MRI = MF.getRegInfo();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
const MachineFrameInfo &MFI = MF.getFrameInfo();
- const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
- const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
if (Info->isEntryFunction()) {
// Callable functions have fixed registers used for stack access.
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h Thu Jun 28 16:47:12 2018
@@ -22,6 +22,9 @@
namespace llvm {
class SITargetLowering final : public AMDGPUTargetLowering {
+private:
+ const SISubtarget *Subtarget;
+
SDValue lowerKernArgParameterPtr(SelectionDAG &DAG, const SDLoc &SL,
SDValue Chain, uint64_t Offset) const;
SDValue getImplicitArgPtr(SelectionDAG &DAG, const SDLoc &SL) const;
Modified: llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp Thu Jun 28 16:47:12 2018
@@ -934,8 +934,7 @@ void SIInsertWaitcnts::generateWaitcntIn
// All waits must be resolved at call return.
// NOTE: this could be improved with knowledge of all call sites or
// with knowledge of the called routines.
- if (MI.getOpcode() == AMDGPU::RETURN ||
- MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
+ if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
MI.getOpcode() == AMDGPU::S_SETPC_B64_return) {
for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
T = (enum InstCounterType)(T + 1)) {
@@ -1131,7 +1130,7 @@ void SIInsertWaitcnts::generateWaitcntIn
// TODO: Remove this work-around, enable the assert for Bug 457939
// after fixing the scheduler. Also, the Shader Compiler code is
// independent of target.
- if (readsVCCZ(MI) && ST->getGeneration() <= SISubtarget::SEA_ISLANDS) {
+ if (readsVCCZ(MI) && ST->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS) {
if (ScoreBrackets->getScoreLB(LGKM_CNT) <
ScoreBrackets->getScoreUB(LGKM_CNT) &&
ScoreBrackets->hasPendingSMEM()) {
@@ -1716,7 +1715,7 @@ void SIInsertWaitcnts::insertWaitcntInBl
if (ScoreBrackets->getScoreLB(LGKM_CNT) <
ScoreBrackets->getScoreUB(LGKM_CNT) &&
ScoreBrackets->hasPendingSMEM()) {
- if (ST->getGeneration() <= SISubtarget::SEA_ISLANDS)
+ if (ST->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
VCCZBugWorkAround = true;
}
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrFormats.td Thu Jun 28 16:47:12 2018
@@ -21,7 +21,7 @@ def isSI : Predicate<"Subtarget->getGene
class InstSI <dag outs, dag ins, string asm = "",
list<dag> pattern = []> :
- AMDGPUInst<outs, ins, asm, pattern>, PredicateControl {
+ AMDGPUInst<outs, ins, asm, pattern>, GCNPredicateControl {
let SubtargetPredicate = isGCN;
// Low bits - basic encoding information.
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Thu Jun 28 16:47:12 2018
@@ -14,6 +14,7 @@
#include "SIInstrInfo.h"
#include "AMDGPU.h"
+#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "GCNHazardRecognizer.h"
#include "SIDefines.h"
@@ -63,6 +64,19 @@
using namespace llvm;
+#define GET_INSTRINFO_CTOR_DTOR
+#include "AMDGPUGenInstrInfo.inc"
+
+namespace llvm {
+namespace AMDGPU {
+#define GET_D16ImageDimIntrinsics_IMPL
+#define GET_ImageDimIntrinsicTable_IMPL
+#define GET_RsrcIntrinsics_IMPL
+#include "AMDGPUGenSearchableTables.inc"
+}
+}
+
+
// Must be at least 4 to be able to branch over minimum unconditional branch
// code. This is only for making it possible to write reasonably small tests for
// long branches.
@@ -71,7 +85,8 @@ BranchOffsetBits("amdgpu-s-branch-bits",
cl::desc("Restrict range of branch instructions (DEBUG)"));
SIInstrInfo::SIInstrInfo(const SISubtarget &ST)
- : AMDGPUInstrInfo(ST), RI(ST), ST(ST) {}
+ : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
+ RI(ST), ST(ST) {}
//===----------------------------------------------------------------------===//
// TargetInstrInfo callbacks
@@ -438,6 +453,28 @@ bool SIInstrInfo::shouldClusterMemOps(Ma
return (NumLoads * (RI.getRegSizeInBits(*DstRC) / 8)) <= LoadClusterThreshold;
}
+// FIXME: This behaves strangely. If, for example, you have 32 loads and
+// stores, the first 16 loads will be interleaved with the stores, and the
+// next 16 will be clustered as expected. It should really split into two
+// 16-store batches.
+//
+// Loads are clustered until this returns false, rather than trying to
+// schedule groups of stores. This also means we have to decide whether loads
+// from different address spaces should be clustered, and whether loads that
+// might cause bank conflicts should be.
+//
+// This might be deprecated, so it might not be worth much effort to fix.
+bool SIInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
+ int64_t Offset0, int64_t Offset1,
+ unsigned NumLoads) const {
+ assert(Offset1 > Offset0 &&
+ "Second offset should be larger than first offset!");
+  // If we have fewer than 16 loads in a row and the offsets are within 64
+  // bytes, then schedule them together.
+
+ // A cacheline is 64 bytes (for global memory).
+ return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
+}
+
static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const DebugLoc &DL, unsigned DestReg,
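
The heuristic added in shouldScheduleLoadsNear() above boils down to a
single comparison: keep clustering while there are at most 16 loads in a
row and the two offsets fall within one 64-byte global-memory cacheline.
A standalone restatement of just that predicate, with a few sanity checks
(clusterLoads is an invented name for illustration):

#include <cassert>
#include <cstdint>

// Same test as the hook above: at most 16 loads, offsets within one
// 64-byte cacheline.
static bool clusterLoads(int64_t Offset0, int64_t Offset1, unsigned NumLoads) {
  assert(Offset1 > Offset0 && "Second offset should be larger than first!");
  return NumLoads <= 16 && (Offset1 - Offset0) < 64;
}

int main() {
  assert(clusterLoads(0, 60, 4));   // same cacheline, short run: cluster
  assert(!clusterLoads(0, 64, 4));  // 64 bytes apart: a new cacheline starts
  assert(!clusterLoads(0, 60, 17)); // 17th load in a row: stop clustering
  return 0;
}
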
@@ -998,7 +1035,7 @@ unsigned SIInstrInfo::calculateLDSSpillA
unsigned FrameOffset, unsigned Size) const {
MachineFunction *MF = MBB.getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
- const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
+ const AMDGPUSubtarget &ST = MF->getSubtarget<AMDGPUSubtarget>();
DebugLoc DL = MBB.findDebugLoc(MI);
unsigned WorkGroupSize = MFI->getMaxFlatWorkGroupSize();
unsigned WavefrontSize = ST.getWavefrontSize();
@@ -1134,7 +1171,7 @@ bool SIInstrInfo::expandPostRAPseudo(Mac
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = MBB.findDebugLoc(MI);
switch (MI.getOpcode()) {
- default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
+ default: return TargetInstrInfo::expandPostRAPseudo(MI);
case AMDGPU::S_MOV_B64_term:
// This is only a terminator to get the correct spill code placement during
// register allocation.
@@ -1900,16 +1937,16 @@ unsigned SIInstrInfo::getAddressSpaceFor
switch(Kind) {
case PseudoSourceValue::Stack:
case PseudoSourceValue::FixedStack:
- return AMDGPUASI.PRIVATE_ADDRESS;
+ return ST.getAMDGPUAS().PRIVATE_ADDRESS;
case PseudoSourceValue::ConstantPool:
case PseudoSourceValue::GOT:
case PseudoSourceValue::JumpTable:
case PseudoSourceValue::GlobalValueCallEntry:
case PseudoSourceValue::ExternalSymbolCallEntry:
case PseudoSourceValue::TargetCustom:
- return AMDGPUASI.CONSTANT_ADDRESS;
+ return ST.getAMDGPUAS().CONSTANT_ADDRESS;
}
- return AMDGPUASI.FLAT_ADDRESS;
+ return ST.getAMDGPUAS().FLAT_ADDRESS;
}
static void removeModOperands(MachineInstr &MI) {
@@ -4649,7 +4686,7 @@ unsigned SIInstrInfo::isStackAccess(cons
return AMDGPU::NoRegister;
assert(!MI.memoperands_empty() &&
- (*MI.memoperands_begin())->getAddrSpace() == AMDGPUASI.PRIVATE_ADDRESS);
+ (*MI.memoperands_begin())->getAddrSpace() == ST.getAMDGPUAS().PRIVATE_ADDRESS);
FrameIndex = Addr->getIndex();
return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
@@ -4768,7 +4805,7 @@ bool SIInstrInfo::mayAccessFlatAddressSp
return true;
for (const MachineMemOperand *MMO : MI.memoperands()) {
- if (MMO->getAddrSpace() == AMDGPUASI.FLAT_ADDRESS)
+ if (MMO->getAddrSpace() == ST.getAMDGPUAS().FLAT_ADDRESS)
return true;
}
return false;
@@ -4948,3 +4985,55 @@ bool SIInstrInfo::isBufferSMRD(const Mac
const auto RCID = MI.getDesc().OpInfo[Idx].RegClass;
return RCID == AMDGPU::SReg_128RegClassID;
}
+
+// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
+enum SIEncodingFamily {
+ SI = 0,
+ VI = 1,
+ SDWA = 2,
+ SDWA9 = 3,
+ GFX80 = 4,
+ GFX9 = 5
+};
+
+static SIEncodingFamily subtargetEncodingFamily(const SISubtarget &ST) {
+ switch (ST.getGeneration()) {
+ case SISubtarget::SOUTHERN_ISLANDS:
+ case SISubtarget::SEA_ISLANDS:
+ return SIEncodingFamily::SI;
+ case SISubtarget::VOLCANIC_ISLANDS:
+ case SISubtarget::GFX9:
+ return SIEncodingFamily::VI;
+ }
+ llvm_unreachable("Unknown subtarget generation!");
+}
+
+int SIInstrInfo::pseudoToMCOpcode(int Opcode) const {
+ SIEncodingFamily Gen = subtargetEncodingFamily(ST);
+
+ if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
+ ST.getGeneration() >= SISubtarget::GFX9)
+ Gen = SIEncodingFamily::GFX9;
+
+ if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
+ Gen = ST.getGeneration() == SISubtarget::GFX9 ? SIEncodingFamily::SDWA9
+ : SIEncodingFamily::SDWA;
+ // Adjust the encoding family to GFX80 for D16 buffer instructions when the
+  // subtarget has the UnpackedD16VMem feature.
+ // TODO: remove this when we discard GFX80 encoding.
+ if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16Buf))
+ Gen = SIEncodingFamily::GFX80;
+
+ int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
+
+ // -1 means that Opcode is already a native instruction.
+ if (MCOp == -1)
+ return Opcode;
+
+ // (uint16_t)-1 means that Opcode is a pseudo instruction that has
+ // no encoding in the given subtarget generation.
+ if (MCOp == (uint16_t)-1)
+ return -1;
+
+ return MCOp;
+}
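
The tail of pseudoToMCOpcode() above distinguishes two sentinels:
getMCOpcode() returns -1 for opcodes that are already native instructions,
but (uint16_t)-1 (0xffff) for pseudos that have no encoding in the
requested generation. A toy model of that contract; lookupMCOpcode, the
table contents, and the opcode numbers are all invented stand-ins for the
TableGen-generated mapping:

#include <cstdint>
#include <cstdio>

static int lookupMCOpcode(int Opcode, int Gen) {
  // Rows: pseudo opcodes; columns: encoding families (say, SI and VI).
  static const uint16_t Table[2][2] = {
      {100, 200},      // pseudo 0 encodes in both families
      {300, 0xffffu}}; // pseudo 1 has no encoding in family 1
  if (Opcode >= 2)
    return -1;         // already a native instruction, no remapping needed
  return Table[Opcode][Gen];
}

static int pseudoToMC(int Opcode, int Gen) {
  int MCOp = lookupMCOpcode(Opcode, Gen);
  if (MCOp == -1)
    return Opcode;     // native: identity
  if (MCOp == (uint16_t)-1)
    return -1;         // pseudo with no encoding for this generation
  return MCOp;
}

int main() {
  std::printf("%d %d %d\n",
              pseudoToMC(0, 1),  // 200: remapped to the family-1 encoding
              pseudoToMC(1, 1),  // -1: no encoding in family 1
              pseudoToMC(7, 0)); // 7: native opcode passes through
  return 0;
}
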
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h Thu Jun 28 16:47:12 2018
@@ -31,6 +31,9 @@
#include <cassert>
#include <cstdint>
+#define GET_INSTRINFO_HEADER
+#include "AMDGPUGenInstrInfo.inc"
+
namespace llvm {
class APInt;
@@ -39,7 +42,7 @@ class RegScavenger;
class SISubtarget;
class TargetRegisterClass;
-class SIInstrInfo final : public AMDGPUInstrInfo {
+class SIInstrInfo final : public AMDGPUGenInstrInfo {
private:
const SIRegisterInfo RI;
const SISubtarget &ST;
@@ -163,7 +166,10 @@ public:
bool shouldClusterMemOps(MachineInstr &FirstLdSt, unsigned BaseReg1,
MachineInstr &SecondLdSt, unsigned BaseReg2,
- unsigned NumLoads) const final;
+ unsigned NumLoads) const override;
+
+ bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
+ int64_t Offset1, unsigned NumLoads) const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
@@ -871,6 +877,12 @@ public:
static bool isLegalMUBUFImmOffset(unsigned Imm) {
return isUInt<12>(Imm);
}
+
+ /// \brief Return a target-specific opcode if Opcode is a pseudo instruction.
+  /// Return -1 if the target-specific opcode for the pseudo instruction does
+  /// not exist. If Opcode is not a pseudo instruction, it is returned
+  /// unchanged.
+ int pseudoToMCOpcode(int Opcode) const;
+
};
namespace AMDGPU {
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Thu Jun 28 16:47:12 2018
@@ -17,6 +17,11 @@ def isVIOnly : Predicate<"Subtarget->get
def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">;
+class GCNPredicateControl : PredicateControl {
+ Predicate SIAssemblerPredicate = isSICI;
+ Predicate VIAssemblerPredicate = isVI;
+}
+
// Except for the NONE field, this must be kept in sync with the
// SIEncodingFamily enum in AMDGPUInstrInfo.cpp
def SIEncodingFamily {
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstructions.td?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstructions.td Thu Jun 28 16:47:12 2018
@@ -11,11 +11,10 @@
// that are not yet supported remain commented out.
//===----------------------------------------------------------------------===//
-class GCNPat<dag pattern, dag result> : AMDGPUPat<pattern, result> {
+class GCNPat<dag pattern, dag result> : Pat<pattern, result>, GCNPredicateControl {
let SubtargetPredicate = isGCN;
}
-
include "VOPInstructions.td"
include "SOPInstructions.td"
include "SMInstructions.td"
Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp Thu Jun 28 16:47:12 2018
@@ -1232,8 +1232,6 @@ const TargetRegisterClass *SIRegisterInf
&AMDGPU::VReg_512RegClass,
&AMDGPU::SReg_512RegClass,
&AMDGPU::SCC_CLASSRegClass,
- &AMDGPU::R600_Reg32RegClass,
- &AMDGPU::R600_PredicateRegClass,
&AMDGPU::Pseudo_SReg_32RegClass,
&AMDGPU::Pseudo_SReg_128RegClass,
};
Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.h Thu Jun 28 16:47:12 2018
@@ -21,6 +21,7 @@
namespace llvm {
+class AMDGPUSubtarget;
class LiveIntervals;
class MachineRegisterInfo;
class SISubtarget;
Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Thu Jun 28 16:47:12 2018
@@ -181,7 +181,7 @@ IsaVersion getIsaVersion(const FeatureBi
if (Features.test(FeatureGFX9))
return {9, 0, 0};
- if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
+ if (Features.test(FeatureSouthernIslands))
return {0, 0, 0};
return {7, 0, 0};
}
@@ -243,7 +243,7 @@ unsigned getMaxWorkGroupsPerCU(const Fea
}
unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
- return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
+ return getMaxWavesPerEU() * getEUsPerCU(Features);
}
unsigned getMaxWavesPerCU(const FeatureBitset &Features,
@@ -255,9 +255,7 @@ unsigned getMinWavesPerEU(const FeatureB
return 1;
}
-unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
- if (!Features.test(FeatureGCN))
- return 8;
+unsigned getMaxWavesPerEU() {
// FIXME: Need to take scratch memory into account.
return 10;
}
@@ -313,7 +311,7 @@ unsigned getAddressableNumSGPRs(const Fe
unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
assert(WavesPerEU != 0);
- if (WavesPerEU >= getMaxWavesPerEU(Features))
+ if (WavesPerEU >= getMaxWavesPerEU())
return 0;
unsigned MinNumSGPRs = getTotalNumSGPRs(Features) / (WavesPerEU + 1);
@@ -390,7 +388,7 @@ unsigned getAddressableNumVGPRs(const Fe
unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
assert(WavesPerEU != 0);
- if (WavesPerEU >= getMaxWavesPerEU(Features))
+ if (WavesPerEU >= getMaxWavesPerEU())
return 0;
unsigned MinNumVGPRs =
alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
@@ -735,6 +733,8 @@ bool isRegIntersect(unsigned Reg0, unsig
case node: return isGFX9(STI) ? node##_gfx9 : node##_vi;
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
+ if (STI.getTargetTriple().getArch() == Triple::r600)
+ return Reg;
MAP_REG2REG
}
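
With getMaxWavesPerEU() reduced to a flat constant above, the per-CU bound
from getMaxWavesPerCU() is simply that constant times the EU count. A quick
check of the arithmetic, assuming an EU count of 4 (typical for a GCN
compute unit; the real getEUsPerCU() derives it from the feature bits):

#include <cassert>

static unsigned getMaxWavesPerEUSketch() { return 10; } // now feature-independent
static unsigned getEUsPerCUSketch() { return 4; }       // assumed GCN-like CU

int main() {
  // 10 waves per EU * 4 EUs per CU = 40 waves per CU.
  assert(getMaxWavesPerEUSketch() * getEUsPerCUSketch() == 40);
  return 0;
}
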
Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h?rev=335942&r1=335941&r2=335942&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h Thu Jun 28 16:47:12 2018
@@ -100,7 +100,7 @@ unsigned getMinWavesPerEU(const FeatureB
/// \returns Maximum number of waves per execution unit for given subtarget \p
/// Features without any kind of limitation.
-unsigned getMaxWavesPerEU(const FeatureBitset &Features);
+unsigned getMaxWavesPerEU();
/// \returns Maximum number of waves per execution unit for given subtarget \p
/// Features and limited by given \p FlatWorkGroupSize.