[llvm] r295554 - AMDGPU: Merge initial gfx9 support
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Sat Feb 18 10:29:53 PST 2017
Author: arsenm
Date: Sat Feb 18 12:29:53 2017
New Revision: 295554
URL: http://llvm.org/viewvc/llvm-project?rev=295554&view=rev
Log:
AMDGPU: Merge initial gfx9 support
Added:
llvm/trunk/test/MC/AMDGPU/sopp-gfx9.s
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPU.h
llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h
llvm/trunk/lib/Target/AMDGPU/Processors.td
llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.h
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td
llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
llvm/trunk/test/CodeGen/AMDGPU/addrspacecast.ll
llvm/trunk/test/CodeGen/AMDGPU/debugger-reserve-regs.ll
llvm/trunk/test/CodeGen/AMDGPU/hsa-note-no-func.ll
llvm/trunk/test/CodeGen/AMDGPU/inserted-wait-states.mir
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPU.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPU.h?rev=295554&r1=295553&r2=295554&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPU.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.h Sat Feb 18 12:29:53 2017
@@ -47,7 +47,7 @@ FunctionPass *createSIDebuggerInsertNops
FunctionPass *createSIInsertWaitsPass();
FunctionPass *createAMDGPUCodeGenPreparePass(const GCNTargetMachine *TM = nullptr);
-ModulePass *createAMDGPUAnnotateKernelFeaturesPass();
+ModulePass *createAMDGPUAnnotateKernelFeaturesPass(const TargetMachine *TM = nullptr);
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
extern char &AMDGPUAnnotateKernelFeaturesID;
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPU.td?rev=295554&r1=295553&r2=295554&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPU.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.td Sat Feb 18 12:29:53 2017
@@ -79,6 +79,12 @@ def FeatureUnalignedScratchAccess : Subt
"Support unaligned scratch loads and stores"
>;
+def FeatureApertureRegs : SubtargetFeature<"aperture-regs",
+ "HasApertureRegs",
+ "true",
+ "Has Memory Aperture Base and Size Registers"
+>;
+
// XNACK is disabled if SH_MEM_CONFIG.ADDRESS_MODE = GPUVM on chips that support
// XNACK. The current default kernel driver setting is:
// - graphics ring: XNACK disabled
@@ -377,6 +383,15 @@ def FeatureVolcanicIslands : SubtargetFe
]
>;
+def FeatureGFX9 : SubtargetFeatureGeneration<"GFX9",
+ [FeatureFP64, FeatureLocalMemorySize65536,
+ FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
+ FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
+ FeatureSMemRealTime, FeatureScalarStores, FeatureInv2PiInlineImm,
+ FeatureApertureRegs
+ ]
+>;
+
class SubtargetFeatureISAVersion <int Major, int Minor, int Stepping,
list<SubtargetFeature> Implies>
: SubtargetFeature <
@@ -429,6 +444,9 @@ def FeatureISAVersion8_1_0 : SubtargetFe
FeatureLDSBankCount16,
FeatureXNACK]>;
+def FeatureISAVersion9_0_0 : SubtargetFeatureISAVersion <9,0,0,[]>;
+def FeatureISAVersion9_0_1 : SubtargetFeatureISAVersion <9,0,1,[]>;
+
//===----------------------------------------------------------------------===//
// Debugger related subtarget features.
//===----------------------------------------------------------------------===//
@@ -534,10 +552,10 @@ def isVI : Predicate <
"Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS">,
AssemblerPredicate<"FeatureGCN3Encoding">;
+// TODO: Either the name to be changed or we simply use IsCI!
def isCIVI : Predicate <
- "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS || "
- "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS"
->, AssemblerPredicate<"FeatureCIInsts">;
+ "Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">,
+ AssemblerPredicate<"FeatureCIInsts">;
def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">;
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp?rev=295554&r1=295553&r2=295554&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp Sat Feb 18 12:29:53 2017
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
@@ -26,6 +27,7 @@ namespace {
class AMDGPUAnnotateKernelFeatures : public ModulePass {
private:
+ const TargetMachine *TM;
static bool hasAddrSpaceCast(const Function &F);
void addAttrToCallers(Function *Intrin, StringRef AttrName);
@@ -34,7 +36,8 @@ private:
public:
static char ID;
- AMDGPUAnnotateKernelFeatures() : ModulePass(ID) { }
+ AMDGPUAnnotateKernelFeatures(const TargetMachine *TM_ = nullptr) :
+ ModulePass(ID), TM(TM_) {}
bool runOnModule(Module &M) override;
StringRef getPassName() const override {
return "AMDGPU Annotate Kernel Features";
@@ -211,7 +214,9 @@ bool AMDGPUAnnotateKernelFeatures::runOn
if (F.hasFnAttribute("amdgpu-queue-ptr"))
continue;
- if (hasAddrSpaceCast(F))
+ bool HasApertureRegs =
+ TM && TM->getSubtarget<AMDGPUSubtarget>(F).hasApertureRegs();
+ if (!HasApertureRegs && hasAddrSpaceCast(F))
F.addFnAttr("amdgpu-queue-ptr");
}
}
@@ -219,6 +224,6 @@ bool AMDGPUAnnotateKernelFeatures::runOn
return Changed;
}
-ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass() {
- return new AMDGPUAnnotateKernelFeatures();
+ModulePass *llvm::createAMDGPUAnnotateKernelFeaturesPass(const TargetMachine *TM) {
+ return new AMDGPUAnnotateKernelFeatures(TM);
}
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp?rev=295554&r1=295553&r2=295554&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Sat Feb 18 12:29:53 2017
@@ -381,6 +381,10 @@ void AMDGPUAsmPrinter::getSIProgramInfo(
case AMDGPU::EXEC_HI:
case AMDGPU::SCC:
case AMDGPU::M0:
+ case AMDGPU::SRC_SHARED_BASE:
+ case AMDGPU::SRC_SHARED_LIMIT:
+ case AMDGPU::SRC_PRIVATE_BASE:
+ case AMDGPU::SRC_PRIVATE_LIMIT:
continue;
case AMDGPU::VCC:
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp?rev=295554&r1=295553&r2=295554&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp Sat Feb 18 12:29:53 2017
@@ -86,6 +86,7 @@ static SIEncodingFamily subtargetEncodin
case AMDGPUSubtarget::SEA_ISLANDS:
return SIEncodingFamily::SI;
case AMDGPUSubtarget::VOLCANIC_ISLANDS:
+ case AMDGPUSubtarget::GFX9:
return SIEncodingFamily::VI;
// FIXME: This should never be called for r600 GPUs.
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=295554&r1=295553&r2=295554&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Sat Feb 18 12:29:53 2017
@@ -93,6 +93,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const T
UnalignedScratchAccess(false),
UnalignedBufferAccess(false),
+ HasApertureRegs(false),
EnableXNACK(false),
TrapHandler(false),
DebuggerInsertNops(false),
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=295554&r1=295553&r2=295554&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Sat Feb 18 12:29:53 2017
@@ -51,6 +51,7 @@ public:
SOUTHERN_ISLANDS,
SEA_ISLANDS,
VOLCANIC_ISLANDS,
+ GFX9,
};
enum {
@@ -64,6 +65,8 @@ public:
ISAVersion8_0_3,
ISAVersion8_0_4,
ISAVersion8_1_0,
+ ISAVersion9_0_0,
+ ISAVersion9_0_1
};
enum TrapHandlerAbi {
@@ -103,6 +106,7 @@ protected:
bool FlatForGlobal;
bool UnalignedScratchAccess;
bool UnalignedBufferAccess;
+ bool HasApertureRegs;
bool EnableXNACK;
bool TrapHandler;
bool DebuggerInsertNops;
@@ -330,6 +334,10 @@ public:
return UnalignedScratchAccess;
}
+ bool hasApertureRegs() const {
+ return HasApertureRegs;
+ }
+
bool isTrapHandlerEnabled() const {
return TrapHandler;
}
@@ -645,6 +653,14 @@ public:
return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
}
+ bool hasSMovFedHazard() const {
+ return getGeneration() >= AMDGPUSubtarget::GFX9;
+ }
+
+ bool hasReadM0Hazard() const {
+ return getGeneration() >= AMDGPUSubtarget::GFX9;
+ }
+
unsigned getKernArgSegmentSize(const MachineFunction &MF, unsigned ExplictArgBytes) const;
/// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
@@ -656,7 +672,13 @@ public:
/// \returns True if waitcnt instruction is needed before barrier instruction,
/// false otherwise.
bool needWaitcntBeforeBarrier() const {
- return true;
+ return getGeneration() < GFX9;
+ }
+
+ /// \returns true if the flat_scratch register should be initialized with the
+ /// pointer to the wave's scratch memory rather than a size and offset.
+ bool flatScratchIsPointer() const {
+ return getGeneration() >= GFX9;
}
/// \returns SGPR allocation granularity supported by the subtarget.
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp?rev=295554&r1=295553&r2=295554&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp Sat Feb 18 12:29:53 2017
@@ -598,7 +598,8 @@ bool GCNPassConfig::addPreISel() {
// FIXME: We need to run a pass to propagate the attributes when calls are
// supported.
- addPass(&AMDGPUAnnotateKernelFeaturesID);
+ const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
+ addPass(createAMDGPUAnnotateKernelFeaturesPass(&TM));
addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions
addPass(createSinkingPass());
addPass(createSITypeRewriter());
Modified: llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp?rev=295554&r1=295553&r2=295554&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp Sat Feb 18 12:29:53 2017
@@ -71,6 +71,18 @@ static bool isRFE(unsigned Opcode) {
return Opcode == AMDGPU::S_RFE_B64;
}
+static bool isSMovRel(unsigned Opcode) {
+ return Opcode == AMDGPU::S_MOVRELS_B32 || AMDGPU::S_MOVRELS_B64 ||
+ Opcode == AMDGPU::S_MOVRELD_B32 || AMDGPU::S_MOVRELD_B64;
+}
+
+static bool isVInterp(unsigned Opcode) {
+ return Opcode == AMDGPU::V_INTERP_P1_F32 ||
+ Opcode == AMDGPU::V_INTERP_P1_F32_16bank ||
+ Opcode == AMDGPU::V_INTERP_P2_F32 ||
+ Opcode == AMDGPU::V_INTERP_MOV_F32;
+}
+
static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
AMDGPU::OpName::simm16);
@@ -108,6 +120,13 @@ GCNHazardRecognizer::getHazardType(SUnit
if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
return NoopHazard;
+ if ((isVInterp(MI->getOpcode()) || isSMovRel(MI->getOpcode())) &&
+ checkReadM0Hazards(MI) > 0)
+ return NoopHazard;
+
+ if (checkAnyInstHazards(MI) > 0)
+ return NoopHazard;
+
return NoHazard;
}
@@ -116,11 +135,13 @@ unsigned GCNHazardRecognizer::PreEmitNoo
}
unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
+ int WaitStates = std::max(0, checkAnyInstHazards(MI));
+
if (SIInstrInfo::isSMRD(*MI))
- return std::max(0, checkSMRDHazards(MI));
+ return std::max(WaitStates, checkSMRDHazards(MI));
if (SIInstrInfo::isVALU(*MI)) {
- int WaitStates = std::max(0, checkVALUHazards(MI));
+ WaitStates = std::max(WaitStates, checkVALUHazards(MI));
if (SIInstrInfo::isVMEM(*MI))
WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
@@ -134,19 +155,25 @@ unsigned GCNHazardRecognizer::PreEmitNoo
if (isRWLane(MI->getOpcode()))
WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));
+ if (isVInterp(MI->getOpcode()))
+ WaitStates = std::max(WaitStates, checkReadM0Hazards(MI));
+
return WaitStates;
}
if (isSGetReg(MI->getOpcode()))
- return std::max(0, checkGetRegHazards(MI));
+ return std::max(WaitStates, checkGetRegHazards(MI));
if (isSSetReg(MI->getOpcode()))
- return std::max(0, checkSetRegHazards(MI));
+ return std::max(WaitStates, checkSetRegHazards(MI));
if (isRFE(MI->getOpcode()))
- return std::max(0, checkRFEHazards(MI));
+ return std::max(WaitStates, checkRFEHazards(MI));
- return 0;
+ if (isSMovRel(MI->getOpcode()))
+ return std::max(WaitStates, checkReadM0Hazards(MI));
+
+ return WaitStates;
}
void GCNHazardRecognizer::EmitNoop() {
@@ -508,3 +535,42 @@ int GCNHazardRecognizer::checkRFEHazards
int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);
return RFEWaitStates - WaitStatesNeeded;
}
+
+int GCNHazardRecognizer::checkAnyInstHazards(MachineInstr *MI) {
+ if (MI->isDebugValue())
+ return 0;
+
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ if (!ST.hasSMovFedHazard())
+ return 0;
+
+ // Check for any instruction reading an SGPR after a write from
+ // s_mov_fed_b32.
+ int MovFedWaitStates = 1;
+ int WaitStatesNeeded = 0;
+
+ for (const MachineOperand &Use : MI->uses()) {
+ if (!Use.isReg() || TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
+ continue;
+ auto IsHazardFn = [] (MachineInstr *MI) {
+ return MI->getOpcode() == AMDGPU::S_MOV_FED_B32;
+ };
+ int WaitStatesNeededForUse =
+ MovFedWaitStates - getWaitStatesSinceDef(Use.getReg(), IsHazardFn);
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+ }
+
+ return WaitStatesNeeded;
+}
+
+int GCNHazardRecognizer::checkReadM0Hazards(MachineInstr *MI) {
+ if (!ST.hasReadM0Hazard())
+ return 0;
+
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ int SMovRelWaitStates = 1;
+ auto IsHazardFn = [TII] (MachineInstr *MI) {
+ return TII->isSALU(*MI);
+ };
+ return SMovRelWaitStates - getWaitStatesSinceDef(AMDGPU::M0, IsHazardFn);
+}
Modified: llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h?rev=295554&r1=295553&r2=295554&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.h Sat Feb 18 12:29:53 2017
@@ -52,6 +52,8 @@ class GCNHazardRecognizer final : public
int checkVALUHazards(MachineInstr *VALU);
int checkRWLaneHazards(MachineInstr *RWLane);
int checkRFEHazards(MachineInstr *RFE);
+ int checkAnyInstHazards(MachineInstr *MI);
+ int checkReadM0Hazards(MachineInstr *SMovRel);
public:
GCNHazardRecognizer(const MachineFunction &MF);
// We can only issue one instruction per cycle.
Modified: llvm/trunk/lib/Target/AMDGPU/Processors.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Processors.td?rev=295554&r1=295553&r2=295554&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Processors.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/Processors.td Sat Feb 18 12:29:53 2017
@@ -187,3 +187,10 @@ def : ProcessorModel<"gfx810", SIQuarter
[FeatureISAVersion8_1_0]
>;
+def : ProcessorModel<"gfx900", SIQuarterSpeedModel,
+ [FeatureGFX9, FeatureISAVersion9_0_0, FeatureLDSBankCount32]
+>;
+
+def : ProcessorModel<"gfx901", SIQuarterSpeedModel,
+ [FeatureGFX9, FeatureXNACK, FeatureISAVersion9_0_1, FeatureLDSBankCount32]
+>;
Modified: llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp?rev=295554&r1=295553&r2=295554&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp Sat Feb 18 12:29:53 2017
@@ -33,10 +33,12 @@ static ArrayRef<MCPhysReg> getAllSGPRs(c
ST.getMaxNumSGPRs(MF));
}
-void SIFrameLowering::emitFlatScratchInit(const SIInstrInfo *TII,
- const SIRegisterInfo* TRI,
+void SIFrameLowering::emitFlatScratchInit(const SISubtarget &ST,
MachineFunction &MF,
MachineBasicBlock &MBB) const {
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const SIRegisterInfo* TRI = &TII->getRegisterInfo();
+
// We don't need this if we only have spills since there is no user facing
// scratch.
@@ -59,16 +61,28 @@ void SIFrameLowering::emitFlatScratchIni
MRI.addLiveIn(FlatScratchInitReg);
MBB.addLiveIn(FlatScratchInitReg);
- // Copy the size in bytes.
- unsigned FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
- BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
- .addReg(FlatScrInitHi, RegState::Kill);
-
unsigned FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
+ unsigned FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
+ // Do a 64-bit pointer add.
+ if (ST.flatScratchIsPointer()) {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), AMDGPU::FLAT_SCR_LO)
+ .addReg(FlatScrInitLo)
+ .addReg(ScratchWaveOffsetReg);
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADDC_U32), AMDGPU::FLAT_SCR_HI)
+ .addReg(FlatScrInitHi)
+ .addImm(0);
+
+ return;
+ }
+
+ // Copy the size in bytes.
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
+ .addReg(FlatScrInitHi, RegState::Kill);
+
// Add wave offset in bytes to private base offset.
// See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
@@ -229,7 +243,7 @@ void SIFrameLowering::emitPrologue(Machi
// emitted after frame indices are eliminated.
if (MF.getFrameInfo().hasStackObjects() && MFI->hasFlatScratchInit())
- emitFlatScratchInit(TII, TRI, MF, MBB);
+ emitFlatScratchInit(ST, MF, MBB);
// We need to insert initialization of the scratch resource descriptor.
unsigned PreloadedScratchWaveOffsetReg = TRI->getPreloadedValue(
Modified: llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.h?rev=295554&r1=295553&r2=295554&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.h Sat Feb 18 12:29:53 2017
@@ -36,8 +36,7 @@ public:
RegScavenger *RS = nullptr) const override;
private:
- void emitFlatScratchInit(const SIInstrInfo *TII,
- const SIRegisterInfo* TRI,
+ void emitFlatScratchInit(const SISubtarget &ST,
MachineFunction &MF,
MachineBasicBlock &MBB) const;
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=295554&r1=295553&r2=295554&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Sat Feb 18 12:29:53 2017
@@ -532,7 +532,7 @@ bool SITargetLowering::isLegalAddressing
// in 8-bits, it can use a smaller encoding.
if (!isUInt<32>(AM.BaseOffs / 4))
return false;
- } else if (Subtarget->getGeneration() == SISubtarget::VOLCANIC_ISLANDS) {
+ } else if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
// On VI, these use the SMEM format and the offset is 20-bit in bytes.
if (!isUInt<20>(AM.BaseOffs))
return false;
@@ -2233,6 +2233,13 @@ SDValue SITargetLowering::lowerFP_ROUND(
SDValue SITargetLowering::getSegmentAperture(unsigned AS,
SelectionDAG &DAG) const {
+
+ if (Subtarget->hasApertureRegs()) { // Read from Aperture Registers directly.
+ unsigned RegNo = (AS == AMDGPUAS::LOCAL_ADDRESS) ? AMDGPU::SRC_SHARED_BASE :
+ AMDGPU::SRC_PRIVATE_BASE;
+ return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass, RegNo, MVT::i32);
+ }
+
SDLoc SL;
MachineFunction &MF = DAG.getMachineFunction();
SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp?rev=295554&r1=295553&r2=295554&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.cpp Sat Feb 18 12:29:53 2017
@@ -133,6 +133,12 @@ BitVector SIRegisterInfo::getReservedReg
reserveRegisterTuples(Reserved, AMDGPU::EXEC);
reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
+ // Reserve the memory aperture registers.
+ reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
+ reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
+ reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
+ reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
+
// Reserve Trap Handler registers - support is not implemented in Codegen.
reserveRegisterTuples(Reserved, AMDGPU::TBA);
reserveRegisterTuples(Reserved, AMDGPU::TMA);
Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td?rev=295554&r1=295553&r2=295554&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td Sat Feb 18 12:29:53 2017
@@ -44,6 +44,11 @@ def EXEC : RegisterWithSubRegs<"EXEC", [
def SCC : SIReg<"scc", 253>;
def M0 : SIReg <"m0", 124>;
+def SRC_SHARED_BASE : SIReg<"src_shared_base", 235>;
+def SRC_SHARED_LIMIT : SIReg<"src_shared_limit", 236>;
+def SRC_PRIVATE_BASE : SIReg<"src_private_base", 237>;
+def SRC_PRIVATE_LIMIT : SIReg<"src_private_limit", 238>;
+
// Trap handler registers
def TBA_LO : SIReg<"tba_lo", 108>;
def TBA_HI : SIReg<"tba_hi", 109>;
@@ -260,7 +265,8 @@ def VGPR_512 : RegisterTuples<[sub0, sub
// See comments in SIInstructions.td for more info.
def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
(add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI,
- TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI)> {
+ TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI, SRC_SHARED_BASE, SRC_SHARED_LIMIT,
+ SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT)> {
let AllocationPriority = 7;
}
Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp?rev=295554&r1=295553&r2=295554&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Sat Feb 18 12:29:53 2017
@@ -72,11 +72,11 @@ unsigned unpackBits(unsigned Src, unsign
return (Src & getBitMask(Shift, Width)) >> Shift;
}
-/// \returns Vmcnt bit shift.
-unsigned getVmcntBitShift() { return 0; }
+/// \returns Vmcnt bit shift (lower bits).
+unsigned getVmcntBitShiftLo() { return 0; }
-/// \returns Vmcnt bit width.
-unsigned getVmcntBitWidth() { return 4; }
+/// \returns Vmcnt bit width (lower bits).
+unsigned getVmcntBitWidthLo() { return 4; }
/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }
@@ -90,6 +90,12 @@ unsigned getLgkmcntBitShift() { return 8
/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }
+/// \returns Vmcnt bit shift (higher bits).
+unsigned getVmcntBitShiftHi() { return 14; }
+
+/// \returns Vmcnt bit width (higher bits).
+unsigned getVmcntBitWidthHi() { return 2; }
+
} // end namespace anonymous
namespace llvm {
@@ -120,6 +126,12 @@ IsaVersion getIsaVersion(const FeatureBi
if (Features.test(FeatureISAVersion8_1_0))
return {8, 1, 0};
+ // GFX9.
+ if (Features.test(FeatureISAVersion9_0_0))
+ return {9, 0, 0};
+ if (Features.test(FeatureISAVersion9_0_1))
+ return {9, 0, 1};
+
if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
return {0, 0, 0};
return {7, 0, 0};
@@ -399,7 +411,12 @@ std::pair<int, int> getIntegerPairAttrib
}
unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
- return (1 << getVmcntBitWidth()) - 1;
+ unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
+ if (Version.Major < 9)
+ return VmcntLo;
+
+ unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
+ return VmcntLo | VmcntHi;
}
unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
@@ -411,14 +428,27 @@ unsigned getLgkmcntBitMask(const IsaInfo
}
unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
- unsigned Vmcnt = getBitMask(getVmcntBitShift(), getVmcntBitWidth());
+ unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
- return Vmcnt | Expcnt | Lgkmcnt;
+ unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
+ if (Version.Major < 9)
+ return Waitcnt;
+
+ unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
+ return Waitcnt | VmcntHi;
}
unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
- return unpackBits(Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
+ unsigned VmcntLo =
+ unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
+ if (Version.Major < 9)
+ return VmcntLo;
+
+ unsigned VmcntHi =
+ unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
+ VmcntHi <<= getVmcntBitWidthLo();
+ return VmcntLo | VmcntHi;
}
unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
@@ -438,7 +468,13 @@ void decodeWaitcnt(const IsaInfo::IsaVer
unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
unsigned Vmcnt) {
- return packBits(Vmcnt, Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
+ Waitcnt =
+ packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
+ if (Version.Major < 9)
+ return Waitcnt;
+
+ Vmcnt >>= getVmcntBitWidthLo();
+ return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
}
unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h?rev=295554&r1=295553&r2=295554&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h Sat Feb 18 12:29:53 2017
@@ -216,7 +216,8 @@ unsigned decodeLgkmcnt(const IsaInfo::Is
/// \p Lgkmcnt respectively.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
-/// \p Vmcnt = \p Waitcnt[3:0]
+/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9 only)
+/// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14] (gfx9+ only)
/// \p Expcnt = \p Waitcnt[6:4]
/// \p Lgkmcnt = \p Waitcnt[11:8]
void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
@@ -238,9 +239,11 @@ unsigned encodeLgkmcnt(const IsaInfo::Is
/// \p Version.
///
/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
-/// Waitcnt[3:0] = \p Vmcnt
-/// Waitcnt[6:4] = \p Expcnt
-/// Waitcnt[11:8] = \p Lgkmcnt
+/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9 only)
+/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9+ only)
+/// Waitcnt[6:4] = \p Expcnt
+/// Waitcnt[11:8] = \p Lgkmcnt
+/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9+ only)
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
/// isa \p Version.
Modified: llvm/trunk/test/CodeGen/AMDGPU/addrspacecast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/addrspacecast.ll?rev=295554&r1=295553&r2=295554&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/addrspacecast.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/addrspacecast.ll Sat Feb 18 12:29:53 2017
@@ -1,14 +1,19 @@
-; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=HSA %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=HSA -check-prefix=CI %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=HSA -check-prefix=GFX9 %s
; HSA-LABEL: {{^}}use_group_to_flat_addrspacecast:
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
-; HSA: enable_sgpr_queue_ptr = 1
+; CI: enable_sgpr_queue_ptr = 1
+; GFX9: enable_sgpr_queue_ptr = 0
-; HSA-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
-; HSA-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}}
+; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
+; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10{{$}}
+; CI-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
+
+; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}}
+; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_shared_base
-; HSA-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
; HSA-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; HSA-DAG: v_cmp_ne_u32_e64 vcc, [[PTR]], -1
@@ -17,6 +22,12 @@
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]]
+
+; At most 2 digits. Make sure src_shared_base is not counted as a high
+; number SGPR.
+
+; CI: NumSgprs: {{[0-9][0-9]+}}
+; GFX9: NumSgprs: {{[0-9]+}}
define void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #0 {
%stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(4)*
store volatile i32 7, i32 addrspace(4)* %stof
@@ -26,12 +37,16 @@ define void @use_group_to_flat_addrspace
; HSA-LABEL: {{^}}use_private_to_flat_addrspacecast:
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
-; HSA: enable_sgpr_queue_ptr = 1
+; CI: enable_sgpr_queue_ptr = 1
+; GFX9: enable_sgpr_queue_ptr = 0
-; HSA-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
-; HSA-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11{{$}}
+; CI-DAG: s_load_dword [[PTR:s[0-9]+]], s[6:7], 0x0{{$}}
+; CI-DAG: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11{{$}}
+; CI-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
+
+; GFX9-DAG: s_load_dword [[PTR:s[0-9]+]], s[4:5], 0x0{{$}}
+; GFX9-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], src_private_base
-; HSA-DAG: v_mov_b32_e32 [[VAPERTURE:v[0-9]+]], [[APERTURE]]
; HSA-DAG: v_mov_b32_e32 [[VPTR:v[0-9]+]], [[PTR]]
; HSA-DAG: v_cmp_ne_u32_e64 vcc, [[PTR]], -1
@@ -40,6 +55,9 @@ define void @use_group_to_flat_addrspace
; HSA-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 7
; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, [[K]]
+
+; CI: NumSgprs: {{[0-9][0-9]+}}
+; GFX9: NumSgprs: {{[0-9]+}}
define void @use_private_to_flat_addrspacecast(i32* %ptr) #0 {
%stof = addrspacecast i32* %ptr to i32 addrspace(4)*
store volatile i32 7, i32 addrspace(4)* %stof
@@ -133,8 +151,10 @@ define void @use_flat_to_constant_addrsp
}
; HSA-LABEL: {{^}}cast_0_group_to_flat_addrspacecast:
-; HSA: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10
-; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
+; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x10
+; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
+; GFX9-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], src_shared_base
+
; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
@@ -176,8 +196,11 @@ define void @cast_neg1_flat_to_group_add
}
; HSA-LABEL: {{^}}cast_0_private_to_flat_addrspacecast:
-; HSA: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11
-; HSA-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
+; CI: s_load_dword [[APERTURE:s[0-9]+]], s[4:5], 0x11
+; CI-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE]]
+
+; GFX9-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], src_private_base
+
; HSA-DAG: v_mov_b32_e32 v[[LO:[0-9]+]], 0{{$}}
; HSA-DAG: v_mov_b32_e32 v[[K:[0-9]+]], 7{{$}}
; HSA: flat_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}, v[[K]]
@@ -226,9 +249,13 @@ end:
; Check for prologue initializing special SGPRs pointing to scratch.
; HSA-LABEL: {{^}}store_flat_scratch:
-; HSA-DAG: s_mov_b32 flat_scratch_lo, s9
-; HSA-DAG: s_add_u32 [[ADD:s[0-9]+]], s8, s11
-; HSA: s_lshr_b32 flat_scratch_hi, [[ADD]], 8
+; CI-DAG: s_mov_b32 flat_scratch_lo, s9
+; CI-DAG: s_add_u32 [[ADD:s[0-9]+]], s8, s11
+; CI: s_lshr_b32 flat_scratch_hi, [[ADD]], 8
+
+; GFX9: s_add_u32 flat_scratch_lo, s6, s9
+; GFX9: s_addc_u32 flat_scratch_hi, s7, 0
+
; HSA: flat_store_dword
; HSA: s_barrier
; HSA: flat_load_dword
Modified: llvm/trunk/test/CodeGen/AMDGPU/debugger-reserve-regs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/debugger-reserve-regs.ll?rev=295554&r1=295553&r2=295554&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/debugger-reserve-regs.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/debugger-reserve-regs.ll Sat Feb 18 12:29:53 2017
@@ -1,4 +1,5 @@
; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=+amdgpu-debugger-reserve-regs -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -O0 -mtriple=amdgcn--amdhsa -mcpu=gfx901 -mattr=+amdgpu-debugger-reserve-regs -verify-machineinstrs < %s | FileCheck %s
; CHECK: reserved_vgpr_first = {{[0-9]+}}
; CHECK-NEXT: reserved_vgpr_count = 4
; CHECK: ReservedVGPRFirst: {{[0-9]+}}
Modified: llvm/trunk/test/CodeGen/AMDGPU/hsa-note-no-func.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-note-no-func.ll?rev=295554&r1=295553&r2=295554&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-note-no-func.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-note-no-func.ll Sat Feb 18 12:29:53 2017
@@ -13,6 +13,8 @@
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx803 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI803 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx804 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI804 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx810 | FileCheck --check-prefix=HSA --check-prefix=HSA-VI810 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx900 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX900 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx901 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX901 %s
; HSA: .hsa_code_object_version 2,1
; HSA-CI700: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
@@ -24,3 +26,5 @@
; HSA-VI803: .hsa_code_object_isa 8,0,3,"AMD","AMDGPU"
; HSA-VI804: .hsa_code_object_isa 8,0,4,"AMD","AMDGPU"
; HSA-VI810: .hsa_code_object_isa 8,1,0,"AMD","AMDGPU"
+; HSA-GFX900: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"
+; HSA-GFX901: .hsa_code_object_isa 9,0,1,"AMD","AMDGPU"
Modified: llvm/trunk/test/CodeGen/AMDGPU/inserted-wait-states.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/inserted-wait-states.mir?rev=295554&r1=295553&r2=295554&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/inserted-wait-states.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/inserted-wait-states.mir Sat Feb 18 12:29:53 2017
@@ -1,6 +1,7 @@
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,VI
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,VI,GFX9
--- |
define void @div_fmas() { ret void }
@@ -9,6 +10,37 @@
define void @vmem_gt_8dw_store() { ret void }
define void @readwrite_lane() { ret void }
define void @rfe() { ret void }
+ define void @s_mov_fed_b32() { ret void }
+ define void @s_movrel() { ret void }
+ define void @v_interp() { ret void }
+
+ define void @mov_fed_hazard_crash_on_dbg_value(i32 addrspace(1)* %A) {
+ entry:
+ %A.addr = alloca i32 addrspace(1)*, align 4
+ store i32 addrspace(1)* %A, i32 addrspace(1)** %A.addr, align 4
+ call void @llvm.dbg.declare(metadata i32 addrspace(1)** %A.addr, metadata !5, metadata !11), !dbg !12
+ ret void
+ }
+
+ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+ !llvm.dbg.cu = !{!0}
+ !llvm.module.flags = !{!3, !4}
+
+ !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 (trunk 268929)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+ !1 = !DIFile(filename: "test01.cl", directory: "/dev/null")
+ !2 = !{}
+ !3 = !{i32 2, !"Dwarf Version", i32 2}
+ !4 = !{i32 2, !"Debug Info Version", i32 3}
+ !5 = !DILocalVariable(name: "A", arg: 1, scope: !6, file: !1, line: 1, type: !9)
+ !6 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
+ !7 = !DISubroutineType(types: !8)
+ !8 = !{null, !9}
+ !9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 64, align: 32)
+ !10 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+ !11 = !DIExpression()
+ !12 = !DILocation(line: 1, column: 30, scope: !6)
+
...
---
# GCN-LABEL: name: div_fmas
@@ -331,3 +363,185 @@ body: |
S_ENDPGM
...
+
+...
+---
+
+# GCN-LABEL: name: s_mov_fed_b32
+
+# GCN-LABEL: bb.0:
+# GCN: S_MOV_FED_B32
+# GFX9: S_NOP
+# GCN-NEXT: S_MOV_B32
+
+# GCN-LABEL: bb.1:
+# GCN: S_MOV_FED_B32
+# GFX9: S_NOP
+# GCN-NEXT: V_MOV_B32
+name: s_mov_fed_b32
+
+body: |
+ bb.0:
+ successors: %bb.1
+ %sgpr0 = S_MOV_FED_B32 %sgpr0
+ %sgpr0 = S_MOV_B32 %sgpr0
+ S_BRANCH %bb.1
+
+ bb.1:
+ %sgpr0 = S_MOV_FED_B32 %sgpr0
+ %vgpr0 = V_MOV_B32_e32 %sgpr0, implicit %exec
+ S_ENDPGM
+
+...
+
+...
+---
+
+# GCN-LABEL: name: s_movrel
+
+# GCN-LABEL: bb.0:
+# GCN: S_MOV_B32
+# GFX9: S_NOP
+# GCN-NEXT: S_MOVRELS_B32
+
+# GCN-LABEL: bb.1:
+# GCN: S_MOV_B32
+# GFX9: S_NOP
+# GCN-NEXT: S_MOVRELS_B64
+
+# GCN-LABEL: bb.2:
+# GCN: S_MOV_B32
+# GFX9: S_NOP
+# GCN-NEXT: S_MOVRELD_B32
+
+# GCN-LABEL: bb.3:
+# GCN: S_MOV_B32
+# GFX9: S_NOP
+# GCN-NEXT: S_MOVRELD_B64
+
+name: s_movrel
+
+body: |
+ bb.0:
+ successors: %bb.1
+ %m0 = S_MOV_B32 0
+ %sgpr0 = S_MOVRELS_B32 %sgpr0, implicit %m0
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2
+ %m0 = S_MOV_B32 0
+ %sgpr0_sgpr1 = S_MOVRELS_B64 %sgpr0_sgpr1, implicit %m0
+ S_BRANCH %bb.2
+
+ bb.2:
+ successors: %bb.3
+ %m0 = S_MOV_B32 0
+ %sgpr0 = S_MOVRELD_B32 %sgpr0, implicit %m0
+ S_BRANCH %bb.3
+
+ bb.3:
+ %m0 = S_MOV_B32 0
+ %sgpr0_sgpr1 = S_MOVRELD_B64 %sgpr0_sgpr1, implicit %m0
+ S_ENDPGM
+...
+
+...
+---
+
+# GCN-LABEL: name: v_interp
+
+# GCN-LABEL: bb.0:
+# GCN: S_MOV_B32
+# GFX9: S_NOP
+# GCN-NEXT: V_INTERP_P1_F32
+
+# GCN-LABEL: bb.1:
+# GCN: S_MOV_B32
+# GFX9: S_NOP
+# GCN-NEXT: V_INTERP_P2_F32
+
+# GCN-LABEL: bb.2:
+# GCN: S_MOV_B32
+# GFX9: S_NOP
+# GCN-NEXT: V_INTERP_P1_F32_16bank
+
+# GCN-LABEL: bb.3:
+# GCN: S_MOV_B32
+# GFX9: S_NOP
+# GCN-NEXT: V_INTERP_MOV_F32
+
+name: v_interp
+
+body: |
+ bb.0:
+ successors: %bb.1
+ %m0 = S_MOV_B32 0
+ %vgpr0 = V_INTERP_P1_F32 %vgpr0, 0, 0, implicit %m0, implicit %exec
+ S_BRANCH %bb.1
+
+ bb.1:
+ successors: %bb.2
+ %m0 = S_MOV_B32 0
+ %vgpr0 = V_INTERP_P2_F32 %vgpr0, %vgpr1, 0, 0, implicit %m0, implicit %exec
+ S_BRANCH %bb.2
+
+ bb.2:
+ successors: %bb.3
+ %m0 = S_MOV_B32 0
+ %vgpr0 = V_INTERP_P1_F32_16bank %vgpr0, 0, 0, implicit %m0, implicit %exec
+ S_BRANCH %bb.3
+
+ bb.3:
+ %m0 = S_MOV_B32 0
+ %vgpr0 = V_INTERP_MOV_F32 0, 0, 0, implicit %m0, implicit %exec
+ S_ENDPGM
+...
+---
+name: mov_fed_hazard_crash_on_dbg_value
+alignment: 0
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+liveins:
+ - { reg: '%sgpr4_sgpr5' }
+ - { reg: '%sgpr6_sgpr7' }
+ - { reg: '%sgpr9' }
+ - { reg: '%sgpr0_sgpr1_sgpr2_sgpr3' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 16
+ offsetAdjustment: 0
+ maxAlignment: 8
+ adjustsStack: false
+ hasCalls: false
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+stack:
+ - { id: 0, name: A.addr, offset: 0, size: 8, alignment: 8, local-offset: 0 }
+ - { id: 1, offset: 8, size: 4, alignment: 4 }
+body: |
+ bb.0.entry:
+ liveins: %sgpr4_sgpr5, %sgpr6_sgpr7, %sgpr9, %sgpr0_sgpr1_sgpr2_sgpr3
+
+ %flat_scr_lo = S_ADD_U32 %sgpr6, %sgpr9, implicit-def %scc
+ %flat_scr_hi = S_ADDC_U32 %sgpr7, 0, implicit-def %scc, implicit %scc
+ DBG_VALUE _, 2, !5, !11, debug-location !12
+ %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr4_sgpr5, 0, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`)
+ dead %sgpr6_sgpr7 = KILL %sgpr4_sgpr5
+ %sgpr8 = S_MOV_B32 %sgpr5
+ %vgpr0 = V_MOV_B32_e32 killed %sgpr8, implicit %exec
+ BUFFER_STORE_DWORD_OFFSET %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr9, 4, 0, 0, 0, implicit %exec :: (store 4 into %ir.A.addr + 4)
+ %sgpr8 = S_MOV_B32 %sgpr4, implicit killed %sgpr4_sgpr5
+ %vgpr0 = V_MOV_B32_e32 killed %sgpr8, implicit %exec
+ BUFFER_STORE_DWORD_OFFSET %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr9, 0, 0, 0, 0, implicit %exec :: (store 4 into %ir.A.addr)
+ S_ENDPGM
+
+...
Modified: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll?rev=295554&r1=295553&r2=295554&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll Sat Feb 18 12:29:53 2017
@@ -1,8 +1,11 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
; GCN-LABEL: {{^}}test_barrier:
-; GCN: buffer_store_dword
-; GCN: s_waitcnt
+; GFX8: buffer_store_dword
+; GFX8: s_waitcnt
+; GFX9: flat_store_dword
+; GFX9-NOT: s_waitcnt
; GCN: s_barrier
define void @test_barrier(i32 addrspace(1)* %out) #0 {
entry:
Added: llvm/trunk/test/MC/AMDGPU/sopp-gfx9.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/sopp-gfx9.s?rev=295554&view=auto
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/sopp-gfx9.s (added)
+++ llvm/trunk/test/MC/AMDGPU/sopp-gfx9.s Sat Feb 18 12:29:53 2017
@@ -0,0 +1,71 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -show-encoding %s | FileCheck --check-prefix=GFX9 %s
+
+//===----------------------------------------------------------------------===//
+// s_waitcnt
+//===----------------------------------------------------------------------===//
+
+s_waitcnt 0
+// GFX9: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+
+s_waitcnt vmcnt(0) & expcnt(0) & lgkmcnt(0)
+// GFX9: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+
+s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+// GFX9: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+
+s_waitcnt vmcnt(0), expcnt(0), lgkmcnt(0)
+// GFX9: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
+
+s_waitcnt vmcnt(1)
+// GFX9: s_waitcnt vmcnt(1) ; encoding: [0x71,0x0f,0x8c,0xbf]
+
+s_waitcnt vmcnt(9)
+// GFX9: s_waitcnt vmcnt(9) ; encoding: [0x79,0x0f,0x8c,0xbf]
+
+s_waitcnt expcnt(2)
+// GFX9: s_waitcnt expcnt(2) ; encoding: [0x2f,0xcf,0x8c,0xbf]
+
+s_waitcnt lgkmcnt(3)
+// GFX9: s_waitcnt lgkmcnt(3) ; encoding: [0x7f,0xc3,0x8c,0xbf]
+
+s_waitcnt lgkmcnt(9)
+// GFX9: s_waitcnt lgkmcnt(9) ; encoding: [0x7f,0xc9,0x8c,0xbf]
+
+s_waitcnt vmcnt(0), expcnt(0)
+// GFX9: s_waitcnt vmcnt(0) expcnt(0) ; encoding: [0x00,0x0f,0x8c,0xbf]
+
+s_waitcnt vmcnt(15)
+// GFX9: s_waitcnt vmcnt(15) ; encoding: [0x7f,0x0f,0x8c,0xbf]
+
+s_waitcnt vmcnt(15) expcnt(6)
+// GFX9: s_waitcnt vmcnt(15) expcnt(6) ; encoding: [0x6f,0x0f,0x8c,0xbf]
+
+s_waitcnt vmcnt(15) lgkmcnt(14)
+// GFX9: s_waitcnt vmcnt(15) lgkmcnt(14) ; encoding: [0x7f,0x0e,0x8c,0xbf]
+
+s_waitcnt vmcnt(15) expcnt(6) lgkmcnt(14)
+// GFX9: s_waitcnt vmcnt(15) expcnt(6) lgkmcnt(14) ; encoding: [0x6f,0x0e,0x8c,0xbf]
+
+s_waitcnt vmcnt(31)
+// GFX9: s_waitcnt vmcnt(31) ; encoding: [0x7f,0x4f,0x8c,0xbf]
+
+s_waitcnt vmcnt(31) expcnt(6)
+// GFX9: s_waitcnt vmcnt(31) expcnt(6) ; encoding: [0x6f,0x4f,0x8c,0xbf]
+
+s_waitcnt vmcnt(31) lgkmcnt(14)
+// GFX9: s_waitcnt vmcnt(31) lgkmcnt(14) ; encoding: [0x7f,0x4e,0x8c,0xbf]
+
+s_waitcnt vmcnt(31) expcnt(6) lgkmcnt(14)
+// GFX9: s_waitcnt vmcnt(31) expcnt(6) lgkmcnt(14) ; encoding: [0x6f,0x4e,0x8c,0xbf]
+
+s_waitcnt vmcnt(62)
+// GFX9: s_waitcnt vmcnt(62) ; encoding: [0x7e,0xcf,0x8c,0xbf]
+
+s_waitcnt vmcnt(62) expcnt(6)
+// GFX9: s_waitcnt vmcnt(62) expcnt(6) ; encoding: [0x6e,0xcf,0x8c,0xbf]
+
+s_waitcnt vmcnt(62) lgkmcnt(14)
+// GFX9: s_waitcnt vmcnt(62) lgkmcnt(14) ; encoding: [0x7e,0xce,0x8c,0xbf]
+
+s_waitcnt vmcnt(62) expcnt(6) lgkmcnt(14)
+// GFX9: s_waitcnt vmcnt(62) expcnt(6) lgkmcnt(14) ; encoding: [0x6e,0xce,0x8c,0xbf]
More information about the llvm-commits
mailing list