[llvm] r225988 - R600/SI: Spill VGPRs to scratch space for compute shaders
Tom Stellard
thomas.stellard at amd.com
Wed Jan 14 07:42:31 PST 2015
Author: tstellar
Date: Wed Jan 14 09:42:31 2015
New Revision: 225988
URL: http://llvm.org/viewvc/llvm-project?rev=225988&view=rev
Log:
R600/SI: Spill VGPRs to scratch space for compute shaders
Added:
llvm/trunk/lib/Target/R600/SIPrepareScratchRegs.cpp
Modified:
llvm/trunk/lib/Target/R600/AMDGPU.h
llvm/trunk/lib/Target/R600/AMDGPUTargetMachine.cpp
llvm/trunk/lib/Target/R600/CMakeLists.txt
llvm/trunk/lib/Target/R600/SIInstrInfo.cpp
llvm/trunk/lib/Target/R600/SIInstrInfo.td
llvm/trunk/lib/Target/R600/SIInstructions.td
llvm/trunk/lib/Target/R600/SIMachineFunctionInfo.cpp
llvm/trunk/lib/Target/R600/SIMachineFunctionInfo.h
llvm/trunk/lib/Target/R600/SIRegisterInfo.cpp
llvm/trunk/lib/Target/R600/SIRegisterInfo.h
Modified: llvm/trunk/lib/Target/R600/AMDGPU.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPU.h?rev=225988&r1=225987&r2=225988&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/AMDGPU.h (original)
+++ llvm/trunk/lib/Target/R600/AMDGPU.h Wed Jan 14 09:42:31 2015
@@ -47,6 +47,7 @@ FunctionPass *createSIFixSGPRCopiesPass(
FunctionPass *createSIFixSGPRLiveRangesPass();
FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
FunctionPass *createSIInsertWaits(TargetMachine &tm);
+FunctionPass *createSIPrepareScratchRegs();
void initializeSIFoldOperandsPass(PassRegistry &);
extern char &SIFoldOperandsID;
Modified: llvm/trunk/lib/Target/R600/AMDGPUTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUTargetMachine.cpp?rev=225988&r1=225987&r2=225988&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/AMDGPUTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/R600/AMDGPUTargetMachine.cpp Wed Jan 14 09:42:31 2015
@@ -189,6 +189,7 @@ void AMDGPUPassConfig::addPostRegAlloc()
const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
+ addPass(createSIPrepareScratchRegs(), false);
addPass(createSIShrinkInstructionsPass(), false);
}
}
Modified: llvm/trunk/lib/Target/R600/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/CMakeLists.txt?rev=225988&r1=225987&r2=225988&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/CMakeLists.txt (original)
+++ llvm/trunk/lib/Target/R600/CMakeLists.txt Wed Jan 14 09:42:31 2015
@@ -51,6 +51,7 @@ add_llvm_target(R600CodeGen
SILowerControlFlow.cpp
SILowerI1Copies.cpp
SIMachineFunctionInfo.cpp
+ SIPrepareScratchRegs.cpp
SIRegisterInfo.cpp
SIShrinkInstructions.cpp
SITypeRewriter.cpp
Modified: llvm/trunk/lib/Target/R600/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIInstrInfo.cpp?rev=225988&r1=225987&r2=225988&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/R600/SIInstrInfo.cpp Wed Jan 14 09:42:31 2015
@@ -433,13 +433,9 @@ unsigned SIInstrInfo::getMovOpcode(const
static bool shouldTryToSpillVGPRs(MachineFunction *MF) {
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
- const TargetMachine &TM = MF->getTarget();
- // FIXME: Even though it can cause problems, we need to enable
- // spilling at -O0, since the fast register allocator always
- // spills registers that are live at the end of blocks.
- return MFI->getShaderType() == ShaderType::COMPUTE &&
- TM.getOptLevel() == CodeGenOpt::None;
+ // FIXME: Implement spilling for other shader types.
+ return MFI->getShaderType() == ShaderType::COMPUTE;
}
@@ -450,6 +446,7 @@ void SIInstrInfo::storeRegToStackSlot(Ma
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
MachineFunction *MF = MBB.getParent();
+ SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
MachineFrameInfo *FrameInfo = MF->getFrameInfo();
DebugLoc DL = MBB.findDebugLoc(MI);
int Opcode = -1;
@@ -466,6 +463,8 @@ void SIInstrInfo::storeRegToStackSlot(Ma
case 512: Opcode = AMDGPU::SI_SPILL_S512_SAVE; break;
}
} else if(shouldTryToSpillVGPRs(MF) && RI.hasVGPRs(RC)) {
+ MFI->setHasSpilledVGPRs();
+
switch(RC->getSize() * 8) {
case 32: Opcode = AMDGPU::SI_SPILL_V32_SAVE; break;
case 64: Opcode = AMDGPU::SI_SPILL_V64_SAVE; break;
@@ -480,7 +479,11 @@ void SIInstrInfo::storeRegToStackSlot(Ma
FrameInfo->setObjectAlignment(FrameIndex, 4);
BuildMI(MBB, MI, DL, get(Opcode))
.addReg(SrcReg)
- .addFrameIndex(FrameIndex);
+ .addFrameIndex(FrameIndex)
+ // Place-holder registers, these will be filled in by
+ // SIPrepareScratchRegs.
+ .addReg(AMDGPU::SGPR0_SGPR1, RegState::Undef)
+ .addReg(AMDGPU::SGPR0, RegState::Undef);
} else {
LLVMContext &Ctx = MF->getFunction()->getContext();
Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Do not know how to"
@@ -522,7 +525,12 @@ void SIInstrInfo::loadRegFromStackSlot(M
if (Opcode != -1) {
FrameInfo->setObjectAlignment(FrameIndex, 4);
BuildMI(MBB, MI, DL, get(Opcode), DestReg)
- .addFrameIndex(FrameIndex);
+ .addFrameIndex(FrameIndex)
+ // Place-holder registers, these will be filled in by
+ // SIPrepareScratchRegs.
+ .addReg(AMDGPU::SGPR0_SGPR1, RegState::Undef)
+ .addReg(AMDGPU::SGPR0, RegState::Undef);
+
} else {
LLVMContext &Ctx = MF->getFunction()->getContext();
Ctx.emitError("SIInstrInfo::loadRegFromStackSlot - Do not know how to"
@@ -553,7 +561,7 @@ unsigned SIInstrInfo::calculateLDSSpillA
MachineBasicBlock::iterator Insert = Entry.front();
DebugLoc DL = Insert->getDebugLoc();
- TIDReg = RI.findUnusedVGPR(MF->getRegInfo());
+ TIDReg = RI.findUnusedRegister(MF->getRegInfo(), &AMDGPU::VGPR_32RegClass);
if (TIDReg == AMDGPU::NoRegister)
return TIDReg;
Modified: llvm/trunk/lib/Target/R600/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIInstrInfo.td?rev=225988&r1=225987&r2=225988&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/R600/SIInstrInfo.td Wed Jan 14 09:42:31 2015
@@ -1763,6 +1763,7 @@ multiclass MUBUF_Load_Helper_vi <bits<7>
multiclass MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass,
ValueType store_vt, SDPatternOperator st> {
+ let mayLoad = 0, mayStore = 1 in {
let addr64 = 0 in {
def "" : MUBUF_si <
@@ -1820,6 +1821,7 @@ multiclass MUBUF_Store_Helper <bits<7> o
let tfe = 0;
let soffset = 128; // ZERO
}
+ } // End mayLoad = 0, mayStore = 1
}
class FLAT_Load_Helper <bits<7> op, string asm, RegisterClass regClass> :
Modified: llvm/trunk/lib/Target/R600/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIInstructions.td?rev=225988&r1=225987&r2=225988&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/R600/SIInstructions.td Wed Jan 14 09:42:31 2015
@@ -1940,18 +1940,20 @@ def V_SUB_F64 : InstSI <
multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
- def _SAVE : InstSI <
- (outs),
- (ins sgpr_class:$src, i32imm:$frame_idx),
- "", []
- >;
-
- def _RESTORE : InstSI <
- (outs sgpr_class:$dst),
- (ins i32imm:$frame_idx),
- "", []
- >;
-
+ let UseNamedOperandTable = 1 in {
+ def _SAVE : InstSI <
+ (outs),
+ (ins sgpr_class:$src, i32imm:$frame_idx, SReg_64:$scratch_ptr,
+ SReg_32:$scratch_offset),
+ "", []
+ >;
+
+ def _RESTORE : InstSI <
+ (outs sgpr_class:$dst),
+ (ins i32imm:$frame_idx, SReg_64:$scratch_ptr, SReg_32:$scratch_offset),
+ "", []
+ >;
+ } // End UseNamedOperandTable = 1
}
defm SI_SPILL_S32 : SI_SPILL_SGPR <SReg_32>;
@@ -1961,17 +1963,20 @@ defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg
defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
- def _SAVE : InstSI <
- (outs),
- (ins vgpr_class:$src, i32imm:$frame_idx),
- "", []
- >;
-
- def _RESTORE : InstSI <
- (outs vgpr_class:$dst),
- (ins i32imm:$frame_idx),
- "", []
- >;
+ let UseNamedOperandTable = 1 in {
+ def _SAVE : InstSI <
+ (outs),
+ (ins vgpr_class:$src, i32imm:$frame_idx, SReg_64:$scratch_ptr,
+ SReg_32:$scratch_offset),
+ "", []
+ >;
+
+ def _RESTORE : InstSI <
+ (outs vgpr_class:$dst),
+ (ins i32imm:$frame_idx, SReg_64:$scratch_ptr, SReg_32:$scratch_offset),
+ "", []
+ >;
+ } // End UseNamedOperandTable = 1
}
defm SI_SPILL_V32 : SI_SPILL_VGPR <VGPR_32>;
Modified: llvm/trunk/lib/Target/R600/SIMachineFunctionInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIMachineFunctionInfo.cpp?rev=225988&r1=225987&r2=225988&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIMachineFunctionInfo.cpp (original)
+++ llvm/trunk/lib/Target/R600/SIMachineFunctionInfo.cpp Wed Jan 14 09:42:31 2015
@@ -29,6 +29,7 @@ void SIMachineFunctionInfo::anchor() {}
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: AMDGPUMachineFunction(MF),
TIDReg(AMDGPU::NoRegister),
+ HasSpilledVGPRs(false),
PSInputAddr(0),
NumUserSGPRs(0),
LDSWaveSpillSize(0) { }
@@ -50,7 +51,7 @@ SIMachineFunctionInfo::SpilledReg SIMach
struct SpilledReg Spill;
if (!LaneVGPRs.count(LaneVGPRIdx)) {
- unsigned LaneVGPR = TRI->findUnusedVGPR(MRI);
+ unsigned LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass);
LaneVGPRs[LaneVGPRIdx] = LaneVGPR;
MRI.setPhysRegUsed(LaneVGPR);
Modified: llvm/trunk/lib/Target/R600/SIMachineFunctionInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIMachineFunctionInfo.h?rev=225988&r1=225987&r2=225988&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIMachineFunctionInfo.h (original)
+++ llvm/trunk/lib/Target/R600/SIMachineFunctionInfo.h Wed Jan 14 09:42:31 2015
@@ -29,6 +29,7 @@ class SIMachineFunctionInfo : public AMD
void anchor() override;
unsigned TIDReg;
+ bool HasSpilledVGPRs;
public:
@@ -52,6 +53,8 @@ public:
bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
unsigned getTIDReg() const { return TIDReg; };
void setTIDReg(unsigned Reg) { TIDReg = Reg; }
+ bool hasSpilledVGPRs() const { return HasSpilledVGPRs; }
+ void setHasSpilledVGPRs(bool Spill = true) { HasSpilledVGPRs = Spill; }
unsigned getMaximumWorkGroupSize(const MachineFunction &MF) const;
};
Added: llvm/trunk/lib/Target/R600/SIPrepareScratchRegs.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIPrepareScratchRegs.cpp?rev=225988&view=auto
==============================================================================
--- llvm/trunk/lib/Target/R600/SIPrepareScratchRegs.cpp (added)
+++ llvm/trunk/lib/Target/R600/SIPrepareScratchRegs.cpp Wed Jan 14 09:42:31 2015
@@ -0,0 +1,196 @@
+//===-- SIPrepareScratchRegs.cpp - Prepare scratch regs for VGPR spills ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+/// This pass loads scratch pointer and scratch offset into a register or a
+/// frame index which can be used anywhere in the program. These values will
+/// be used for spilling VGPRs.
+///
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIDefines.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+
+using namespace llvm;
+
+namespace {
+
+/// Machine function pass that supplies the scratch pointer and scratch offset
+/// registers to the VGPR spill pseudo instructions of a function.
+class SIPrepareScratchRegs : public MachineFunctionPass {
+  // Handed to the MachineFunctionPass constructor as this pass's identifier.
+  static char ID;
+
+public:
+  SIPrepareScratchRegs() : MachineFunctionPass(ID) {}
+
+  /// Human-readable name of the pass.
+  const char *getPassName() const override {
+    return "SI prepare scratch registers";
+  }
+
+  /// Rewrites the VGPR spill pseudos of \p MF; defined out of line.
+  bool runOnMachineFunction(MachineFunction &MF) override;
+};
+
+} // End anonymous namespace
+
+char SIPrepareScratchRegs::ID = 0;
+
+/// Factory for the pass; the returned object is heap-allocated with new.
+FunctionPass *llvm::createSIPrepareScratchRegs() {
+  FunctionPass *Pass = new SIPrepareScratchRegs();
+  return Pass;
+}
+
+/// Give every VGPR spill pseudo in \p MF real scratch pointer and scratch
+/// offset operands.
+///
+/// The preloaded scratch pointer and scratch wave offset are copied at the top
+/// of the entry block into SGPRs that are unused in the function or, when no
+/// such SGPR exists, saved to SGPR spill slots.  Operands 2 and 3 of each
+/// SI_SPILL_V*_SAVE / SI_SPILL_V*_RESTORE are then rewritten to registers
+/// holding those values; when no dedicated SGPR was found the value is
+/// restored from its spill slot into a scavenged register right in front of
+/// the spill instruction.
+///
+/// \returns false (leaving \p MF untouched) if the function has no VGPR
+/// spills, true otherwise.
+bool SIPrepareScratchRegs::runOnMachineFunction(MachineFunction &MF) {
+  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+  // FIXME: If we don't have enough VGPRs for SGPR spilling we will need to
+  // run this pass.
+  // Checked first so that nothing in the entry block is dereferenced for
+  // functions this pass has no work to do on.
+  if (!MFI->hasSpilledVGPRs())
+    return false;
+
+  const SIInstrInfo *TII =
+      static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo());
+  const SIRegisterInfo *TRI = &TII->getRegisterInfo();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+  MachineFrameInfo *FrameInfo = MF.getFrameInfo();
+  MachineBasicBlock *Entry = MF.begin();
+  MachineBasicBlock::iterator EntryPt = Entry->begin();
+  // findDebugLoc() tolerates EntryPt == end(), unlike EntryPt->getDebugLoc().
+  DebugLoc EntryDL = Entry->findDebugLoc(EntryPt);
+
+  unsigned ScratchPtrPreloadReg =
+      TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR);
+  unsigned ScratchOffsetPreloadReg =
+      TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
+
+  if (!Entry->isLiveIn(ScratchPtrPreloadReg))
+    Entry->addLiveIn(ScratchPtrPreloadReg);
+
+  if (!Entry->isLiveIn(ScratchOffsetPreloadReg))
+    Entry->addLiveIn(ScratchOffsetPreloadReg);
+
+  // Load the scratch pointer
+  unsigned ScratchPtrReg =
+      TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass);
+  int ScratchPtrFI = -1;
+
+  if (ScratchPtrReg != AMDGPU::NoRegister) {
+    // Found an SGPR to use.
+    MRI.setPhysRegUsed(ScratchPtrReg);
+    BuildMI(*Entry, EntryPt, EntryDL, TII->get(AMDGPU::S_MOV_B64),
+            ScratchPtrReg)
+            .addReg(ScratchPtrPreloadReg);
+  } else {
+    // No SGPR is available, we must spill.
+    ScratchPtrFI = FrameInfo->CreateSpillStackObject(8, 4);
+    BuildMI(*Entry, EntryPt, EntryDL, TII->get(AMDGPU::SI_SPILL_S64_SAVE))
+            .addReg(ScratchPtrPreloadReg)
+            .addFrameIndex(ScratchPtrFI)
+            // The spill pseudos take $scratch_ptr and $scratch_offset
+            // operands; pass NoRegister like the RESTOREs built below.
+            .addReg(AMDGPU::NoRegister)
+            .addReg(AMDGPU::NoRegister);
+  }
+
+  // Load the scratch offset.
+  unsigned ScratchOffsetReg =
+      TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_32RegClass);
+  int ScratchOffsetFI = -1;
+
+  if (ScratchOffsetReg != AMDGPU::NoRegister) {
+    // Found an SGPR to use
+    MRI.setPhysRegUsed(ScratchOffsetReg);
+    BuildMI(*Entry, EntryPt, EntryDL, TII->get(AMDGPU::S_MOV_B32),
+            ScratchOffsetReg)
+            .addReg(ScratchOffsetPreloadReg);
+  } else {
+    // No SGPR is available, we must spill.
+    ScratchOffsetFI = FrameInfo->CreateSpillStackObject(4, 4);
+    BuildMI(*Entry, EntryPt, EntryDL, TII->get(AMDGPU::SI_SPILL_S32_SAVE))
+            .addReg(ScratchOffsetPreloadReg)
+            .addFrameIndex(ScratchOffsetFI)
+            .addReg(AMDGPU::NoRegister)
+            .addReg(AMDGPU::NoRegister);
+  }
+
+  // Now that we have the scratch pointer and offset values, we need to
+  // add them to all the SI_SPILL_V* instructions.
+
+  RegScavenger RS;
+  const bool UseRegScavenger =
+      (ScratchPtrReg == AMDGPU::NoRegister ||
+       ScratchOffsetReg == AMDGPU::NoRegister);
+  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+       BI != BE; ++BI) {
+
+    MachineBasicBlock &MBB = *BI;
+    if (UseRegScavenger)
+      RS.enterBasicBlock(&MBB);
+
+    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+         I != E; ++I) {
+      MachineInstr &MI = *I;
+      switch (MI.getOpcode()) {
+      default: break;
+      case AMDGPU::SI_SPILL_V512_SAVE:
+      case AMDGPU::SI_SPILL_V256_SAVE:
+      case AMDGPU::SI_SPILL_V128_SAVE:
+      case AMDGPU::SI_SPILL_V96_SAVE:
+      case AMDGPU::SI_SPILL_V64_SAVE:
+      case AMDGPU::SI_SPILL_V32_SAVE:
+      case AMDGPU::SI_SPILL_V32_RESTORE:
+      case AMDGPU::SI_SPILL_V64_RESTORE:
+      case AMDGPU::SI_SPILL_V96_RESTORE:
+      case AMDGPU::SI_SPILL_V128_RESTORE:
+      case AMDGPU::SI_SPILL_V256_RESTORE:
+      case AMDGPU::SI_SPILL_V512_RESTORE: {
+        DebugLoc DL = MI.getDebugLoc();
+
+        // Registers for this one instruction.  A scavenged register is only
+        // known to be free here, so it is deliberately not stored back into
+        // ScratchPtrReg / ScratchOffsetReg: every spill instruction gets its
+        // own reload.
+        // NOTE(review): nothing tells RS that PtrReg is taken before the
+        // second scavengeRegister() call below - confirm the two results
+        // cannot overlap.
+        unsigned PtrReg = ScratchPtrReg;
+        unsigned OffsetReg = ScratchOffsetReg;
+
+        // Scratch Pointer
+        if (PtrReg == AMDGPU::NoRegister) {
+          PtrReg = RS.scavengeRegister(&AMDGPU::SGPR_64RegClass, 0);
+          if (PtrReg != AMDGPU::NoRegister) {
+            BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S64_RESTORE),
+                    PtrReg)
+                    .addFrameIndex(ScratchPtrFI)
+                    .addReg(AMDGPU::NoRegister)
+                    .addReg(AMDGPU::NoRegister);
+          }
+        } else if (!MBB.isLiveIn(PtrReg)) {
+          MBB.addLiveIn(PtrReg);
+        }
+
+        // Scratch Offset
+        if (OffsetReg == AMDGPU::NoRegister) {
+          OffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
+          if (OffsetReg != AMDGPU::NoRegister) {
+            BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE),
+                    OffsetReg)
+                    .addFrameIndex(ScratchOffsetFI)
+                    .addReg(AMDGPU::NoRegister)
+                    .addReg(AMDGPU::NoRegister);
+          }
+        } else if (!MBB.isLiveIn(OffsetReg)) {
+          MBB.addLiveIn(OffsetReg);
+        }
+
+        if (PtrReg == AMDGPU::NoRegister ||
+            OffsetReg == AMDGPU::NoRegister) {
+          LLVMContext &Ctx = MF.getFunction()->getContext();
+          Ctx.emitError("ran out of SGPRs for spilling VGPRs");
+          // Keep the operands well-formed after the error: replace only the
+          // register that is missing, with one of the operand's own width.
+          if (PtrReg == AMDGPU::NoRegister)
+            PtrReg = AMDGPU::SGPR0_SGPR1;
+          if (OffsetReg == AMDGPU::NoRegister)
+            OffsetReg = AMDGPU::SGPR0;
+        }
+        // Operand 2 is $scratch_ptr and operand 3 is $scratch_offset for both
+        // the _SAVE and the _RESTORE form of the pseudo.
+        MI.getOperand(2).setReg(PtrReg);
+        MI.getOperand(3).setReg(OffsetReg);
+
+        break;
+      }
+      }
+      if (UseRegScavenger)
+        RS.forward();
+    }
+  }
+  return true;
+}
Modified: llvm/trunk/lib/Target/R600/SIRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIRegisterInfo.cpp?rev=225988&r1=225987&r2=225988&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIRegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/R600/SIRegisterInfo.cpp Wed Jan 14 09:42:31 2015
@@ -23,6 +23,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Debug.h"
using namespace llvm;
SIRegisterInfo::SIRegisterInfo(const AMDGPUSubtarget &st)
@@ -94,6 +95,84 @@ static unsigned getNumSubRegsForSpillOp(
}
}
+/// Emit, in front of \p MI, the instructions that move \p Value to or from
+/// scratch memory one 32-bit sub-register at a time.
+///
+/// A 128-bit SGPR tuple is scavenged and filled with \p ScratchPtr (low 64
+/// bits) and the RSRC constants (upper two dwords); one \p LoadStoreOp is then
+/// built per sub-register, advancing the immediate offset by 4 each time.
+/// When \p Offset plus the spill size is not an unsigned 12-bit value, the
+/// offset is instead added to \p ScratchOffset in a scavenged SGPR and the
+/// immediate restarts at 0.  \p MI itself is left in place.
+///
+/// \param LoadStoreOp   opcode built for each sub-register; its mayLoad()
+///                      flag decides whether \p Value is defined or read.
+/// \param Value         register being spilled or restored.
+/// \param ScratchPtr    64-bit SGPR copied into the low half of the tuple.
+/// \param ScratchOffset SGPR used as the soffset operand.
+/// \param Offset        immediate offset of the first sub-register.
+/// \param RS            scavenger used for the temporary SGPRs; if it returns
+///                      NoRegister an error is emitted on the LLVMContext and
+///                      SGPR0-based registers are used as stand-ins.
+void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
+                                           unsigned LoadStoreOp,
+                                           unsigned Value,
+                                           unsigned ScratchPtr,
+                                           unsigned ScratchOffset,
+                                           int64_t Offset,
+                                           RegScavenger *RS) const {
+
+  const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo());
+  MachineBasicBlock *MBB = MI->getParent();
+  const MachineFunction *MF = MBB->getParent();
+  LLVMContext &Ctx = MF->getFunction()->getContext();
+  DebugLoc DL = MI->getDebugLoc();
+  const bool IsLoad = TII->get(LoadStoreOp).mayLoad();
+
+  bool OutOfSGPRs = false;
+
+  // 128-bit tuple that will hold the buffer resource words.
+  unsigned RsrcTuple = RS->scavengeRegister(&AMDGPU::SReg_128RegClass, MI, 0);
+  if (RsrcTuple == AMDGPU::NoRegister) {
+    OutOfSGPRs = true;
+    RsrcTuple = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
+  }
+
+  const unsigned DwordCount = getNumSubRegsForSpillOp(MI->getOpcode());
+  const unsigned SpillBytes = DwordCount * 4;
+
+  const uint64_t RsrcWords = AMDGPU::RSRC_DATA_FORMAT |
+                             AMDGPU::RSRC_TID_ENABLE |
+                             0xffffffff; // Size
+
+  // sub0_sub1 <- scratch pointer
+  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B64),
+          getSubReg(RsrcTuple, AMDGPU::sub0_sub1))
+          .addReg(ScratchPtr)
+          .addReg(RsrcTuple, RegState::ImplicitDefine);
+
+  // sub2 <- low 32 bits of the resource constants
+  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32),
+          getSubReg(RsrcTuple, AMDGPU::sub2))
+          .addImm(RsrcWords & 0xffffffff)
+          .addReg(RsrcTuple, RegState::ImplicitDefine);
+
+  // sub3 <- high 32 bits of the resource constants
+  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32),
+          getSubReg(RsrcTuple, AMDGPU::sub3))
+          .addImm(RsrcWords >> 32)
+          .addReg(RsrcTuple, RegState::ImplicitDefine);
+
+  // By default the caller's offset register is used unchanged.
+  unsigned SOffsetReg = ScratchOffset;
+  if (!isUInt<12>(Offset + SpillBytes)) {
+    // Fold the frame offset into a fresh SGPR and restart the immediate at 0.
+    SOffsetReg = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
+    if (SOffsetReg == AMDGPU::NoRegister) {
+      OutOfSGPRs = true;
+      SOffsetReg = AMDGPU::SGPR0;
+    }
+    BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffsetReg)
+            .addReg(ScratchOffset)
+            .addImm(Offset);
+    Offset = 0;
+  }
+
+  if (OutOfSGPRs)
+    Ctx.emitError("Ran out of SGPRs for spilling VGPRS");
+
+  for (unsigned Idx = 0; Idx != DwordCount; ++Idx, Offset += 4) {
+    unsigned DataReg = Value;
+    if (DwordCount > 1)
+      DataReg = getPhysRegSubReg(Value, &AMDGPU::VGPR_32RegClass, Idx);
+
+    // The temporaries are marked killed on the final access only.
+    const bool IsLastAccess = (Idx + 1 == DwordCount);
+
+    BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
+            .addReg(DataReg, getDefRegState(IsLoad))
+            .addReg(RsrcTuple, getKillRegState(IsLastAccess))
+            .addImm(Offset)
+            .addReg(SOffsetReg, getKillRegState(IsLastAccess))
+            .addImm(0) // glc
+            .addImm(0) // slc
+            .addImm(0) // tfe
+            .addReg(Value, RegState::Implicit | getDefRegState(IsLoad));
+  }
+}
+
void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
int SPAdj, unsigned FIOperandNum,
RegScavenger *RS) const {
@@ -162,7 +241,8 @@ void SIRegisterInfo::eliminateFrameIndex
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg)
.addReg(Spill.VGPR)
- .addImm(Spill.Lane);
+ .addImm(Spill.Lane)
+ .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
if (isM0) {
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
.addReg(SubReg);
@@ -179,71 +259,24 @@ void SIRegisterInfo::eliminateFrameIndex
case AMDGPU::SI_SPILL_V128_SAVE:
case AMDGPU::SI_SPILL_V96_SAVE:
case AMDGPU::SI_SPILL_V64_SAVE:
- case AMDGPU::SI_SPILL_V32_SAVE: {
- unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
- unsigned SrcReg = MI->getOperand(0).getReg();
- int64_t Offset = FrameInfo->getObjectOffset(Index);
- unsigned Size = NumSubRegs * 4;
- unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
-
- for (unsigned i = 0, e = NumSubRegs; i != e; ++i) {
- unsigned SubReg = NumSubRegs > 1 ?
- getPhysRegSubReg(SrcReg, &AMDGPU::VGPR_32RegClass, i) :
- SrcReg;
- Offset += (i * 4);
- MFI->LDSWaveSpillSize = std::max((unsigned)Offset + 4, (unsigned)MFI->LDSWaveSpillSize);
-
- unsigned AddrReg = TII->calculateLDSSpillAddress(*MBB, MI, RS, TmpReg,
- Offset, Size);
-
- if (AddrReg == AMDGPU::NoRegister) {
- LLVMContext &Ctx = MF->getFunction()->getContext();
- Ctx.emitError("Ran out of VGPRs for spilling VGPRS");
- AddrReg = AMDGPU::VGPR0;
- }
-
- // Store the value in LDS
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::DS_WRITE_B32))
- .addImm(0) // gds
- .addReg(AddrReg, RegState::Kill) // addr
- .addReg(SubReg) // data0
- .addImm(0); // offset
- }
-
+ case AMDGPU::SI_SPILL_V32_SAVE:
+ buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
+ TII->getNamedOperand(*MI, AMDGPU::OpName::src)->getReg(),
+ TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_ptr)->getReg(),
+ TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
+ FrameInfo->getObjectOffset(Index), RS);
MI->eraseFromParent();
break;
- }
case AMDGPU::SI_SPILL_V32_RESTORE:
case AMDGPU::SI_SPILL_V64_RESTORE:
case AMDGPU::SI_SPILL_V128_RESTORE:
case AMDGPU::SI_SPILL_V256_RESTORE:
case AMDGPU::SI_SPILL_V512_RESTORE: {
- unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
- unsigned DstReg = MI->getOperand(0).getReg();
- int64_t Offset = FrameInfo->getObjectOffset(Index);
- unsigned Size = NumSubRegs * 4;
- unsigned TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
-
- // FIXME: We could use DS_READ_B64 here to optimize for larger registers.
- for (unsigned i = 0, e = NumSubRegs; i != e; ++i) {
- unsigned SubReg = NumSubRegs > 1 ?
- getPhysRegSubReg(DstReg, &AMDGPU::VGPR_32RegClass, i) :
- DstReg;
-
- Offset += (i * 4);
- unsigned AddrReg = TII->calculateLDSSpillAddress(*MBB, MI, RS, TmpReg,
- Offset, Size);
- if (AddrReg == AMDGPU::NoRegister) {
- LLVMContext &Ctx = MF->getFunction()->getContext();
- Ctx.emitError("Ran out of VGPRs for spilling VGPRs");
- AddrReg = AMDGPU::VGPR0;
- }
-
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::DS_READ_B32), SubReg)
- .addImm(0) // gds
- .addReg(AddrReg, RegState::Kill) // addr
- .addImm(0); //offset
- }
+ buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
+ TII->getNamedOperand(*MI, AMDGPU::OpName::dst)->getReg(),
+ TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_ptr)->getReg(),
+ TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
+ FrameInfo->getObjectOffset(Index), RS);
MI->eraseFromParent();
break;
}
@@ -431,9 +464,8 @@ unsigned SIRegisterInfo::getPreloadedVal
/// \brief Returns a register that is not used at any point in the function.
/// If all registers are used, then this function will return
// AMDGPU::NoRegister.
-unsigned SIRegisterInfo::findUnusedVGPR(const MachineRegisterInfo &MRI) const {
-
- const TargetRegisterClass *RC = &AMDGPU::VGPR_32RegClass;
+unsigned SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
+ const TargetRegisterClass *RC) const {
for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
I != E; ++I) {
Modified: llvm/trunk/lib/Target/R600/SIRegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIRegisterInfo.h?rev=225988&r1=225987&r2=225988&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIRegisterInfo.h (original)
+++ llvm/trunk/lib/Target/R600/SIRegisterInfo.h Wed Jan 14 09:42:31 2015
@@ -105,7 +105,14 @@ struct SIRegisterInfo : public AMDGPUReg
unsigned getPreloadedValue(const MachineFunction &MF,
enum PreloadedValue Value) const;
- unsigned findUnusedVGPR(const MachineRegisterInfo &MRI) const;
+ unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
+ const TargetRegisterClass *RC) const;
+
+private:
+ void buildScratchLoadStore(MachineBasicBlock::iterator MI,
+ unsigned LoadStoreOp, unsigned Value,
+ unsigned ScratchPtr, unsigned ScratchOffset,
+ int64_t Offset, RegScavenger *RS) const;
};
} // End namespace llvm
More information about the llvm-commits
mailing list