[llvm] r226586 - R600/SI: Use external symbols for scratch buffer
Tom Stellard
thomas.stellard at amd.com
Tue Jan 20 09:49:47 PST 2015
Author: tstellar
Date: Tue Jan 20 11:49:47 2015
New Revision: 226586
URL: http://llvm.org/viewvc/llvm-project?rev=226586&view=rev
Log:
R600/SI: Use external symbols for scratch buffer
We were passing the scratch buffer address to the shaders via user sgprs,
but now we use external symbols and have the driver patch the shader
using reloc information.
Modified:
llvm/trunk/lib/Target/R600/AMDGPU.h
llvm/trunk/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
llvm/trunk/lib/Target/R600/AMDGPUMCInstLower.cpp
llvm/trunk/lib/Target/R600/SIDefines.h
llvm/trunk/lib/Target/R600/SIInstrInfo.cpp
llvm/trunk/lib/Target/R600/SIInstructions.td
llvm/trunk/lib/Target/R600/SIPrepareScratchRegs.cpp
llvm/trunk/lib/Target/R600/SIRegisterInfo.cpp
llvm/trunk/lib/Target/R600/SIRegisterInfo.h
Modified: llvm/trunk/lib/Target/R600/AMDGPU.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPU.h?rev=226586&r1=226585&r2=226586&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/AMDGPU.h (original)
+++ llvm/trunk/lib/Target/R600/AMDGPU.h Tue Jan 20 11:49:47 2015
@@ -77,7 +77,11 @@ extern Target TheGCNTarget;
namespace AMDGPU {
enum TargetIndex {
- TI_CONSTDATA_START
+ TI_CONSTDATA_START,
+ TI_SCRATCH_RSRC_DWORD0,
+ TI_SCRATCH_RSRC_DWORD1,
+ TI_SCRATCH_RSRC_DWORD2,
+ TI_SCRATCH_RSRC_DWORD3
};
}
Modified: llvm/trunk/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUISelDAGToDAG.cpp?rev=226586&r1=226585&r2=226586&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/AMDGPUISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/R600/AMDGPUISelDAGToDAG.cpp Tue Jan 20 11:49:47 2015
@@ -962,16 +962,27 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScra
const SITargetLowering& Lowering =
*static_cast<const SITargetLowering*>(getTargetLowering());
- unsigned ScratchPtrReg =
- TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_PTR);
unsigned ScratchOffsetReg =
TRI->getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
Lowering.CreateLiveInRegister(*CurDAG, &AMDGPU::SReg_32RegClass,
ScratchOffsetReg, MVT::i32);
+ SDValue Sym0 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD0", MVT::i32);
+ SDValue ScratchRsrcDword0 =
+ SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym0), 0);
- SDValue ScratchPtr =
- CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
- MRI.getLiveInVirtReg(ScratchPtrReg), MVT::i64);
+ SDValue Sym1 = CurDAG->getExternalSymbol("SCRATCH_RSRC_DWORD1", MVT::i32);
+ SDValue ScratchRsrcDword1 =
+ SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, Sym1), 0);
+
+ const SDValue RsrcOps[] = {
+ CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
+ ScratchRsrcDword0,
+ CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
+ ScratchRsrcDword1,
+ CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
+ };
+ SDValue ScratchPtr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
+ MVT::v2i32, RsrcOps), 0);
Rsrc = SDValue(Lowering.buildScratchRSRC(*CurDAG, DL, ScratchPtr), 0);
SOffset = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
MRI.getLiveInVirtReg(ScratchOffsetReg), MVT::i32);
Modified: llvm/trunk/lib/Target/R600/AMDGPUMCInstLower.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUMCInstLower.cpp?rev=226586&r1=226585&r2=226586&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/AMDGPUMCInstLower.cpp (original)
+++ llvm/trunk/lib/Target/R600/AMDGPUMCInstLower.cpp Tue Jan 20 11:49:47 2015
@@ -80,6 +80,12 @@ void AMDGPUMCInstLower::lower(const Mach
MCOp = MCOperand::CreateExpr(Expr);
break;
}
+ case MachineOperand::MO_ExternalSymbol: {
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(StringRef(MO.getSymbolName()));
+ const MCSymbolRefExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+ MCOp = MCOperand::CreateExpr(Expr);
+ break;
+ }
}
OutMI.addOperand(MCOp);
}
Modified: llvm/trunk/lib/Target/R600/SIDefines.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIDefines.h?rev=226586&r1=226585&r2=226586&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIDefines.h (original)
+++ llvm/trunk/lib/Target/R600/SIDefines.h Tue Jan 20 11:49:47 2015
@@ -163,4 +163,5 @@ namespace SIOutMods {
#define R_00B860_COMPUTE_TMPRING_SIZE 0x00B860
#define S_00B860_WAVESIZE(x) (((x) & 0x1FFF) << 12)
+
#endif
Modified: llvm/trunk/lib/Target/R600/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIInstrInfo.cpp?rev=226586&r1=226585&r2=226586&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/R600/SIInstrInfo.cpp Tue Jan 20 11:49:47 2015
@@ -482,7 +482,7 @@ void SIInstrInfo::storeRegToStackSlot(Ma
.addFrameIndex(FrameIndex)
// Place-holder registers, these will be filled in by
// SIPrepareScratchRegs.
- .addReg(AMDGPU::SGPR0_SGPR1, RegState::Undef)
+ .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
.addReg(AMDGPU::SGPR0, RegState::Undef);
} else {
LLVMContext &Ctx = MF->getFunction()->getContext();
@@ -528,7 +528,7 @@ void SIInstrInfo::loadRegFromStackSlot(M
.addFrameIndex(FrameIndex)
// Place-holder registers, these will be filled in by
// SIPrepareScratchRegs.
- .addReg(AMDGPU::SGPR0_SGPR1, RegState::Undef)
+ .addReg(AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3, RegState::Undef)
.addReg(AMDGPU::SGPR0, RegState::Undef);
} else {
Modified: llvm/trunk/lib/Target/R600/SIInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIInstructions.td?rev=226586&r1=226585&r2=226586&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIInstructions.td (original)
+++ llvm/trunk/lib/Target/R600/SIInstructions.td Tue Jan 20 11:49:47 2015
@@ -1951,14 +1951,14 @@ multiclass SI_SPILL_SGPR <RegisterClass
let UseNamedOperandTable = 1 in {
def _SAVE : InstSI <
(outs),
- (ins sgpr_class:$src, i32imm:$frame_idx, SReg_64:$scratch_ptr,
+ (ins sgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
SReg_32:$scratch_offset),
"", []
>;
def _RESTORE : InstSI <
(outs sgpr_class:$dst),
- (ins i32imm:$frame_idx, SReg_64:$scratch_ptr, SReg_32:$scratch_offset),
+ (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),
"", []
>;
} // End UseNamedOperandTable = 1
@@ -1974,14 +1974,14 @@ multiclass SI_SPILL_VGPR <RegisterClass
let UseNamedOperandTable = 1 in {
def _SAVE : InstSI <
(outs),
- (ins vgpr_class:$src, i32imm:$frame_idx, SReg_64:$scratch_ptr,
+ (ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
SReg_32:$scratch_offset),
"", []
>;
def _RESTORE : InstSI <
(outs vgpr_class:$dst),
- (ins i32imm:$frame_idx, SReg_64:$scratch_ptr, SReg_32:$scratch_offset),
+ (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset),
"", []
>;
} // End UseNamedOperandTable = 1
Modified: llvm/trunk/lib/Target/R600/SIPrepareScratchRegs.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIPrepareScratchRegs.cpp?rev=226586&r1=226585&r2=226586&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIPrepareScratchRegs.cpp (original)
+++ llvm/trunk/lib/Target/R600/SIPrepareScratchRegs.cpp Tue Jan 20 11:49:47 2015
@@ -29,6 +29,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/Support/Debug.h"
using namespace llvm;
namespace {
@@ -84,28 +85,10 @@ bool SIPrepareScratchRegs::runOnMachineF
if (!Entry->isLiveIn(ScratchOffsetPreloadReg))
Entry->addLiveIn(ScratchOffsetPreloadReg);
- // Load the scratch pointer
- unsigned ScratchPtrReg =
- TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_64RegClass);
- int ScratchPtrFI = -1;
-
- if (ScratchPtrReg != AMDGPU::NoRegister) {
- // Found an SGPR to use.
- MRI.setPhysRegUsed(ScratchPtrReg);
- BuildMI(*Entry, I, DL, TII->get(AMDGPU::S_MOV_B64), ScratchPtrReg)
- .addReg(ScratchPtrPreloadReg);
- } else {
- // No SGPR is available, we must spill.
- ScratchPtrFI = FrameInfo->CreateSpillStackObject(8, 4);
- BuildMI(*Entry, I, DL, TII->get(AMDGPU::SI_SPILL_S64_SAVE))
- .addReg(ScratchPtrPreloadReg)
- .addFrameIndex(ScratchPtrFI);
- }
-
// Load the scratch offset.
unsigned ScratchOffsetReg =
TRI->findUnusedRegister(MRI, &AMDGPU::SGPR_32RegClass);
- int ScratchOffsetFI = ~0;
+ int ScratchOffsetFI = -1;
if (ScratchOffsetReg != AMDGPU::NoRegister) {
// Found an SGPR to use
@@ -125,22 +108,26 @@ bool SIPrepareScratchRegs::runOnMachineF
// add them to all the SI_SPILL_V* instructions.
RegScavenger RS;
- bool UseRegScavenger =
- (ScratchPtrReg == AMDGPU::NoRegister ||
- ScratchOffsetReg == AMDGPU::NoRegister);
+ unsigned ScratchRsrcFI = FrameInfo->CreateSpillStackObject(16, 4);
+ RS.addScavengingFrameIndex(ScratchRsrcFI);
+
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI) {
MachineBasicBlock &MBB = *BI;
- if (UseRegScavenger)
- RS.enterBasicBlock(&MBB);
+ // Add the scratch offset reg as a live-in so that the register scavenger
+ // doesn't re-use it.
+ if (!MBB.isLiveIn(ScratchOffsetReg))
+ MBB.addLiveIn(ScratchOffsetReg);
+ RS.enterBasicBlock(&MBB);
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
MachineInstr &MI = *I;
+ RS.forward(I);
DebugLoc DL = MI.getDebugLoc();
switch(MI.getOpcode()) {
- default: break;;
+ default: break;
case AMDGPU::SI_SPILL_V512_SAVE:
case AMDGPU::SI_SPILL_V256_SAVE:
case AMDGPU::SI_SPILL_V128_SAVE:
@@ -153,18 +140,35 @@ bool SIPrepareScratchRegs::runOnMachineF
case AMDGPU::SI_SPILL_V256_RESTORE:
case AMDGPU::SI_SPILL_V512_RESTORE:
- // Scratch Pointer
- if (ScratchPtrReg == AMDGPU::NoRegister) {
- ScratchPtrReg = RS.scavengeRegister(&AMDGPU::SGPR_64RegClass, 0);
- BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S64_RESTORE),
- ScratchPtrReg)
- .addFrameIndex(ScratchPtrFI)
- .addReg(AMDGPU::NoRegister)
- .addReg(AMDGPU::NoRegister);
- } else if (!MBB.isLiveIn(ScratchPtrReg)) {
- MBB.addLiveIn(ScratchPtrReg);
- }
+ // Scratch resource
+ unsigned ScratchRsrcReg =
+ RS.scavengeRegister(&AMDGPU::SReg_128RegClass, 0);
+
+ uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
+ 0xffffffff; // Size
+
+ unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
+ unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
+ unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
+ unsigned Rsrc3 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub3);
+
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc0)
+ .addExternalSymbol("SCRATCH_RSRC_DWORD0")
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc1)
+ .addExternalSymbol("SCRATCH_RSRC_DWORD1")
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc2)
+ .addImm(Rsrc & 0xffffffff)
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), Rsrc3)
+ .addImm(Rsrc >> 32)
+ .addReg(ScratchRsrcReg, RegState::ImplicitDefine);
+ // Scratch Offset
if (ScratchOffsetReg == AMDGPU::NoRegister) {
ScratchOffsetReg = RS.scavengeRegister(&AMDGPU::SGPR_32RegClass, 0);
BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_SPILL_S32_RESTORE),
@@ -176,20 +180,26 @@ bool SIPrepareScratchRegs::runOnMachineF
MBB.addLiveIn(ScratchOffsetReg);
}
- if (ScratchPtrReg == AMDGPU::NoRegister ||
+ if (ScratchRsrcReg == AMDGPU::NoRegister ||
ScratchOffsetReg == AMDGPU::NoRegister) {
LLVMContext &Ctx = MF.getFunction()->getContext();
Ctx.emitError("ran out of SGPRs for spilling VGPRs");
- ScratchPtrReg = AMDGPU::SGPR0;
+ ScratchRsrcReg = AMDGPU::SGPR0;
ScratchOffsetReg = AMDGPU::SGPR0;
}
- MI.getOperand(2).setReg(ScratchPtrReg);
+ MI.getOperand(2).setReg(ScratchRsrcReg);
+ MI.getOperand(2).setIsKill(true);
+ MI.getOperand(2).setIsUndef(false);
MI.getOperand(3).setReg(ScratchOffsetReg);
+ MI.getOperand(3).setIsUndef(false);
+ MI.addOperand(MachineOperand::CreateReg(Rsrc0, false, true, true));
+ MI.addOperand(MachineOperand::CreateReg(Rsrc1, false, true, true));
+ MI.addOperand(MachineOperand::CreateReg(Rsrc2, false, true, true));
+ MI.addOperand(MachineOperand::CreateReg(Rsrc3, false, true, true));
+ MI.dump();
break;
}
- if (UseRegScavenger)
- RS.forward();
}
}
return true;
Modified: llvm/trunk/lib/Target/R600/SIRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIRegisterInfo.cpp?rev=226586&r1=226585&r2=226586&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIRegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/R600/SIRegisterInfo.cpp Tue Jan 20 11:49:47 2015
@@ -98,7 +98,7 @@ static unsigned getNumSubRegsForSpillOp(
void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
unsigned LoadStoreOp,
unsigned Value,
- unsigned ScratchPtr,
+ unsigned ScratchRsrcReg,
unsigned ScratchOffset,
int64_t Offset,
RegScavenger *RS) const {
@@ -113,34 +113,11 @@ void SIRegisterInfo::buildScratchLoadSto
bool RanOutOfSGPRs = false;
unsigned SOffset = ScratchOffset;
- unsigned RsrcReg = RS->scavengeRegister(&AMDGPU::SReg_128RegClass, MI, 0);
- if (RsrcReg == AMDGPU::NoRegister) {
- RanOutOfSGPRs = true;
- RsrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;
- }
-
unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
unsigned Size = NumSubRegs * 4;
- uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
- 0xffffffff; // Size
-
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B64),
- getSubReg(RsrcReg, AMDGPU::sub0_sub1))
- .addReg(ScratchPtr)
- .addReg(RsrcReg, RegState::ImplicitDefine);
-
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32),
- getSubReg(RsrcReg, AMDGPU::sub2))
- .addImm(Rsrc & 0xffffffff)
- .addReg(RsrcReg, RegState::ImplicitDefine);
-
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32),
- getSubReg(RsrcReg, AMDGPU::sub3))
- .addImm(Rsrc >> 32)
- .addReg(RsrcReg, RegState::ImplicitDefine);
-
if (!isUInt<12>(Offset + Size)) {
+ dbgs() << "Offset scavenge\n";
SOffset = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
if (SOffset == AMDGPU::NoRegister) {
RanOutOfSGPRs = true;
@@ -163,7 +140,7 @@ void SIRegisterInfo::buildScratchLoadSto
BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
.addReg(SubReg, getDefRegState(IsLoad))
- .addReg(RsrcReg, getKillRegState(IsKill))
+ .addReg(ScratchRsrcReg, getKillRegState(IsKill))
.addImm(Offset)
.addReg(SOffset, getKillRegState(IsKill))
.addImm(0) // glc
@@ -236,6 +213,7 @@ void SIRegisterInfo::eliminateFrameIndex
}
if (isM0) {
+ dbgs() << "Scavenge M0\n";
SubReg = RS->scavengeRegister(&AMDGPU::SGPR_32RegClass, MI, 0);
}
@@ -262,7 +240,7 @@ void SIRegisterInfo::eliminateFrameIndex
case AMDGPU::SI_SPILL_V32_SAVE:
buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
TII->getNamedOperand(*MI, AMDGPU::OpName::src)->getReg(),
- TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_ptr)->getReg(),
+ TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
FrameInfo->getObjectOffset(Index), RS);
MI->eraseFromParent();
@@ -274,7 +252,7 @@ void SIRegisterInfo::eliminateFrameIndex
case AMDGPU::SI_SPILL_V512_RESTORE: {
buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
TII->getNamedOperand(*MI, AMDGPU::OpName::dst)->getReg(),
- TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_ptr)->getReg(),
+ TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
FrameInfo->getObjectOffset(Index), RS);
MI->eraseFromParent();
Modified: llvm/trunk/lib/Target/R600/SIRegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/SIRegisterInfo.h?rev=226586&r1=226585&r2=226586&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/SIRegisterInfo.h (original)
+++ llvm/trunk/lib/Target/R600/SIRegisterInfo.h Tue Jan 20 11:49:47 2015
@@ -111,7 +111,7 @@ struct SIRegisterInfo : public AMDGPUReg
private:
void buildScratchLoadStore(MachineBasicBlock::iterator MI,
unsigned LoadStoreOp, unsigned Value,
- unsigned ScratchPtr, unsigned ScratchOffset,
+ unsigned ScratchRsrcReg, unsigned ScratchOffset,
int64_t Offset, RegScavenger *RS) const;
};
More information about the llvm-commits
mailing list