[PATCH] AMDGPU: Fix VGPR spilling
Tom Stellard via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 13 13:39:09 PDT 2015
On Tue, Oct 13, 2015 at 11:23:19AM -0700, Tom Stellard via llvm-commits wrote:
> On Wed, Sep 30, 2015 at 11:11:51PM -0700, Matt Arsenault via llvm-commits wrote:
> > Hi,
> >
> > These fix problems from trying to use the register scavenger when trying to spill registers.
> >
>
> > From 4ec50a4a359ed0554d12cc4a33e8314ac8e62ba9 Mon Sep 17 00:00:00 2001
> > From: Matt Arsenault <Matthew.Arsenault at amd.com>
> > Date: Tue, 29 Sep 2015 20:35:03 -0700
> > Subject: [PATCH 1/7] AMDGPU: Stop reserving v[254:255]
> >
> > This wasn't doing anything useful. They weren't explicitly used
> > anywhere, and the RegScavenger ignores reserved registers.
> > ---
>
> LGTM.
>
> > From 46ba5470a06c6b44d2325b8a482bfc49a2c9f507 Mon Sep 17 00:00:00 2001
> > From: Matt Arsenault <Matthew.Arsenault at amd.com>
> > Date: Tue, 29 Sep 2015 19:26:18 -0700
> > Subject: [PATCH 2/7] AMDGPU: Reserve registers for scratch buffer resources
> >
> > ---
> > lib/Target/AMDGPU/SIISelLowering.cpp | 4 ++++
> > lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 9 +++++++++
> > lib/Target/AMDGPU/SIMachineFunctionInfo.h | 10 ++++++++++
> > lib/Target/AMDGPU/SIRegisterInfo.cpp | 20 +++++++++++++++++++-
> > 4 files changed, 42 insertions(+), 1 deletion(-)
> >
> > diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
> > index b4b439c..50bc1d0 100644
> > --- a/lib/Target/AMDGPU/SIISelLowering.cpp
> > +++ b/lib/Target/AMDGPU/SIISelLowering.cpp
> > @@ -509,6 +509,7 @@ SDValue SITargetLowering::LowerFormalArguments(
> > MachineFunction &MF = DAG.getMachineFunction();
> > FunctionType *FType = MF.getFunction()->getFunctionType();
> > SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
> > + const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
> >
> > assert(CallConv == CallingConv::C);
> >
> > @@ -696,6 +697,9 @@ SDValue SITargetLowering::LowerFormalArguments(
> > Info->ScratchOffsetReg = AMDGPU::SGPR_32RegClass.getRegister(ScratchIdx);
> > }
> >
> > + if (ST.isVGPRSpillingEnabled(Info))
> > + Info->setScratchRSrcReg(TRI);
> > +
> > if (Chains.empty())
> > return Chain;
> >
> > diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
> > index d23b92e..269febf 100644
> > --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
> > +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
> > @@ -29,11 +29,20 @@ void SIMachineFunctionInfo::anchor() {}
> > SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
> > : AMDGPUMachineFunction(MF),
> > TIDReg(AMDGPU::NoRegister),
> > + ScratchRSrcReg(AMDGPU::NoRegister),
> > HasSpilledVGPRs(false),
> > PSInputAddr(0),
> > NumUserSGPRs(0),
> > LDSWaveSpillSize(0) { }
> >
> > +void SIMachineFunctionInfo::setScratchRSrcReg(const SIRegisterInfo *TRI) {
> > + // We need to round up to next multiple of 4.
> > + unsigned NextSReg128 = RoundUpToAlignment(NumUserSGPRs + 5, 4);
> > + unsigned RegSub0 = AMDGPU::SReg_32RegClass.getRegister(NextSReg128);
> > + ScratchRSrcReg = TRI->getMatchingSuperReg(RegSub0, AMDGPU::sub0,
> > + &AMDGPU::SReg_128RegClass);
>
This will also reserve the wrong register for non-compute shaders. We should
be choosing the register to reserve by calling CCInfo.getFirstUnallocated()
at the end of SITargetLowering::LowerFormalArguments().
-Tom
> I would prefer to have this logic in SIRegisterInfo::getPreloadedValue().
> For HSA, the scratch descriptor will be in s[0:3].
>
> At some point we are going to need to have 'dummy' registers or something
> similar for all the pre-loaded values, and then fill the real values in
> some time later.
>
> > +}
> > +
> > SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
> > MachineFunction *MF,
> > unsigned FrameIndex,
> > diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
> > index 667da4c..db60e90 100644
> > --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h
> > +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
> > @@ -29,6 +29,7 @@ class SIMachineFunctionInfo : public AMDGPUMachineFunction {
> > void anchor() override;
> >
> > unsigned TIDReg;
> > + unsigned ScratchRSrcReg;
> > bool HasSpilledVGPRs;
> >
> > public:
> > @@ -54,6 +55,15 @@ public:
> > bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
> > unsigned getTIDReg() const { return TIDReg; };
> > void setTIDReg(unsigned Reg) { TIDReg = Reg; }
> > +
> > + /// \brief Returns the physical register reserved for use as the resource
> > + /// descriptor for scratch accesses.
> > + unsigned getScratchRSrcReg() const {
> > + return ScratchRSrcReg;
> > + }
> > +
> > + void setScratchRSrcReg(const SIRegisterInfo *TRI);
> > +
> > bool hasSpilledVGPRs() const { return HasSpilledVGPRs; }
> > void setHasSpilledVGPRs(bool Spill = true) { HasSpilledVGPRs = Spill; }
> >
> > diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp
> > index 108c00b..ba7a113 100644
> > --- a/lib/Target/AMDGPU/SIRegisterInfo.cpp
> > +++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp
> > @@ -41,9 +41,11 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
> > reserveRegisterTuples(Reserved, AMDGPU::EXEC);
> > reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
> >
> > + const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
> > +
> > // Tonga and Iceland can only allocate a fixed number of SGPRs due
> > // to a hw bug.
> > - if (MF.getSubtarget<AMDGPUSubtarget>().hasSGPRInitBug()) {
> > + if (ST.hasSGPRInitBug()) {
> > unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
> > // Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs).
> > // Assume XNACK_MASK is unused.
> > @@ -55,6 +57,22 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
> > }
> > }
> >
> > + const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
> > + unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
> > + if (ScratchRSrcReg != AMDGPU::NoRegister) {
> > + unsigned ScratchOffsetPreloadReg
> > + = getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
> > + // We will need to use this user SGPR argument for spilling, and thus never
> > + // want it to be spilled.
> > + reserveRegisterTuples(Reserved, ScratchOffsetPreloadReg);
> > +
> > + // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
> > + // to spill.
> > + // TODO: May need to reserve a VGPR if doing LDS spilling.
> > + reserveRegisterTuples(Reserved, ScratchRSrcReg);
> > + assert(!isSubRegister(ScratchRSrcReg, ScratchOffsetPreloadReg));
> > + }
> > +
> > return Reserved;
> > }
> >
> > --
> > 2.2.1
> >
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list