[PATCH] AMDGPU: Fix VGPR spilling
Tom Stellard via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 13 11:23:19 PDT 2015
On Wed, Sep 30, 2015 at 11:11:51PM -0700, Matt Arsenault via llvm-commits wrote:
> Hi,
>
> These fix problems from trying to use the register scavenger when trying to spill registers.
>
> From 4ec50a4a359ed0554d12cc4a33e8314ac8e62ba9 Mon Sep 17 00:00:00 2001
> From: Matt Arsenault <Matthew.Arsenault at amd.com>
> Date: Tue, 29 Sep 2015 20:35:03 -0700
> Subject: [PATCH 1/7] AMDGPU: Stop reserving v[254:255]
>
> This wasn't doing anything useful. They weren't explicitly used
> anywhere, and the RegScavenger ignores reserved registers.
> ---
LGTM.
> From 46ba5470a06c6b44d2325b8a482bfc49a2c9f507 Mon Sep 17 00:00:00 2001
> From: Matt Arsenault <Matthew.Arsenault at amd.com>
> Date: Tue, 29 Sep 2015 19:26:18 -0700
> Subject: [PATCH 2/7] AMDGPU: Reserve registers for scratch buffer resources
>
> ---
> lib/Target/AMDGPU/SIISelLowering.cpp | 4 ++++
> lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 9 +++++++++
> lib/Target/AMDGPU/SIMachineFunctionInfo.h | 10 ++++++++++
> lib/Target/AMDGPU/SIRegisterInfo.cpp | 20 +++++++++++++++++++-
> 4 files changed, 42 insertions(+), 1 deletion(-)
>
> diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
> index b4b439c..50bc1d0 100644
> --- a/lib/Target/AMDGPU/SIISelLowering.cpp
> +++ b/lib/Target/AMDGPU/SIISelLowering.cpp
> @@ -509,6 +509,7 @@ SDValue SITargetLowering::LowerFormalArguments(
> MachineFunction &MF = DAG.getMachineFunction();
> FunctionType *FType = MF.getFunction()->getFunctionType();
> SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
> + const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
>
> assert(CallConv == CallingConv::C);
>
> @@ -696,6 +697,9 @@ SDValue SITargetLowering::LowerFormalArguments(
> Info->ScratchOffsetReg = AMDGPU::SGPR_32RegClass.getRegister(ScratchIdx);
> }
>
> + if (ST.isVGPRSpillingEnabled(Info))
> + Info->setScratchRSrcReg(TRI);
> +
> if (Chains.empty())
> return Chain;
>
> diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
> index d23b92e..269febf 100644
> --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
> +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
> @@ -29,11 +29,20 @@ void SIMachineFunctionInfo::anchor() {}
> SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
> : AMDGPUMachineFunction(MF),
> TIDReg(AMDGPU::NoRegister),
> + ScratchRSrcReg(AMDGPU::NoRegister),
> HasSpilledVGPRs(false),
> PSInputAddr(0),
> NumUserSGPRs(0),
> LDSWaveSpillSize(0) { }
>
> +void SIMachineFunctionInfo::setScratchRSrcReg(const SIRegisterInfo *TRI) {
> + // We need to round up to next multiple of 4.
> + unsigned NextSReg128 = RoundUpToAlignment(NumUserSGPRs + 5, 4);
> + unsigned RegSub0 = AMDGPU::SReg_32RegClass.getRegister(NextSReg128);
> + ScratchRSrcReg = TRI->getMatchingSuperReg(RegSub0, AMDGPU::sub0,
> + &AMDGPU::SReg_128RegClass);
I would prefer to have this logic in SIRegisterInfo::getPreloadedValue().
For HSA, the scratch descriptor will be in s[0:3].
At some point we are going to need to have 'dummy' registers or something
similar for all the pre-loaded values, and then fill the real values in
some time later.
> +}
> +
> SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg(
> MachineFunction *MF,
> unsigned FrameIndex,
> diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
> index 667da4c..db60e90 100644
> --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h
> +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
> @@ -29,6 +29,7 @@ class SIMachineFunctionInfo : public AMDGPUMachineFunction {
> void anchor() override;
>
> unsigned TIDReg;
> + unsigned ScratchRSrcReg;
> bool HasSpilledVGPRs;
>
> public:
> @@ -54,6 +55,15 @@ public:
> bool hasCalculatedTID() const { return TIDReg != AMDGPU::NoRegister; };
> unsigned getTIDReg() const { return TIDReg; };
> void setTIDReg(unsigned Reg) { TIDReg = Reg; }
> +
> + /// \brief Returns the physical register reserved for use as the resource
> + /// descriptor for scratch accesses.
> + unsigned getScratchRSrcReg() const {
> + return ScratchRSrcReg;
> + }
> +
> + void setScratchRSrcReg(const SIRegisterInfo *TRI);
> +
> bool hasSpilledVGPRs() const { return HasSpilledVGPRs; }
> void setHasSpilledVGPRs(bool Spill = true) { HasSpilledVGPRs = Spill; }
>
> diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp
> index 108c00b..ba7a113 100644
> --- a/lib/Target/AMDGPU/SIRegisterInfo.cpp
> +++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp
> @@ -41,9 +41,11 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
> reserveRegisterTuples(Reserved, AMDGPU::EXEC);
> reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
>
> + const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
> +
> // Tonga and Iceland can only allocate a fixed number of SGPRs due
> // to a hw bug.
> - if (MF.getSubtarget<AMDGPUSubtarget>().hasSGPRInitBug()) {
> + if (ST.hasSGPRInitBug()) {
> unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
> // Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs).
> // Assume XNACK_MASK is unused.
> @@ -55,6 +57,22 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
> }
> }
>
> + const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
> + unsigned ScratchRSrcReg = MFI->getScratchRSrcReg();
> + if (ScratchRSrcReg != AMDGPU::NoRegister) {
> + unsigned ScratchOffsetPreloadReg
> + = getPreloadedValue(MF, SIRegisterInfo::SCRATCH_WAVE_OFFSET);
> + // We will need to use this user SGPR argument for spilling, and thus never
> + // want it to be spilled.
> + reserveRegisterTuples(Reserved, ScratchOffsetPreloadReg);
> +
> + // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we need
> + // to spill.
> + // TODO: May need to reserve a VGPR if doing LDS spilling.
> + reserveRegisterTuples(Reserved, ScratchRSrcReg);
> + assert(!isSubRegister(ScratchRSrcReg, ScratchOffsetPreloadReg));
> + }
> +
> return Reserved;
> }
>
> --
> 2.2.1
>
More information about the llvm-commits
mailing list