[llvm] 8470bf2 - [AMDGPU] Do not reserve any VGPR for SGPR spills
Austin Kerbow via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 11 22:15:37 PST 2022
Author: Austin Kerbow
Date: 2022-01-11T22:14:59-08:00
New Revision: 8470bf2b08845eea552a0539017e500ab21fd599
URL: https://github.com/llvm/llvm-project/commit/8470bf2b08845eea552a0539017e500ab21fd599
DIFF: https://github.com/llvm/llvm-project/commit/8470bf2b08845eea552a0539017e500ab21fd599.diff
LOG: [AMDGPU] Do not reserve any VGPR for SGPR spills
After the split register allocation changes in eebe841a47cb it is no
longer necessary to reserve a VGPR before RA. Reserving one can also create
bugs when IPRA is enabled, since we cannot predict that a called function
may not reserve any register if it does not have any SGPR spills. If that
happens, those functions may overwrite reserved registers that are
normally callee saved. Added a test to show this.
Fixes: SWDEV-309900
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D115551
Added:
llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
Modified:
llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
Removed:
llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index d4fe74ecb96e6..a347e91f3fad2 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -1320,16 +1320,14 @@ void SIFrameLowering::determineCalleeSavesSGPR(MachineFunction &MF,
const BitVector AllSavedRegs = SavedRegs;
SavedRegs.clearBitsInMask(TRI->getAllVectorRegMask());
- // If clearing VGPRs changed the mask, we will have some CSR VGPR spills.
- const bool HaveAnyCSRVGPR = SavedRegs != AllSavedRegs;
-
// We have to anticipate introducing CSR VGPR spills or spill of caller
// save VGPR reserved for SGPR spills as we now always create stack entry
- // for it, if we don't have any stack objects already, since we require
- // an FP if there is a call and stack.
+ // for it, if we don't have any stack objects already, since we require an FP
+ // if there is a call and stack. We will allocate a VGPR for SGPR spills if
+ // there are any SGPR spills, whether they are CSR spills or otherwise.
MachineFrameInfo &FrameInfo = MF.getFrameInfo();
const bool WillHaveFP =
- FrameInfo.hasCalls() && (HaveAnyCSRVGPR || MFI->VGPRReservedForSGPRSpill);
+ FrameInfo.hasCalls() && (AllSavedRegs.any() || MFI->hasSpilledSGPRs());
// FP will be specially managed like SP.
if (WillHaveFP || hasFP(MF))
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index eb3ea41fd20e6..ac8bdab14cbdf 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -45,10 +45,6 @@ static cl::opt<bool> DisableLoopAlignment(
cl::desc("Do not align and prefetch loops"),
cl::init(false));
-static cl::opt<bool> VGPRReserveforSGPRSpill(
- "amdgpu-reserve-vgpr-for-sgpr-spill",
- cl::desc("Allocates one VGPR for future SGPR Spill"), cl::init(true));
-
static cl::opt<bool> UseDivergentRegisterIndexing(
"amdgpu-use-divergent-register-indexing",
cl::Hidden,
@@ -11990,13 +11986,6 @@ void SITargetLowering::finalizeLowering(MachineFunction &MF) const {
}
TargetLoweringBase::finalizeLowering(MF);
-
- // Allocate a VGPR for future SGPR Spill if
- // "amdgpu-reserve-vgpr-for-sgpr-spill" option is used
- // FIXME: We won't need this hack if we split SGPR allocation from VGPR
- if (VGPRReserveforSGPRSpill && TRI->spillSGPRToVGPR() &&
- !Info->VGPRReservedForSGPRSpill && !Info->isEntryFunction())
- Info->reserveVGPRforSGPRSpills(MF);
}
void SITargetLowering::computeKnownBitsForFrameIndex(
diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
index 55196fe334e6a..f8b4d3e3319ea 100644
--- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -239,50 +239,6 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
return false;
}
-// Find lowest available VGPR and use it as VGPR reserved for SGPR spills.
-static bool lowerShiftReservedVGPR(MachineFunction &MF,
- const GCNSubtarget &ST) {
- SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
- const Register PreReservedVGPR = FuncInfo->VGPRReservedForSGPRSpill;
- // Early out if pre-reservation of a VGPR for SGPR spilling is disabled.
- if (!PreReservedVGPR)
- return false;
-
- // If there are no free lower VGPRs available, default to using the
- // pre-reserved register instead.
- const SIRegisterInfo *TRI = ST.getRegisterInfo();
- Register LowestAvailableVGPR =
- TRI->findUnusedRegister(MF.getRegInfo(), &AMDGPU::VGPR_32RegClass, MF);
- if (!LowestAvailableVGPR)
- LowestAvailableVGPR = PreReservedVGPR;
-
- MachineFrameInfo &FrameInfo = MF.getFrameInfo();
- // Create a stack object for a possible spill in the function prologue.
- // Note Non-CSR VGPR also need this as we may overwrite inactive lanes.
- Optional<int> FI = FrameInfo.CreateSpillStackObject(4, Align(4));
-
- // Find saved info about the pre-reserved register.
- const auto *ReservedVGPRInfoItr =
- llvm::find_if(FuncInfo->getSGPRSpillVGPRs(),
- [PreReservedVGPR](const auto &SpillRegInfo) {
- return SpillRegInfo.VGPR == PreReservedVGPR;
- });
-
- assert(ReservedVGPRInfoItr != FuncInfo->getSGPRSpillVGPRs().end());
- auto Index =
- std::distance(FuncInfo->getSGPRSpillVGPRs().begin(), ReservedVGPRInfoItr);
-
- FuncInfo->setSGPRSpillVGPRs(LowestAvailableVGPR, FI, Index);
-
- for (MachineBasicBlock &MBB : MF) {
- assert(LowestAvailableVGPR.isValid() && "Did not find an available VGPR");
- MBB.addLiveIn(LowestAvailableVGPR);
- MBB.sortUniqueLiveIns();
- }
-
- return true;
-}
-
bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TII = ST.getInstrInfo();
@@ -304,11 +260,6 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
if (!MFI.hasStackObjects() && !HasCSRs) {
SaveBlocks.clear();
RestoreBlocks.clear();
- if (FuncInfo->VGPRReservedForSGPRSpill) {
- // Free the reserved VGPR for later possible use by frame lowering.
- FuncInfo->removeVGPRForSGPRSpill(FuncInfo->VGPRReservedForSGPRSpill, MF);
- MRI.freezeReservedRegs(MF);
- }
return false;
}
@@ -326,8 +277,6 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
// This operates under the assumption that only other SGPR spills are users
// of the frame index.
- lowerShiftReservedVGPR(MF, ST);
-
// To track the spill frame indices handled in this pass.
BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
@@ -375,8 +324,6 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
FuncInfo->removeDeadFrameIndices(MFI);
MadeChange = true;
- } else if (FuncInfo->VGPRReservedForSGPRSpill) {
- FuncInfo->removeVGPRForSGPRSpill(FuncInfo->VGPRReservedForSGPRSpill, MF);
}
SaveBlocks.clear();
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 3ce368ef4db95..292ca4529883b 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -274,7 +274,6 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
MachineFrameInfo &FrameInfo = MF.getFrameInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
unsigned WaveSize = ST.getWavefrontSize();
- SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
unsigned Size = FrameInfo.getObjectSize(FI);
unsigned NumLanes = Size / 4;
@@ -291,16 +290,7 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
Register LaneVGPR;
unsigned VGPRIndex = (NumVGPRSpillLanes % WaveSize);
- // Reserve a VGPR (when NumVGPRSpillLanes = 0, WaveSize, 2*WaveSize, ..) and
- // when one of the two conditions is true:
- // 1. One reserved VGPR being tracked by VGPRReservedForSGPRSpill is not yet
- // reserved.
- // 2. All spill lanes of reserved VGPR(s) are full and another spill lane is
- // required.
- if (FuncInfo->VGPRReservedForSGPRSpill && NumVGPRSpillLanes < WaveSize) {
- assert(FuncInfo->VGPRReservedForSGPRSpill == SpillVGPRs.back().VGPR);
- LaneVGPR = FuncInfo->VGPRReservedForSGPRSpill;
- } else if (VGPRIndex == 0) {
+ if (VGPRIndex == 0) {
LaneVGPR = TRI->findUnusedRegister(MRI, &AMDGPU::VGPR_32RegClass, MF);
if (LaneVGPR == AMDGPU::NoRegister) {
// We have no VGPRs left for spilling SGPRs. Reset because we will not
@@ -308,6 +298,8 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
SGPRToVGPRSpills.erase(FI);
NumVGPRSpillLanes -= I;
+ // FIXME: We can run out of free registers with split allocation if
+ // IPRA is enabled and a called function already uses every VGPR.
#if 0
DiagnosticInfoResourceLimit DiagOutOfRegs(MF.getFunction(),
"VGPRs for SGPR spilling",
@@ -340,21 +332,6 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
return true;
}
-/// Reserve a VGPR for spilling of SGPRs
-bool SIMachineFunctionInfo::reserveVGPRforSGPRSpills(MachineFunction &MF) {
- const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
- const SIRegisterInfo *TRI = ST.getRegisterInfo();
- SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
-
- Register LaneVGPR = TRI->findUnusedRegister(
- MF.getRegInfo(), &AMDGPU::VGPR_32RegClass, MF, true);
- if (LaneVGPR == Register())
- return false;
- SpillVGPRs.push_back(SGPRSpillVGPR(LaneVGPR, None));
- FuncInfo->VGPRReservedForSGPRSpill = LaneVGPR;
- return true;
-}
-
/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPR is spilled to VGPR to vice versa.
/// Returns true if a \p FI can be eliminated completely.
@@ -616,24 +593,6 @@ bool SIMachineFunctionInfo::initializeBaseYamlFields(
return false;
}
-// Remove VGPR which was reserved for SGPR spills if there are no spilled SGPRs
-bool SIMachineFunctionInfo::removeVGPRForSGPRSpill(Register ReservedVGPR,
- MachineFunction &MF) {
- for (auto *i = SpillVGPRs.begin(); i < SpillVGPRs.end(); i++) {
- if (i->VGPR == ReservedVGPR) {
- SpillVGPRs.erase(i);
-
- for (MachineBasicBlock &MBB : MF) {
- MBB.removeLiveIn(ReservedVGPR);
- MBB.sortUniqueLiveIns();
- }
- this->VGPRReservedForSGPRSpill = AMDGPU::NoRegister;
- return true;
- }
- }
- return false;
-}
-
bool SIMachineFunctionInfo::usesAGPRs(const MachineFunction &MF) const {
if (UsesAGPRs)
return *UsesAGPRs;
diff --git a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 8accbf611c5f5..8e821274bb770 100644
--- a/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -502,7 +502,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
Register SGPRForBPSaveRestoreCopy;
Optional<int> BasePointerSaveIndex;
- Register VGPRReservedForSGPRSpill;
bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg);
public:
@@ -528,7 +527,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
void setSGPRSpillVGPRs(Register NewVGPR, Optional<int> newFI, int Index) {
SpillVGPRs[Index].VGPR = NewVGPR;
SpillVGPRs[Index].FI = newFI;
- VGPRReservedForSGPRSpill = NewVGPR;
}
bool removeVGPRForSGPRSpill(Register ReservedVGPR, MachineFunction &MF);
@@ -556,7 +554,6 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
bool haveFreeLanesForSGPRSpill(const MachineFunction &MF,
unsigned NumLane) const;
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
- bool reserveVGPRforSGPRSpills(MachineFunction &MF);
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
void removeDeadFrameIndices(MachineFrameInfo &MFI);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
index 3e8b22638324d..36bb7ca7af0e9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll
@@ -520,58 +520,58 @@ define i64 @v_extract_v32i64_varidx(<32 x i64> addrspace(1)* %ptr, i32 %idx) {
; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 ; 4-byte Folded Spill
; GCN-NEXT: global_load_dwordx4 v[3:6], v[0:1], off
-; GCN-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:16
-; GCN-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:32
-; GCN-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:48
-; GCN-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:64
-; GCN-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:80
-; GCN-NEXT: global_load_dwordx4 v[36:39], v[0:1], off offset:96
-; GCN-NEXT: global_load_dwordx4 v[40:43], v[0:1], off offset:112
-; GCN-NEXT: global_load_dwordx4 v[44:47], v[0:1], off offset:128
-; GCN-NEXT: global_load_dwordx4 v[48:51], v[0:1], off offset:144
-; GCN-NEXT: global_load_dwordx4 v[52:55], v[0:1], off offset:160
-; GCN-NEXT: global_load_dwordx4 v[56:59], v[0:1], off offset:176
-; GCN-NEXT: global_load_dwordx4 v[7:10], v[0:1], off offset:192
+; GCN-NEXT: global_load_dwordx4 v[7:10], v[0:1], off offset:16
+; GCN-NEXT: global_load_dwordx4 v[16:19], v[0:1], off offset:32
+; GCN-NEXT: global_load_dwordx4 v[20:23], v[0:1], off offset:48
+; GCN-NEXT: global_load_dwordx4 v[24:27], v[0:1], off offset:64
+; GCN-NEXT: global_load_dwordx4 v[28:31], v[0:1], off offset:80
+; GCN-NEXT: global_load_dwordx4 v[32:35], v[0:1], off offset:96
+; GCN-NEXT: global_load_dwordx4 v[36:39], v[0:1], off offset:112
+; GCN-NEXT: global_load_dwordx4 v[40:43], v[0:1], off offset:128
+; GCN-NEXT: global_load_dwordx4 v[44:47], v[0:1], off offset:144
+; GCN-NEXT: global_load_dwordx4 v[48:51], v[0:1], off offset:160
+; GCN-NEXT: global_load_dwordx4 v[52:55], v[0:1], off offset:176
+; GCN-NEXT: global_load_dwordx4 v[11:14], v[0:1], off offset:192
; GCN-NEXT: s_add_i32 s32, s32, 0x10000
; GCN-NEXT: s_add_i32 s32, s32, 0xffff0000
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:576 ; 4-byte Folded Spill
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:592 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:596 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:600 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:604 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:608 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:612 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:616 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:620 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:624 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:628 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:632 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:636 ; 4-byte Folded Spill
-; GCN-NEXT: global_load_dwordx4 v[7:10], v[0:1], off offset:208
+; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:580 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:584 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:588 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:592 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:596 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:600 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:604 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:608 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:612 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:616 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:620 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:624 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v24, off, s[0:3], s33 offset:628 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v25, off, s[0:3], s33 offset:632 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v26, off, s[0:3], s33 offset:636 ; 4-byte Folded Spill
+; GCN-NEXT: global_load_dwordx4 v[11:14], v[0:1], off offset:208
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: buffer_store_dword v3, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:512 ; 4-byte Folded Spill
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:544 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:548 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:552 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:556 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:560 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:564 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:568 ; 4-byte Folded Spill
-; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill
-; GCN-NEXT: global_load_dwordx4 v[8:11], v[0:1], off offset:224
+; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:516 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:520 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:524 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:528 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:532 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:536 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:540 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v15, off, s[0:3], s33 offset:544 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:548 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:552 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:556 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:560 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:564 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:568 ; 4-byte Folded Spill
+; GCN-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:572 ; 4-byte Folded Spill
+; GCN-NEXT: global_load_dwordx4 v[56:59], v[0:1], off offset:224
; GCN-NEXT: global_load_dwordx4 v[12:15], v[0:1], off offset:240
; GCN-NEXT: v_and_b32_e32 v0, 31, v2
; GCN-NEXT: v_lshrrev_b32_e64 v2, 6, s33
@@ -582,50 +582,50 @@ define i64 @v_extract_v32i64_varidx(<32 x i64> addrspace(1)* %ptr, i32 %idx) {
; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s33 offset:260
; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:264
; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:268
-; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:272
-; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:276
-; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:280
-; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:284
-; GCN-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:288
-; GCN-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:292
-; GCN-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:296
-; GCN-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:300
-; GCN-NEXT: buffer_store_dword v24, off, s[0:3], s33 offset:304
-; GCN-NEXT: buffer_store_dword v25, off, s[0:3], s33 offset:308
-; GCN-NEXT: buffer_store_dword v26, off, s[0:3], s33 offset:312
-; GCN-NEXT: buffer_store_dword v27, off, s[0:3], s33 offset:316
-; GCN-NEXT: buffer_store_dword v28, off, s[0:3], s33 offset:320
-; GCN-NEXT: buffer_store_dword v29, off, s[0:3], s33 offset:324
-; GCN-NEXT: buffer_store_dword v30, off, s[0:3], s33 offset:328
-; GCN-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:332
-; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:336
-; GCN-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:340
-; GCN-NEXT: buffer_store_dword v34, off, s[0:3], s33 offset:344
-; GCN-NEXT: buffer_store_dword v35, off, s[0:3], s33 offset:348
-; GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:352
-; GCN-NEXT: buffer_store_dword v37, off, s[0:3], s33 offset:356
-; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:360
-; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:364
-; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:368
-; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:372
-; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:376
-; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:380
-; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:384
-; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:388
-; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:392
-; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:396
-; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:400
-; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:404
-; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:408
-; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:412
-; GCN-NEXT: buffer_store_dword v52, off, s[0:3], s33 offset:416
-; GCN-NEXT: buffer_store_dword v53, off, s[0:3], s33 offset:420
-; GCN-NEXT: buffer_store_dword v54, off, s[0:3], s33 offset:424
-; GCN-NEXT: buffer_store_dword v55, off, s[0:3], s33 offset:428
-; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:432
-; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:436
-; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:440
-; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:444
+; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:272
+; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:276
+; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:280
+; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:284
+; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:288
+; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:292
+; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:296
+; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:300
+; GCN-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:304
+; GCN-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:308
+; GCN-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:312
+; GCN-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:316
+; GCN-NEXT: buffer_store_dword v24, off, s[0:3], s33 offset:320
+; GCN-NEXT: buffer_store_dword v25, off, s[0:3], s33 offset:324
+; GCN-NEXT: buffer_store_dword v26, off, s[0:3], s33 offset:328
+; GCN-NEXT: buffer_store_dword v27, off, s[0:3], s33 offset:332
+; GCN-NEXT: buffer_store_dword v28, off, s[0:3], s33 offset:336
+; GCN-NEXT: buffer_store_dword v29, off, s[0:3], s33 offset:340
+; GCN-NEXT: buffer_store_dword v30, off, s[0:3], s33 offset:344
+; GCN-NEXT: buffer_store_dword v31, off, s[0:3], s33 offset:348
+; GCN-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:352
+; GCN-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:356
+; GCN-NEXT: buffer_store_dword v34, off, s[0:3], s33 offset:360
+; GCN-NEXT: buffer_store_dword v35, off, s[0:3], s33 offset:364
+; GCN-NEXT: buffer_store_dword v36, off, s[0:3], s33 offset:368
+; GCN-NEXT: buffer_store_dword v37, off, s[0:3], s33 offset:372
+; GCN-NEXT: buffer_store_dword v38, off, s[0:3], s33 offset:376
+; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s33 offset:380
+; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:384
+; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:388
+; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:392
+; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:396
+; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:400
+; GCN-NEXT: buffer_store_dword v45, off, s[0:3], s33 offset:404
+; GCN-NEXT: buffer_store_dword v46, off, s[0:3], s33 offset:408
+; GCN-NEXT: buffer_store_dword v47, off, s[0:3], s33 offset:412
+; GCN-NEXT: buffer_store_dword v48, off, s[0:3], s33 offset:416
+; GCN-NEXT: buffer_store_dword v49, off, s[0:3], s33 offset:420
+; GCN-NEXT: buffer_store_dword v50, off, s[0:3], s33 offset:424
+; GCN-NEXT: buffer_store_dword v51, off, s[0:3], s33 offset:428
+; GCN-NEXT: buffer_store_dword v52, off, s[0:3], s33 offset:432
+; GCN-NEXT: buffer_store_dword v53, off, s[0:3], s33 offset:436
+; GCN-NEXT: buffer_store_dword v54, off, s[0:3], s33 offset:440
+; GCN-NEXT: buffer_store_dword v55, off, s[0:3], s33 offset:444
; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s33 offset:576 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v17, off, s[0:3], s33 offset:580 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v18, off, s[0:3], s33 offset:584 ; 4-byte Folded Reload
@@ -676,10 +676,10 @@ define i64 @v_extract_v32i64_varidx(<32 x i64> addrspace(1)* %ptr, i32 %idx) {
; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s33 offset:468
; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s33 offset:472
; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s33 offset:476
-; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:480
-; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:484
-; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:488
-; GCN-NEXT: buffer_store_dword v11, off, s[0:3], s33 offset:492
+; GCN-NEXT: buffer_store_dword v56, off, s[0:3], s33 offset:480
+; GCN-NEXT: buffer_store_dword v57, off, s[0:3], s33 offset:484
+; GCN-NEXT: buffer_store_dword v58, off, s[0:3], s33 offset:488
+; GCN-NEXT: buffer_store_dword v59, off, s[0:3], s33 offset:492
; GCN-NEXT: buffer_store_dword v12, off, s[0:3], s33 offset:496
; GCN-NEXT: buffer_store_dword v13, off, s[0:3], s33 offset:500
; GCN-NEXT: buffer_store_dword v14, off, s[0:3], s33 offset:504
diff --git a/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll b/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
similarity index 65%
rename from llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll
rename to llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
index e0e211243771f..a5bf6cea675b4 100644
--- a/llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
@@ -5,7 +5,7 @@ define void @child_function() #0 {
ret void
}
-; GCN-LABEL: {{^}}reserve_vgpr_with_no_lower_vgpr_available:
+; GCN-LABEL: {{^}}spill_sgpr_with_no_lower_vgpr_available:
; GCN: buffer_store_dword v255, off, s[0:3], s32
; GCN: v_writelane_b32 v255, s33, 2
; GCN: v_writelane_b32 v255, s30, 0
@@ -16,7 +16,7 @@ define void @child_function() #0 {
; GCN: v_readlane_b32 s33, v255, 2
; GCN: ; NumVgprs: 256
-define void @reserve_vgpr_with_no_lower_vgpr_available() #0 {
+define void @spill_sgpr_with_no_lower_vgpr_available() #0 {
%alloca = alloca i32, align 4, addrspace(5)
store volatile i32 0, i32 addrspace(5)* %alloca
@@ -51,7 +51,7 @@ define void @reserve_vgpr_with_no_lower_vgpr_available() #0 {
ret void
}
-; GCN-LABEL: {{^}}reserve_lowest_available_vgpr:
+; GCN-LABEL: {{^}}spill_to_lowest_available_vgpr:
; GCN: buffer_store_dword v254, off, s[0:3], s32
; GCN: v_writelane_b32 v254, s33, 2
; GCN: v_writelane_b32 v254, s30, 0
@@ -61,7 +61,7 @@ define void @reserve_vgpr_with_no_lower_vgpr_available() #0 {
; GCN: v_readlane_b32 s31, v254, 1
; GCN: v_readlane_b32 s33, v254, 2
-define void @reserve_lowest_available_vgpr() #0 {
+define void @spill_to_lowest_available_vgpr() #0 {
%alloca = alloca i32, align 4, addrspace(5)
store volatile i32 0, i32 addrspace(5)* %alloca
@@ -96,14 +96,14 @@ define void @reserve_lowest_available_vgpr() #0 {
ret void
}
-; GCN-LABEL: {{^}}reserve_vgpr_with_sgpr_spills:
+; GCN-LABEL: {{^}}spill_sgpr_with_sgpr_uses:
; GCN-NOT: buffer_store_dword v255, off, s[0:3], s32
; GCN: ; def s4
; GCN: v_writelane_b32 v254, s4, 2
; GCN: v_readlane_b32 s4, v254, 2
; GCN: ; use s4
-define void @reserve_vgpr_with_sgpr_spills() #0 {
+define void @spill_sgpr_with_sgpr_uses() #0 {
%alloca = alloca i32, align 4, addrspace(5)
store volatile i32 0, i32 addrspace(5)* %alloca
@@ -147,12 +147,12 @@ ret:
ret void
}
-; GCN-LABEL: {{^}}reserve_vgpr_with_tail_call
+; GCN-LABEL: {{^}}spill_sgpr_with_tail_call
; GCN-NOT: buffer_store_dword v255, off, s[0:3], s32
; GCN-NOT: v_writelane
; GCN: s_setpc_b64 s[4:5]
-define void @reserve_vgpr_with_tail_call() #0 {
+define void @spill_sgpr_with_tail_call() #0 {
%alloca = alloca i32, align 4, addrspace(5)
store volatile i32 0, i32 addrspace(5)* %alloca
@@ -187,17 +187,29 @@ define void @reserve_vgpr_with_tail_call() #0 {
ret void
}
-; GCN-LABEL: {{^}}reserve_vgpr_for_sgpr_spills_no_alloca:
-; GCN: v_writelane_b32 v5, s34, 0
-; GCN: v_writelane_b32 v5, s35, 1
-; GCN: v_writelane_b32 v5, s36, 2
-; GCN: v_writelane_b32 v5, s37, 3
-; GCN: v_readlane_b32 s37, v5, 3
-; GCN: v_readlane_b32 s36, v5, 2
-; GCN: v_readlane_b32 s35, v5, 1
-; GCN: v_readlane_b32 s34, v5, 0
-
-define void @reserve_vgpr_for_sgpr_spills_no_alloca(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
+; Special case where all registers are explicitly clobbered in the function and
+; we have no VGPR to allocate for SGPR spills. We are forced to spill to memory.
+
+; GCN-LABEL: {{^}}spill_sgpr_no_free_vgpr:
+; GCN: v_writelane_b32 [[A:v[0-9]+]], s34, 0
+; GCN: buffer_store_dword [[A]], off, s[0:3], s32
+; GCN: v_writelane_b32 [[B:v[0-9]+]], s35, 0
+; GCN: buffer_store_dword [[B]], off, s[0:3], s32
+; GCN: v_writelane_b32 [[C:v[0-9]+]], s36, 0
+; GCN: buffer_store_dword [[C]], off, s[0:3], s32
+; GCN: v_writelane_b32 [[D:v[0-9]+]], s37, 0
+; GCN: buffer_store_dword [[D]], off, s[0:3], s32
+; GCN: #ASMEND
+; GCN: buffer_load_dword [[E:v[0-9]+]]
+; GCN: v_readlane_b32 s37, [[E]], 0
+; GCN: buffer_load_dword [[F:v[0-9]+]]
+; GCN: v_readlane_b32 s36, [[F]], 0
+; GCN: buffer_load_dword [[G:v[0-9]+]]
+; GCN: v_readlane_b32 s35, [[G]], 0
+; GCN: buffer_load_dword [[H:v[0-9]+]]
+; GCN: v_readlane_b32 s34, [[H]], 0
+
+define void @spill_sgpr_no_free_vgpr(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
%a = load <4 x i32>, <4 x i32> addrspace(1)* %in
call void asm sideeffect "",
"~{v6},~{v7},~{v8},~{v9}
@@ -234,4 +246,96 @@ define void @reserve_vgpr_for_sgpr_spills_no_alloca(<4 x i32> addrspace(1)* %out
ret void
}
+; If IPRA no-CSR optimization is enabled, we will not be able to allocate a
+; VGPR for SGPR spills in the parent function since this child function uses all
+; VGPRs.
+
+define internal void @child_function_ipra() #0 {
+ call void asm sideeffect "",
+ "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}
+ ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
+ ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}
+ ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}
+ ,~{v40},~{v41},~{v42},~{v43},~{v44},~{v45},~{v46},~{v47},~{v48},~{v49}
+ ,~{v50},~{v51},~{v52},~{v53},~{v54},~{v55},~{v56},~{v57},~{v58},~{v59}
+ ,~{v60},~{v61},~{v62},~{v63},~{v64},~{v65},~{v66},~{v67},~{v68},~{v69}
+ ,~{v70},~{v71},~{v72},~{v73},~{v74},~{v75},~{v76},~{v77},~{v78},~{v79}
+ ,~{v80},~{v81},~{v82},~{v83},~{v84},~{v85},~{v86},~{v87},~{v88},~{v89}
+ ,~{v90},~{v91},~{v92},~{v93},~{v94},~{v95},~{v96},~{v97},~{v98},~{v99}
+ ,~{v100},~{v101},~{v102},~{v103},~{v104},~{v105},~{v106},~{v107},~{v108},~{v109}
+ ,~{v110},~{v111},~{v112},~{v113},~{v114},~{v115},~{v116},~{v117},~{v118},~{v119}
+ ,~{v120},~{v121},~{v122},~{v123},~{v124},~{v125},~{v126},~{v127},~{v128},~{v129}
+ ,~{v130},~{v131},~{v132},~{v133},~{v134},~{v135},~{v136},~{v137},~{v138},~{v139}
+ ,~{v140},~{v141},~{v142},~{v143},~{v144},~{v145},~{v146},~{v147},~{v148},~{v149}
+ ,~{v150},~{v151},~{v152},~{v153},~{v154},~{v155},~{v156},~{v157},~{v158},~{v159}
+ ,~{v160},~{v161},~{v162},~{v163},~{v164},~{v165},~{v166},~{v167},~{v168},~{v169}
+ ,~{v170},~{v171},~{v172},~{v173},~{v174},~{v175},~{v176},~{v177},~{v178},~{v179}
+ ,~{v180},~{v181},~{v182},~{v183},~{v184},~{v185},~{v186},~{v187},~{v188},~{v189}
+ ,~{v190},~{v191},~{v192},~{v193},~{v194},~{v195},~{v196},~{v197},~{v198},~{v199}
+ ,~{v200},~{v201},~{v202},~{v203},~{v204},~{v205},~{v206},~{v207},~{v208},~{v209}
+ ,~{v210},~{v211},~{v212},~{v213},~{v214},~{v215},~{v216},~{v217},~{v218},~{v219}
+ ,~{v220},~{v221},~{v222},~{v223},~{v224},~{v225},~{v226},~{v227},~{v228},~{v229}
+ ,~{v230},~{v231},~{v232},~{v233},~{v234},~{v235},~{v236},~{v237},~{v238},~{v239}
+ ,~{v240},~{v241},~{v242},~{v243},~{v244},~{v245},~{v246},~{v247},~{v248},~{v249}
+ ,~{v250},~{v251},~{v252},~{v253},~{v254},~{v255}" () #0
+ ret void
+}
+
+; GCN-LABEL: {{^}}spill_sgpr_no_free_vgpr_ipra:
+; GCN: v_writelane_b32 v0, s30, 0
+; GCN: v_writelane_b32 v0, s31, 1
+; GCN: buffer_store_dword v0, off
+; GCN: swappc
+; GCN: buffer_load_dword v0, off
+; GCN: v_readlane_b32 s30, v0, 0
+; GCN: v_readlane_b32 s31, v0, 1
+define void @spill_sgpr_no_free_vgpr_ipra() #0 {
+ call void @child_function_ipra()
+ ret void
+}
+
+define internal void @child_function_ipra_tail_call() #0 {
+ call void asm sideeffect "",
+ "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}
+ ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
+ ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}
+ ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}
+ ,~{v40},~{v41},~{v42},~{v43},~{v44},~{v45},~{v46},~{v47},~{v48},~{v49}
+ ,~{v50},~{v51},~{v52},~{v53},~{v54},~{v55},~{v56},~{v57},~{v58},~{v59}
+ ,~{v60},~{v61},~{v62},~{v63},~{v64},~{v65},~{v66},~{v67},~{v68},~{v69}
+ ,~{v70},~{v71},~{v72},~{v73},~{v74},~{v75},~{v76},~{v77},~{v78},~{v79}
+ ,~{v80},~{v81},~{v82},~{v83},~{v84},~{v85},~{v86},~{v87},~{v88},~{v89}
+ ,~{v90},~{v91},~{v92},~{v93},~{v94},~{v95},~{v96},~{v97},~{v98},~{v99}
+ ,~{v100},~{v101},~{v102},~{v103},~{v104},~{v105},~{v106},~{v107},~{v108},~{v109}
+ ,~{v110},~{v111},~{v112},~{v113},~{v114},~{v115},~{v116},~{v117},~{v118},~{v119}
+ ,~{v120},~{v121},~{v122},~{v123},~{v124},~{v125},~{v126},~{v127},~{v128},~{v129}
+ ,~{v130},~{v131},~{v132},~{v133},~{v134},~{v135},~{v136},~{v137},~{v138},~{v139}
+ ,~{v140},~{v141},~{v142},~{v143},~{v144},~{v145},~{v146},~{v147},~{v148},~{v149}
+ ,~{v150},~{v151},~{v152},~{v153},~{v154},~{v155},~{v156},~{v157},~{v158},~{v159}
+ ,~{v160},~{v161},~{v162},~{v163},~{v164},~{v165},~{v166},~{v167},~{v168},~{v169}
+ ,~{v170},~{v171},~{v172},~{v173},~{v174},~{v175},~{v176},~{v177},~{v178},~{v179}
+ ,~{v180},~{v181},~{v182},~{v183},~{v184},~{v185},~{v186},~{v187},~{v188},~{v189}
+ ,~{v190},~{v191},~{v192},~{v193},~{v194},~{v195},~{v196},~{v197},~{v198},~{v199}
+ ,~{v200},~{v201},~{v202},~{v203},~{v204},~{v205},~{v206},~{v207},~{v208},~{v209}
+ ,~{v210},~{v211},~{v212},~{v213},~{v214},~{v215},~{v216},~{v217},~{v218},~{v219}
+ ,~{v220},~{v221},~{v222},~{v223},~{v224},~{v225},~{v226},~{v227},~{v228},~{v229}
+ ,~{v230},~{v231},~{v232},~{v233},~{v234},~{v235},~{v236},~{v237},~{v238},~{v239}
+ ,~{v240},~{v241},~{v242},~{v243},~{v244},~{v245},~{v246},~{v247},~{v248},~{v249}
+ ,~{v250},~{v251},~{v252},~{v253},~{v254},~{v255}" () #0
+ ret void
+}
+
+; GCN-LABEL: {{^}}spill_sgpr_no_free_vgpr_ipra_tail_call:
+; GCN-NOT: v_writelane_b32
+; GCN-NOT: buffer_store_dword
+; GCN-NOT: swappc
+; GCN-NOT: buffer_load_dword v0, off
+; GCN-NOT: v_readlane_b32
+; GCN: setpc
+define void @spill_sgpr_no_free_vgpr_ipra_tail_call() #0 {
+ tail call void @child_function_ipra_tail_call()
+ ret void
+}
+
+
attributes #0 = { nounwind noinline norecurse "amdgpu-flat-work-group-size"="1,256" }
More information about the llvm-commits
mailing list