[llvm] [AMDGPU][SILowerSGPRSpills] Correct insertion of IMPLICIT_DEF in cycles (PR #186348)
Chris Jackson via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 1 20:29:18 PDT 2026
https://github.com/chrisjbris updated https://github.com/llvm/llvm-project/pull/186348
>From 6c9e1347aac4a1be056496248e97e91aab26cae5 Mon Sep 17 00:00:00 2001
From: Chris Jackson <chris.jackson at amd.com>
Date: Fri, 13 Mar 2026 04:22:48 -0500
Subject: [PATCH] [AMDGPU][SILowerSGPRSpills] Insertion of IMPLICIT_DEF in
cycle headers
The si-lower-sgpr-spills pass was observed inserting the IMPLICIT_DEF for a
lane-VGPR restore into the cycle header. The virtual VGPR is therefore not
live-in to the header, so WWM regallocfast does not insert a restore, and
the VGPR is clobbered after each backedge.
Correct this by inserting the IMPLICIT_DEF in the preheader instead.
---
llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp | 87 ++++++--
llvm/test/CodeGen/AMDGPU/llc-pipeline.ll | 5 +
.../CodeGen/AMDGPU/sgpr-regalloc-flags.ll | 5 +
.../si-lower-sgpr-spills-cycle-header.ll | 206 ++++++++++++++++++
.../si-lower-sgpr-spills-cycle-header.mir | 114 ++++++++++
...wer-sgpr-spills-initial-insert-in-body.mir | 72 ++++++
...er-sgpr-spills-initial-insert-in-latch.mir | 63 ++++++
...si-lower-sgpr-spills-multi-entry-cycle.mir | 149 +++++++++++++
.../AMDGPU/spill-sgpr-to-virtual-vgpr.mir | 6 +-
.../spill-vgpr-to-agpr-update-regscavenger.ll | 5 +-
10 files changed, 687 insertions(+), 25 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-cycle-header.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-cycle-header.mir
create mode 100644 llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-initial-insert-in-body.mir
create mode 100644 llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-initial-insert-in-latch.mir
create mode 100644 llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-multi-entry-cycle.mir
diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
index 06b221e171fd4..e0f50d48d6f2c 100644
--- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -22,6 +22,7 @@
#include "SIMachineFunctionInfo.h"
#include "SISpillUtils.h"
#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineCycleAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
@@ -35,6 +36,18 @@ using MBBVector = SmallVector<MachineBasicBlock *, 4>;
namespace {
+/// Insertion point for IMPLICIT_DEF: iterator may be MBB::end() and can't be
+/// dereferenced so the parent block is stored explicitly.
+struct LaneVGPRInsertPt {
+ MachineBasicBlock *MBB;
+ MachineBasicBlock::iterator It;
+};
+
+static LaneVGPRInsertPt insertPt(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator It) {
+ return {MBB, It};
+}
+
static cl::opt<unsigned> MaxNumVGPRsForWwmAllocation(
"amdgpu-num-vgprs-for-wwm-alloc",
cl::desc("Max num VGPRs for whole-wave register allocation."),
@@ -47,23 +60,26 @@ class SILowerSGPRSpills {
LiveIntervals *LIS = nullptr;
SlotIndexes *Indexes = nullptr;
MachineDominatorTree *MDT = nullptr;
+ MachineCycleInfo *MCI = nullptr;
// Save and Restore blocks of the current function. Typically there is a
// single save block, unless Windows EH funclets are involved.
MBBVector SaveBlocks;
MBBVector RestoreBlocks;
+ MachineBasicBlock *getCycleDomBB(MachineCycle *C);
+
public:
SILowerSGPRSpills(LiveIntervals *LIS, SlotIndexes *Indexes,
- MachineDominatorTree *MDT)
- : LIS(LIS), Indexes(Indexes), MDT(MDT) {}
+ MachineDominatorTree *MDT, MachineCycleInfo *MCI)
+ : LIS(LIS), Indexes(Indexes), MDT(MDT), MCI(MCI) {}
bool run(MachineFunction &MF);
void calculateSaveRestoreBlocks(MachineFunction &MF);
bool spillCalleeSavedRegs(MachineFunction &MF,
SmallVectorImpl<int> &CalleeSavedFIs);
void updateLaneVGPRDomInstr(
int FI, MachineBasicBlock *MBB, MachineBasicBlock::iterator InsertPt,
- DenseMap<Register, MachineBasicBlock::iterator> &LaneVGPRDomInstr);
+ DenseMap<Register, LaneVGPRInsertPt> &LaneVGPRDomInstr);
void determineRegsForWWMAllocation(MachineFunction &MF, BitVector &RegMask);
};
@@ -77,6 +93,7 @@ class SILowerSGPRSpillsLegacy : public MachineFunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineDominatorTreeWrapperPass>();
+ AU.addRequired<MachineCycleInfoWrapperPass>();
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -96,6 +113,7 @@ INITIALIZE_PASS_BEGIN(SILowerSGPRSpillsLegacy, DEBUG_TYPE,
INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass)
INITIALIZE_PASS_END(SILowerSGPRSpillsLegacy, DEBUG_TYPE,
"SI lower SGPR spill instructions", false, false)
@@ -300,9 +318,27 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs(
return false;
}
+MachineBasicBlock *SILowerSGPRSpills::getCycleDomBB(MachineCycle *C) {
+ // If the insertion point lands on a cycle entry, move it to a block that
+ // dominates all entries.
+ if (C->isReducible()) {
+ if (auto *IDom = MDT->getNode(C->getHeader())->getIDom())
+ return IDom->getBlock();
+ llvm_unreachable("Expected cycle to have an IDom.");
+ return nullptr;
+ }
+
+ const SmallVectorImpl<MachineBasicBlock *> &Entries = C->getEntries();
+ assert(!Entries.empty() && "Expected cycle to have at least one entry.");
+ MachineBasicBlock *EntryBB = Entries[0];
+ for (unsigned I = 1; I < Entries.size(); ++I)
+ EntryBB = MDT->findNearestCommonDominator(EntryBB, Entries[I]);
+ return EntryBB;
+}
+
void SILowerSGPRSpills::updateLaneVGPRDomInstr(
int FI, MachineBasicBlock *MBB, MachineBasicBlock::iterator InsertPt,
- DenseMap<Register, MachineBasicBlock::iterator> &LaneVGPRDomInstr) {
+ DenseMap<Register, LaneVGPRInsertPt> &LaneVGPRDomInstr) {
// For the Def of a virtual LaneVGPR to dominate all its uses, we should
// insert an IMPLICIT_DEF before the dominating spill. Switching to a
// depth first order doesn't really help since the machine function can be in
@@ -321,19 +357,21 @@ void SILowerSGPRSpills::updateLaneVGPRDomInstr(
PrevLaneVGPR = Spill.VGPR;
auto I = LaneVGPRDomInstr.find(Spill.VGPR);
if (Spill.Lane == 0 && I == LaneVGPRDomInstr.end()) {
- // Initially add the spill instruction itself for Insertion point.
- LaneVGPRDomInstr[Spill.VGPR] = InsertPt;
+ LaneVGPRDomInstr[Spill.VGPR] = insertPt(MBB, InsertPt);
} else {
assert(I != LaneVGPRDomInstr.end());
- auto PrevInsertPt = I->second;
- MachineBasicBlock *DomMBB = PrevInsertPt->getParent();
+ LaneVGPRInsertPt Prev = I->second;
+ MachineBasicBlock *PrevInsertMBB = Prev.MBB;
+ MachineBasicBlock::iterator PrevInsertPt = Prev.It;
+ MachineBasicBlock *DomMBB = PrevInsertMBB;
if (DomMBB == MBB) {
// The insertion point earlier selected in a predecessor block whose
// spills are currently being lowered. The earlier InsertPt would be
// the one just before the block terminator and it should be changed
// if we insert any new spill in it.
- if (MDT->dominates(&*InsertPt, &*PrevInsertPt))
- I->second = InsertPt;
+ if (PrevInsertPt == MBB->end() ||
+ MDT->dominates(&*InsertPt, &*PrevInsertPt))
+ I->second = insertPt(MBB, InsertPt);
continue;
}
@@ -341,10 +379,11 @@ void SILowerSGPRSpills::updateLaneVGPRDomInstr(
// Find the common dominator block between PrevInsertPt and the
// current spill.
DomMBB = MDT->findNearestCommonDominator(DomMBB, MBB);
+
if (DomMBB == MBB)
- I->second = InsertPt;
- else if (DomMBB != PrevInsertPt->getParent())
- I->second = &(*DomMBB->getFirstTerminator());
+ I->second = insertPt(MBB, InsertPt);
+ else if (DomMBB != PrevInsertMBB)
+ I->second = insertPt(DomMBB, DomMBB->getFirstTerminator());
}
}
}
@@ -394,7 +433,9 @@ bool SILowerSGPRSpillsLegacy::runOnMachineFunction(MachineFunction &MF) {
SlotIndexes *Indexes = SIWrapper ? &SIWrapper->getSI() : nullptr;
MachineDominatorTree *MDT =
&getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
- return SILowerSGPRSpills(LIS, Indexes, MDT).run(MF);
+ MachineCycleInfo *MCI =
+ &getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
+ return SILowerSGPRSpills(LIS, Indexes, MDT, MCI).run(MF);
}
bool SILowerSGPRSpills::run(MachineFunction &MF) {
@@ -438,7 +479,7 @@ bool SILowerSGPRSpills::run(MachineFunction &MF) {
BitVector SpillFIs(MFI.getObjectIndexEnd(), false);
// To track the IMPLICIT_DEF insertion point for the lane vgprs.
- DenseMap<Register, MachineBasicBlock::iterator> LaneVGPRDomInstr;
+ DenseMap<Register, LaneVGPRInsertPt> LaneVGPRDomInstr;
for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
@@ -492,12 +533,15 @@ bool SILowerSGPRSpills::run(MachineFunction &MF) {
}
for (auto Reg : FuncInfo->getSGPRSpillVGPRs()) {
- auto InsertPt = LaneVGPRDomInstr[Reg];
+ LaneVGPRInsertPt IP = LaneVGPRDomInstr[Reg];
+ if (MachineCycle *C = MCI->getTopLevelParentCycle(IP.MBB)) {
+ MachineBasicBlock *AdjMBB = getCycleDomBB(C);
+ IP = insertPt(AdjMBB, AdjMBB->getFirstTerminator());
+ }
// Insert the IMPLICIT_DEF at the identified points.
- MachineBasicBlock &Block = *InsertPt->getParent();
- DebugLoc DL = Block.findDebugLoc(InsertPt);
- auto MIB =
- BuildMI(Block, *InsertPt, DL, TII->get(AMDGPU::IMPLICIT_DEF), Reg);
+ MachineBasicBlock &Block = *IP.MBB;
+ DebugLoc DL = Block.findDebugLoc(IP.It);
+ auto MIB = BuildMI(Block, IP.It, DL, TII->get(AMDGPU::IMPLICIT_DEF), Reg);
// Add WWM flag to the virtual register.
FuncInfo->setFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG);
@@ -566,6 +610,7 @@ SILowerSGPRSpillsPass::run(MachineFunction &MF,
auto *LIS = MFAM.getCachedResult<LiveIntervalsAnalysis>(MF);
auto *Indexes = MFAM.getCachedResult<SlotIndexesAnalysis>(MF);
MachineDominatorTree *MDT = &MFAM.getResult<MachineDominatorTreeAnalysis>(MF);
- SILowerSGPRSpills(LIS, Indexes, MDT).run(MF);
+ MachineCycleInfo &MCI = MFAM.getResult<MachineCycleAnalysis>(MF);
+ SILowerSGPRSpills(LIS, Indexes, MDT, &MCI).run(MF);
return PreservedAnalyses::all();
}
diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
index c41a43bf9cf48..1b376c04068ab 100644
--- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
+++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll
@@ -118,6 +118,7 @@
; GCN-O0-NEXT: SI Whole Quad Mode
; GCN-O0-NEXT: AMDGPU Pre-RA Long Branch Reg
; GCN-O0-NEXT: Fast Register Allocator
+; GCN-O0-NEXT: Machine Cycle Info Analysis
; GCN-O0-NEXT: SI lower SGPR spill instructions
; GCN-O0-NEXT: Slot index numbering
; GCN-O0-NEXT: Live Interval Analysis
@@ -384,6 +385,7 @@
; GCN-O1-NEXT: Greedy Register Allocator
; GCN-O1-NEXT: Virtual Register Rewriter
; GCN-O1-NEXT: Stack Slot Coloring
+; GCN-O1-NEXT: Machine Cycle Info Analysis
; GCN-O1-NEXT: SI lower SGPR spill instructions
; GCN-O1-NEXT: Virtual Register Map
; GCN-O1-NEXT: Live Register Matrix
@@ -708,6 +710,7 @@
; GCN-O1-OPTS-NEXT: Greedy Register Allocator
; GCN-O1-OPTS-NEXT: Virtual Register Rewriter
; GCN-O1-OPTS-NEXT: Stack Slot Coloring
+; GCN-O1-OPTS-NEXT: Machine Cycle Info Analysis
; GCN-O1-OPTS-NEXT: SI lower SGPR spill instructions
; GCN-O1-OPTS-NEXT: Virtual Register Map
; GCN-O1-OPTS-NEXT: Live Register Matrix
@@ -1037,6 +1040,7 @@
; GCN-O2-NEXT: Greedy Register Allocator
; GCN-O2-NEXT: Virtual Register Rewriter
; GCN-O2-NEXT: Stack Slot Coloring
+; GCN-O2-NEXT: Machine Cycle Info Analysis
; GCN-O2-NEXT: SI lower SGPR spill instructions
; GCN-O2-NEXT: Virtual Register Map
; GCN-O2-NEXT: Live Register Matrix
@@ -1379,6 +1383,7 @@
; GCN-O3-NEXT: Greedy Register Allocator
; GCN-O3-NEXT: Virtual Register Rewriter
; GCN-O3-NEXT: Stack Slot Coloring
+; GCN-O3-NEXT: Machine Cycle Info Analysis
; GCN-O3-NEXT: SI lower SGPR spill instructions
; GCN-O3-NEXT: Virtual Register Map
; GCN-O3-NEXT: Live Register Matrix
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll b/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll
index c83af33659dad..fc5dabc584863 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-regalloc-flags.ll
@@ -18,6 +18,7 @@
; DEFAULT: Greedy Register Allocator
; DEFAULT-NEXT: Virtual Register Rewriter
; DEFAULT-NEXT: Stack Slot Coloring
+; DEFAULT-NEXT: Machine Cycle Info Analysis
; DEFAULT-NEXT: SI lower SGPR spill instructions
; DEFAULT-NEXT: Virtual Register Map
; DEFAULT-NEXT: Live Register Matrix
@@ -37,6 +38,7 @@
; DEFAULT-NEXT: Stack Slot Coloring
; O0: Fast Register Allocator
+; O0-NEXT: Machine Cycle Info Analysis
; O0-NEXT: SI lower SGPR spill instructions
; O0-NEXT: Slot index numbering
; O0-NEXT: Live Interval Analysis
@@ -61,6 +63,7 @@
; BASIC-DEFAULT-NEXT: Basic Register Allocator
; BASIC-DEFAULT-NEXT: Virtual Register Rewriter
; BASIC-DEFAULT-NEXT: Stack Slot Coloring
+; BASIC-DEFAULT-NEXT: Machine Cycle Info Analysis
; BASIC-DEFAULT-NEXT: SI lower SGPR spill instructions
; BASIC-DEFAULT-NEXT: Virtual Register Map
; BASIC-DEFAULT-NEXT: Live Register Matrix
@@ -88,6 +91,7 @@
; DEFAULT-BASIC: Greedy Register Allocator
; DEFAULT-BASIC-NEXT: Virtual Register Rewriter
; DEFAULT-BASIC-NEXT: Stack Slot Coloring
+; DEFAULT-BASIC-NEXT: Machine Cycle Info Analysis
; DEFAULT-BASIC-NEXT: SI lower SGPR spill instructions
; DEFAULT-BASIC-NEXT: Virtual Register Map
; DEFAULT-BASIC-NEXT: Live Register Matrix
@@ -117,6 +121,7 @@
; BASIC-BASIC-NEXT: Basic Register Allocator
; BASIC-BASIC-NEXT: Virtual Register Rewriter
; BASIC-BASIC-NEXT: Stack Slot Coloring
+; BASIC-BASIC-NEXT: Machine Cycle Info Analysis
; BASIC-BASIC-NEXT: SI lower SGPR spill instructions
; BASIC-BASIC-NEXT: Virtual Register Map
; BASIC-BASIC-NEXT: Live Register Matrix
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-cycle-header.ll b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-cycle-header.ll
new file mode 100644
index 0000000000000..4c0eeb10d2913
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-cycle-header.ll
@@ -0,0 +1,206 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+; REQUIRES: amdgpu-registered-target
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -O0 -amdgpu-spill-sgpr-to-vgpr=1 \
+; RUN: -verify-machineinstrs -stop-after=si-lower-sgpr-spills -o - %s | FileCheck %s
+
+;; Ensure that si-lower-sgpr-spills prevents IMPLICIT_DEF assignments from clobbering
+;; backedge writes by placing the assignment in the cycle preheader and not the header.
+
+define amdgpu_kernel void @loop_sgpr_spill_implicit_def_in_preheader(
+ ; CHECK-LABEL: name: loop_sgpr_spill_implicit_def_in_preheader
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $sgpr8_sgpr9
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM renamable $sgpr8_sgpr9, 0, 0 :: (dereferenceable invariant load (s64) from constant-pool, align 16, addrspace 4)
+ ; CHECK-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr8_sgpr9, 0, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset, align 16, addrspace 4)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, [[DEF]], implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr5, 1, [[DEF]], implicit killed $sgpr4_sgpr5
+ ; CHECK-NEXT: renamable $sgpr4_sgpr5 = IMPLICIT_DEF
+ ; CHECK-NEXT: renamable $sgpr4 = S_LOAD_DWORD_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (load (s32) from `ptr addrspace(4) poison`, addrspace 4)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 2, [[DEF]]
+ ; CHECK-NEXT: renamable $sgpr11 = COPY renamable $sgpr4
+ ; CHECK-NEXT: renamable $sgpr10 = COPY renamable $sgpr4
+ ; CHECK-NEXT: renamable $sgpr9 = COPY renamable $sgpr4
+ ; CHECK-NEXT: renamable $sgpr8 = COPY renamable $sgpr4
+ ; CHECK-NEXT: renamable $sgpr7 = COPY renamable $sgpr4
+ ; CHECK-NEXT: renamable $sgpr6 = COPY renamable $sgpr4
+ ; CHECK-NEXT: renamable $sgpr5 = COPY renamable $sgpr4
+ ; CHECK-NEXT: renamable $sgpr12 = S_MOV_B32 0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr12, 3, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr11, 4, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 5, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr9, 6, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr8, 7, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr7, 8, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr6, 9, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr5, 10, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr4, 11, [[DEF]]
+ ; CHECK-NEXT: S_BRANCH %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.loop.header:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $sgpr6 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 3
+ ; CHECK-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 4
+ ; CHECK-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 5
+ ; CHECK-NEXT: $sgpr7 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 6
+ ; CHECK-NEXT: $sgpr8 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 7
+ ; CHECK-NEXT: $sgpr9 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 8
+ ; CHECK-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 9
+ ; CHECK-NEXT: $sgpr11 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 10
+ ; CHECK-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 11
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr12, 12, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr11, 13, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 14, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr9, 15, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr8, 16, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr7, 17, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr5, 18, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr4, 19, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr6, 20, [[DEF]]
+ ; CHECK-NEXT: renamable $sgpr4_sgpr5 = S_MOV_B64 -1
+ ; CHECK-NEXT: renamable $sgpr7 = S_MOV_B32 0
+ ; CHECK-NEXT: S_CMP_EQ_U32 renamable $sgpr6, killed renamable $sgpr7, implicit-def $scc
+ ; CHECK-NEXT: renamable $sgpr6_sgpr7 = S_CSELECT_B64 -1, 0, implicit killed $scc
+ ; CHECK-NEXT: renamable $sgpr6_sgpr7 = S_XOR_B64 killed renamable $sgpr6_sgpr7, renamable $sgpr4_sgpr5, implicit-def dead $scc
+ ; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def dead $scc
+ ; CHECK-NEXT: renamable $sgpr6 = IMPLICIT_DEF
+ ; CHECK-NEXT: renamable $sgpr6 = IMPLICIT_DEF
+ ; CHECK-NEXT: renamable $sgpr6 = IMPLICIT_DEF
+ ; CHECK-NEXT: renamable $sgpr6 = IMPLICIT_DEF
+ ; CHECK-NEXT: renamable $sgpr6 = IMPLICIT_DEF
+ ; CHECK-NEXT: renamable $sgpr6 = IMPLICIT_DEF
+ ; CHECK-NEXT: renamable $sgpr6 = IMPLICIT_DEF
+ ; CHECK-NEXT: renamable $sgpr6 = IMPLICIT_DEF
+ ; CHECK-NEXT: renamable $sgpr6 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 21, [[DEF]], implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr5, 22, [[DEF]], implicit killed $sgpr4_sgpr5
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.3, implicit killed $vcc
+ ; CHECK-NEXT: S_BRANCH %bb.2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2.loop.latch:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 20
+ ; CHECK-NEXT: $sgpr6 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 12
+ ; CHECK-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 13
+ ; CHECK-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 14
+ ; CHECK-NEXT: $sgpr11 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 15
+ ; CHECK-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 16
+ ; CHECK-NEXT: $sgpr9 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 17
+ ; CHECK-NEXT: $sgpr8 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 18
+ ; CHECK-NEXT: $sgpr7 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 19
+ ; CHECK-NEXT: renamable $sgpr5 = S_MOV_B32 1
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr5, 23, [[DEF]]
+ ; CHECK-NEXT: renamable $sgpr7 = S_ADD_I32 renamable $sgpr7, renamable $sgpr5, implicit-def dead $scc
+ ; CHECK-NEXT: renamable $sgpr8 = S_ADD_I32 renamable $sgpr8, renamable $sgpr5, implicit-def dead $scc
+ ; CHECK-NEXT: renamable $sgpr9 = S_ADD_I32 renamable $sgpr9, renamable $sgpr5, implicit-def dead $scc
+ ; CHECK-NEXT: renamable $sgpr10 = S_ADD_I32 renamable $sgpr10, renamable $sgpr5, implicit-def dead $scc
+ ; CHECK-NEXT: renamable $sgpr11 = S_ADD_I32 renamable $sgpr11, renamable $sgpr5, implicit-def dead $scc
+ ; CHECK-NEXT: renamable $sgpr12 = S_ADD_I32 renamable $sgpr12, renamable $sgpr5, implicit-def dead $scc
+ ; CHECK-NEXT: renamable $sgpr13 = S_ADD_I32 renamable $sgpr13, renamable $sgpr5, implicit-def dead $scc
+ ; CHECK-NEXT: renamable $sgpr14 = S_ADD_I32 renamable $sgpr6, renamable $sgpr5, implicit-def dead $scc
+ ; CHECK-NEXT: renamable $sgpr6 = nuw nsw S_ADD_I32 renamable $sgpr4, renamable $sgpr5, implicit-def dead $scc
+ ; CHECK-NEXT: renamable $sgpr4_sgpr5 = S_MOV_B64 0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr14, 24, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr13, 25, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr12, 26, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr11, 27, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 28, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr9, 29, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr8, 30, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr7, 31, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr6, 32, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr4, 21, [[DEF]], implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr5, 22, [[DEF]], implicit killed $sgpr4_sgpr5
+ ; CHECK-NEXT: S_BRANCH %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3.Flow:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vcc_lo = SI_RESTORE_S32_FROM_VGPR [[DEF]], 21, implicit-def $vcc
+ ; CHECK-NEXT: $vcc_hi = SI_RESTORE_S32_FROM_VGPR [[DEF]], 22
+ ; CHECK-NEXT: $sgpr6 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 24
+ ; CHECK-NEXT: $sgpr7 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 25
+ ; CHECK-NEXT: $sgpr8 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 26
+ ; CHECK-NEXT: $sgpr9 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 27
+ ; CHECK-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 28
+ ; CHECK-NEXT: $sgpr11 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 29
+ ; CHECK-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 30
+ ; CHECK-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 31
+ ; CHECK-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 32
+ ; CHECK-NEXT: renamable $sgpr4_sgpr5 = S_MOV_B64 -1
+ ; CHECK-NEXT: renamable $vcc = S_XOR_B64 killed renamable $vcc, killed renamable $sgpr4_sgpr5, implicit-def dead $scc
+ ; CHECK-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 19
+ ; CHECK-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 18
+ ; CHECK-NEXT: renamable $vcc = S_AND_B64 $exec, killed renamable $vcc, implicit-def dead $scc
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr14, 3, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr13, 4, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr12, 5, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr11, 6, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 7, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr9, 8, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr8, 9, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr7, 10, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr6, 11, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr5, 33, [[DEF]]
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr4, 34, [[DEF]]
+ ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc
+ ; CHECK-NEXT: S_BRANCH %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4.exit:
+ ; CHECK-NEXT: $sgpr4 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0, implicit-def $sgpr4_sgpr5
+ ; CHECK-NEXT: $sgpr5 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 1
+ ; CHECK-NEXT: $sgpr7 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 34
+ ; CHECK-NEXT: $sgpr6 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 33
+ ; CHECK-NEXT: renamable $sgpr6 = S_ADD_I32 killed renamable $sgpr6, killed renamable $sgpr7, implicit-def dead $scc
+ ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr6
+ ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[V_MOV_B32_e32_]], [[COPY]], killed renamable $sgpr4_sgpr5, 0, 0, implicit $exec :: (volatile store (s32) into %ir.out.load, addrspace 1)
+ ; CHECK-NEXT: S_ENDPGM 0
+ ptr addrspace(1) %out) local_unnamed_addr #0 {
+entry:
+ %a0 = load i32, ptr addrspace(4) poison
+ %a1 = load i32, ptr addrspace(4) poison
+ %a2 = load i32, ptr addrspace(4) poison
+ %a3 = load i32, ptr addrspace(4) poison
+ %a4 = load i32, ptr addrspace(4) poison
+ %a5 = load i32, ptr addrspace(4) poison
+ %a6 = load i32, ptr addrspace(4) poison
+ %a7 = load i32, ptr addrspace(4) poison
+ br label %loop.header
+
+loop.header:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %v0 = phi i32 [ %a0, %entry ], [ %t0, %loop.latch ]
+ %v1 = phi i32 [ %a1, %entry ], [ %t1, %loop.latch ]
+ %v2 = phi i32 [ %a2, %entry ], [ %t2, %loop.latch ]
+ %v3 = phi i32 [ %a3, %entry ], [ %t3, %loop.latch ]
+ %v4 = phi i32 [ %a4, %entry ], [ %t4, %loop.latch ]
+ %v5 = phi i32 [ %a5, %entry ], [ %t5, %loop.latch ]
+ %v6 = phi i32 [ %a6, %entry ], [ %t6, %loop.latch ]
+ %v7 = phi i32 [ %a7, %entry ], [ %t7, %loop.latch ]
+ %cmp = icmp eq i32 %iv, 0
+ br i1 %cmp, label %loop.latch, label %exit
+
+loop.latch:
+ %t0 = add i32 %v0, 1
+ %t1 = add i32 %v1, 1
+ %t2 = add i32 %v2, 1
+ %t3 = add i32 %v3, 1
+ %t4 = add i32 %v4, 1
+ %t5 = add i32 %v5, 1
+ %t6 = add i32 %v6, 1
+ %t7 = add i32 %v7, 1
+ %iv.next = add nuw nsw i32 %iv, 1
+ br label %loop.header
+
+exit:
+ %sum = add i32 %v0, %v1
+ store volatile i32 %sum, ptr addrspace(1) %out
+ ret void
+}
+
+attributes #0 = { nounwind "amdgpu-num-sgpr"="24" "amdgpu-num-vgpr"="64" }
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-cycle-header.mir b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-cycle-header.mir
new file mode 100644
index 0000000000000..b34ec250e566d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-cycle-header.mir
@@ -0,0 +1,114 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=SGPR-SPILL %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -start-before=si-lower-sgpr-spills -stop-after=regallocfast,1 -verify-machineinstrs %s -o - | FileCheck -check-prefix=WWM-REGALLOC %s
+
+
+# When SGPR spills to a virtual VGPR lane occur in both a loop header and the latch,
+# the IMPLICIT_DEF for the lane VGPR must be placed in the preheader (not the header).
+# Establish that the virtual VGPR is live-in to the header and wwm regallocfast inserts
+# a restore, preserving the latch writes.
+
+---
+name: sgpr_spill_loop_header_and_latch_implicit_def_in_preheader
+tracksRegLiveness: true
+frameInfo:
+ maxAlignment: 4
+stack:
+ - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
+machineFunctionInfo:
+ isEntryFunction: false
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ stackPtrOffsetReg: '$sgpr32'
+ frameOffsetReg: '$sgpr33'
+ hasSpilledSGPRs: true
+body: |
+ ; SGPR-SPILL-LABEL: name: sgpr_spill_loop_header_and_latch_implicit_def_in_preheader
+ ; SGPR-SPILL: bb.0:
+ ; SGPR-SPILL-NEXT: successors: %bb.1(0x80000000)
+ ; SGPR-SPILL-NEXT: liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ ; SGPR-SPILL-NEXT: {{ $}}
+ ; SGPR-SPILL-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; SGPR-SPILL-NEXT: S_BRANCH %bb.1
+ ; SGPR-SPILL-NEXT: {{ $}}
+ ; SGPR-SPILL-NEXT: bb.1:
+ ; SGPR-SPILL-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; SGPR-SPILL-NEXT: liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ ; SGPR-SPILL-NEXT: {{ $}}
+ ; SGPR-SPILL-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
+ ; SGPR-SPILL-NEXT: S_CMP_EQ_U32 $sgpr11, 0, implicit-def $scc
+ ; SGPR-SPILL-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[DEF]]
+ ; SGPR-SPILL-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ ; SGPR-SPILL-NEXT: S_BRANCH %bb.3
+ ; SGPR-SPILL-NEXT: {{ $}}
+ ; SGPR-SPILL-NEXT: bb.2:
+ ; SGPR-SPILL-NEXT: successors: %bb.1(0x80000000)
+ ; SGPR-SPILL-NEXT: liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ ; SGPR-SPILL-NEXT: {{ $}}
+ ; SGPR-SPILL-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
+ ; SGPR-SPILL-NEXT: $sgpr10 = S_MOV_B32 1
+ ; SGPR-SPILL-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[DEF]]
+ ; SGPR-SPILL-NEXT: S_BRANCH %bb.1
+ ; SGPR-SPILL-NEXT: {{ $}}
+ ; SGPR-SPILL-NEXT: bb.3:
+ ; SGPR-SPILL-NEXT: liveins: $sgpr30_sgpr31
+ ; SGPR-SPILL-NEXT: {{ $}}
+ ; SGPR-SPILL-NEXT: S_SETPC_B64 $sgpr30_sgpr31
+ ;
+ ; WWM-REGALLOC-LABEL: name: sgpr_spill_loop_header_and_latch_implicit_def_in_preheader
+ ; WWM-REGALLOC: bb.0:
+ ; WWM-REGALLOC-NEXT: successors: %bb.1(0x80000000)
+ ; WWM-REGALLOC-NEXT: liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ ; WWM-REGALLOC-NEXT: {{ $}}
+ ; WWM-REGALLOC-NEXT: renamable $vgpr63 = IMPLICIT_DEF
+ ; WWM-REGALLOC-NEXT: S_BRANCH %bb.1
+ ; WWM-REGALLOC-NEXT: {{ $}}
+ ; WWM-REGALLOC-NEXT: bb.1:
+ ; WWM-REGALLOC-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; WWM-REGALLOC-NEXT: liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ ; WWM-REGALLOC-NEXT: {{ $}}
+ ; WWM-REGALLOC-NEXT: $vgpr63 = SI_SPILL_WWM_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+ ; WWM-REGALLOC-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 0
+ ; WWM-REGALLOC-NEXT: S_CMP_EQ_U32 $sgpr11, 0, implicit-def $scc
+ ; WWM-REGALLOC-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr10, 0, $vgpr63
+ ; WWM-REGALLOC-NEXT: SI_SPILL_WWM_V32_SAVE killed $vgpr63, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+ ; WWM-REGALLOC-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ ; WWM-REGALLOC-NEXT: S_BRANCH %bb.3
+ ; WWM-REGALLOC-NEXT: {{ $}}
+ ; WWM-REGALLOC-NEXT: bb.2:
+ ; WWM-REGALLOC-NEXT: successors: %bb.1(0x80000000)
+ ; WWM-REGALLOC-NEXT: liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ ; WWM-REGALLOC-NEXT: {{ $}}
+ ; WWM-REGALLOC-NEXT: $vgpr63 = SI_SPILL_WWM_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+ ; WWM-REGALLOC-NEXT: dead $sgpr10 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 0
+ ; WWM-REGALLOC-NEXT: $sgpr10 = S_MOV_B32 1
+ ; WWM-REGALLOC-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr10, 0, $vgpr63
+ ; WWM-REGALLOC-NEXT: SI_SPILL_WWM_V32_SAVE killed $vgpr63, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+ ; WWM-REGALLOC-NEXT: S_BRANCH %bb.1
+ ; WWM-REGALLOC-NEXT: {{ $}}
+ ; WWM-REGALLOC-NEXT: bb.3:
+ ; WWM-REGALLOC-NEXT: liveins: $sgpr30_sgpr31
+ ; WWM-REGALLOC-NEXT: {{ $}}
+ ; WWM-REGALLOC-NEXT: S_SETPC_B64 killed $sgpr30_sgpr31
+
+
+ bb.0:
+ liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ S_BRANCH %bb.1
+ bb.1:
+ liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+ S_CMP_EQ_U32 $sgpr11, 0, implicit-def $scc
+ SI_SPILL_S32_SAVE killed $sgpr10, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+ S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ S_BRANCH %bb.3
+ bb.2:
+ liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+ $sgpr10 = S_MOV_B32 1
+ SI_SPILL_S32_SAVE killed $sgpr10, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+ S_BRANCH %bb.1
+ bb.3:
+ liveins: $sgpr30_sgpr31
+ S_SETPC_B64 $sgpr30_sgpr31
+## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-initial-insert-in-body.mir b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-initial-insert-in-body.mir
new file mode 100644
index 0000000000000..3655376703d72
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-initial-insert-in-body.mir
@@ -0,0 +1,72 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=SGPR-SPILL %s
+
+---
+name: sgpr_spill_initial_insert_in_body_moves_to_preheader
+tracksRegLiveness: true
+frameInfo:
+ maxAlignment: 4
+stack:
+ - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
+machineFunctionInfo:
+ isEntryFunction: false
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ stackPtrOffsetReg: '$sgpr32'
+ frameOffsetReg: '$sgpr33'
+ hasSpilledSGPRs: true
+body: |
+ ; SGPR-SPILL-LABEL: name: sgpr_spill_initial_insert_in_body_moves_to_preheader
+ ; SGPR-SPILL: bb.0:
+ ; SGPR-SPILL-NEXT: successors: %bb.1(0x80000000)
+ ; SGPR-SPILL-NEXT: liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ ; SGPR-SPILL-NEXT: {{ $}}
+ ; SGPR-SPILL-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; SGPR-SPILL-NEXT: S_BRANCH %bb.1
+ ; SGPR-SPILL-NEXT: {{ $}}
+ ; SGPR-SPILL-NEXT: bb.1:
+ ; SGPR-SPILL-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
+ ; SGPR-SPILL-NEXT: liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ ; SGPR-SPILL-NEXT: {{ $}}
+ ; SGPR-SPILL-NEXT: S_CMP_EQ_U32 $sgpr11, 0, implicit-def $scc
+ ; SGPR-SPILL-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ ; SGPR-SPILL-NEXT: S_BRANCH %bb.4
+ ; SGPR-SPILL-NEXT: {{ $}}
+ ; SGPR-SPILL-NEXT: bb.2:
+ ; SGPR-SPILL-NEXT: successors: %bb.3(0x80000000)
+ ; SGPR-SPILL-NEXT: liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ ; SGPR-SPILL-NEXT: {{ $}}
+ ; SGPR-SPILL-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
+ ; SGPR-SPILL-NEXT: $sgpr10 = S_MOV_B32 1
+ ; SGPR-SPILL-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[DEF]]
+ ; SGPR-SPILL-NEXT: S_BRANCH %bb.3
+ ; SGPR-SPILL-NEXT: {{ $}}
+ ; SGPR-SPILL-NEXT: bb.3:
+ ; SGPR-SPILL-NEXT: successors: %bb.1(0x80000000)
+ ; SGPR-SPILL-NEXT: liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ ; SGPR-SPILL-NEXT: {{ $}}
+ ; SGPR-SPILL-NEXT: S_BRANCH %bb.1
+ ; SGPR-SPILL-NEXT: {{ $}}
+ ; SGPR-SPILL-NEXT: bb.4:
+ ; SGPR-SPILL-NEXT: liveins: $sgpr30_sgpr31
+ ; SGPR-SPILL-NEXT: {{ $}}
+ ; SGPR-SPILL-NEXT: S_SETPC_B64 $sgpr30_sgpr31
+ bb.0:
+ liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ S_BRANCH %bb.1
+ bb.1:
+ liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ S_CMP_EQ_U32 $sgpr11, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ S_BRANCH %bb.4
+ bb.2:
+ liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+ $sgpr10 = S_MOV_B32 1
+ SI_SPILL_S32_SAVE killed $sgpr10, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+ S_BRANCH %bb.3
+ bb.3:
+ liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ S_BRANCH %bb.1
+ bb.4:
+ liveins: $sgpr30_sgpr31
+ S_SETPC_B64 $sgpr30_sgpr31
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-initial-insert-in-latch.mir b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-initial-insert-in-latch.mir
new file mode 100644
index 0000000000000..eb4f63a4e2b0c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-initial-insert-in-latch.mir
@@ -0,0 +1,63 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=SGPR-SPILL %s
+
+---
+name: sgpr_spill_initial_insert_in_latch_moves_to_preheader
+tracksRegLiveness: true
+frameInfo:
+ maxAlignment: 4
+stack:
+ - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
+machineFunctionInfo:
+ isEntryFunction: false
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ stackPtrOffsetReg: '$sgpr32'
+ frameOffsetReg: '$sgpr33'
+ hasSpilledSGPRs: true
+body: |
+ ; SGPR-SPILL-LABEL: name: sgpr_spill_initial_insert_in_latch_moves_to_preheader
+ ; SGPR-SPILL: bb.0:
+ ; SGPR-SPILL-NEXT: successors: %bb.1(0x80000000)
+ ; SGPR-SPILL-NEXT: liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ ; SGPR-SPILL-NEXT: {{ $}}
+ ; SGPR-SPILL-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; SGPR-SPILL-NEXT: S_BRANCH %bb.1
+ ; SGPR-SPILL-NEXT: {{ $}}
+ ; SGPR-SPILL-NEXT: bb.1:
+ ; SGPR-SPILL-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000)
+ ; SGPR-SPILL-NEXT: liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ ; SGPR-SPILL-NEXT: {{ $}}
+ ; SGPR-SPILL-NEXT: S_CMP_EQ_U32 $sgpr11, 0, implicit-def $scc
+ ; SGPR-SPILL-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ ; SGPR-SPILL-NEXT: S_BRANCH %bb.3
+ ; SGPR-SPILL-NEXT: {{ $}}
+ ; SGPR-SPILL-NEXT: bb.2:
+ ; SGPR-SPILL-NEXT: successors: %bb.1(0x80000000)
+ ; SGPR-SPILL-NEXT: liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ ; SGPR-SPILL-NEXT: {{ $}}
+ ; SGPR-SPILL-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
+ ; SGPR-SPILL-NEXT: $sgpr10 = S_MOV_B32 1
+ ; SGPR-SPILL-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[DEF]]
+ ; SGPR-SPILL-NEXT: S_BRANCH %bb.1
+ ; SGPR-SPILL-NEXT: {{ $}}
+ ; SGPR-SPILL-NEXT: bb.3:
+ ; SGPR-SPILL-NEXT: liveins: $sgpr30_sgpr31
+ ; SGPR-SPILL-NEXT: {{ $}}
+ ; SGPR-SPILL-NEXT: S_SETPC_B64 $sgpr30_sgpr31
+ bb.0:
+ liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ S_BRANCH %bb.1
+ bb.1:
+ liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ S_CMP_EQ_U32 $sgpr11, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.2, implicit killed $scc
+ S_BRANCH %bb.3
+ bb.2:
+ liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+ $sgpr10 = S_MOV_B32 1
+ SI_SPILL_S32_SAVE killed $sgpr10, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+ S_BRANCH %bb.1
+ bb.3:
+ liveins: $sgpr30_sgpr31
+ S_SETPC_B64 $sgpr30_sgpr31
diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-multi-entry-cycle.mir b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-multi-entry-cycle.mir
new file mode 100644
index 0000000000000..73dd7637e6e25
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-lower-sgpr-spills-multi-entry-cycle.mir
@@ -0,0 +1,149 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass=si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=SGPR-SPILL %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -O0 -start-before=si-lower-sgpr-spills -stop-after=regallocfast,1 -verify-machineinstrs %s -o - | FileCheck -check-prefix=WWM-REGALLOC %s
+
+# Ensure that for a multi-entry cycle si-lower-sgpr-spills inserts the
+# IMPLICIT_DEF into the nearest common dominator (NCD) of the cycle's
+# entries.
+---
+name: sgpr_spill_multi_entry_implicit_def
+tracksRegLiveness: true
+frameInfo:
+ maxAlignment: 4
+stack:
+ - { id: 0, type: spill-slot, size: 4, alignment: 4, stack-id: sgpr-spill }
+machineFunctionInfo:
+ isEntryFunction: false
+ scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
+ stackPtrOffsetReg: '$sgpr32'
+ frameOffsetReg: '$sgpr33'
+ hasSpilledSGPRs: true
+body: |
+ ; SGPR-SPILL-LABEL: name: sgpr_spill_multi_entry_implicit_def
+ ; SGPR-SPILL: bb.0:
+ ; SGPR-SPILL-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; SGPR-SPILL-NEXT: liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ ; SGPR-SPILL-NEXT: {{ $}}
+ ; SGPR-SPILL-NEXT: S_CMP_EQ_U32 $sgpr11, 0, implicit-def $scc
+ ; SGPR-SPILL-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; SGPR-SPILL-NEXT: S_CBRANCH_SCC1 %bb.1, implicit killed $scc
+ ; SGPR-SPILL-NEXT: S_BRANCH %bb.2
+ ; SGPR-SPILL-NEXT: {{ $}}
+ ; SGPR-SPILL-NEXT: bb.1:
+ ; SGPR-SPILL-NEXT: successors: %bb.3(0x80000000)
+ ; SGPR-SPILL-NEXT: liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ ; SGPR-SPILL-NEXT: {{ $}}
+ ; SGPR-SPILL-NEXT: S_NOP 0
+ ; SGPR-SPILL-NEXT: S_BRANCH %bb.3
+ ; SGPR-SPILL-NEXT: {{ $}}
+ ; SGPR-SPILL-NEXT: bb.2:
+ ; SGPR-SPILL-NEXT: successors: %bb.4(0x80000000)
+ ; SGPR-SPILL-NEXT: liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ ; SGPR-SPILL-NEXT: {{ $}}
+ ; SGPR-SPILL-NEXT: S_NOP 0
+ ; SGPR-SPILL-NEXT: S_BRANCH %bb.4
+ ; SGPR-SPILL-NEXT: {{ $}}
+ ; SGPR-SPILL-NEXT: bb.3:
+ ; SGPR-SPILL-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000)
+ ; SGPR-SPILL-NEXT: liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ ; SGPR-SPILL-NEXT: {{ $}}
+ ; SGPR-SPILL-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
+ ; SGPR-SPILL-NEXT: S_CMP_EQ_U32 $sgpr11, 0, implicit-def $scc
+ ; SGPR-SPILL-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[DEF]]
+ ; SGPR-SPILL-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc
+ ; SGPR-SPILL-NEXT: S_BRANCH %bb.5
+ ; SGPR-SPILL-NEXT: {{ $}}
+ ; SGPR-SPILL-NEXT: bb.4:
+ ; SGPR-SPILL-NEXT: successors: %bb.3(0x80000000)
+ ; SGPR-SPILL-NEXT: liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ ; SGPR-SPILL-NEXT: {{ $}}
+ ; SGPR-SPILL-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
+ ; SGPR-SPILL-NEXT: $sgpr10 = S_MOV_B32 1
+ ; SGPR-SPILL-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[DEF]]
+ ; SGPR-SPILL-NEXT: S_BRANCH %bb.3
+ ; SGPR-SPILL-NEXT: {{ $}}
+ ; SGPR-SPILL-NEXT: bb.5:
+ ; SGPR-SPILL-NEXT: liveins: $sgpr30_sgpr31
+ ; SGPR-SPILL-NEXT: {{ $}}
+ ; SGPR-SPILL-NEXT: S_SETPC_B64 $sgpr30_sgpr31
+ ;
+ ; WWM-REGALLOC-LABEL: name: sgpr_spill_multi_entry_implicit_def
+ ; WWM-REGALLOC: bb.0:
+ ; WWM-REGALLOC-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; WWM-REGALLOC-NEXT: liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ ; WWM-REGALLOC-NEXT: {{ $}}
+ ; WWM-REGALLOC-NEXT: S_CMP_EQ_U32 $sgpr11, 0, implicit-def $scc
+ ; WWM-REGALLOC-NEXT: renamable $vgpr63 = IMPLICIT_DEF
+ ; WWM-REGALLOC-NEXT: S_CBRANCH_SCC1 %bb.1, implicit killed $scc
+ ; WWM-REGALLOC-NEXT: S_BRANCH %bb.2
+ ; WWM-REGALLOC-NEXT: {{ $}}
+ ; WWM-REGALLOC-NEXT: bb.1:
+ ; WWM-REGALLOC-NEXT: successors: %bb.3(0x80000000)
+ ; WWM-REGALLOC-NEXT: liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ ; WWM-REGALLOC-NEXT: {{ $}}
+ ; WWM-REGALLOC-NEXT: S_NOP 0
+ ; WWM-REGALLOC-NEXT: S_BRANCH %bb.3
+ ; WWM-REGALLOC-NEXT: {{ $}}
+ ; WWM-REGALLOC-NEXT: bb.2:
+ ; WWM-REGALLOC-NEXT: successors: %bb.4(0x80000000)
+ ; WWM-REGALLOC-NEXT: liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ ; WWM-REGALLOC-NEXT: {{ $}}
+ ; WWM-REGALLOC-NEXT: S_NOP 0
+ ; WWM-REGALLOC-NEXT: S_BRANCH %bb.4
+ ; WWM-REGALLOC-NEXT: {{ $}}
+ ; WWM-REGALLOC-NEXT: bb.3:
+ ; WWM-REGALLOC-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000)
+ ; WWM-REGALLOC-NEXT: liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ ; WWM-REGALLOC-NEXT: {{ $}}
+ ; WWM-REGALLOC-NEXT: $vgpr63 = SI_SPILL_WWM_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+ ; WWM-REGALLOC-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 0
+ ; WWM-REGALLOC-NEXT: S_CMP_EQ_U32 $sgpr11, 0, implicit-def $scc
+ ; WWM-REGALLOC-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr10, 0, $vgpr63
+ ; WWM-REGALLOC-NEXT: SI_SPILL_WWM_V32_SAVE killed $vgpr63, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+ ; WWM-REGALLOC-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc
+ ; WWM-REGALLOC-NEXT: S_BRANCH %bb.5
+ ; WWM-REGALLOC-NEXT: {{ $}}
+ ; WWM-REGALLOC-NEXT: bb.4:
+ ; WWM-REGALLOC-NEXT: successors: %bb.3(0x80000000)
+ ; WWM-REGALLOC-NEXT: liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ ; WWM-REGALLOC-NEXT: {{ $}}
+ ; WWM-REGALLOC-NEXT: $vgpr63 = SI_SPILL_WWM_V32_RESTORE %stack.1, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+ ; WWM-REGALLOC-NEXT: dead $sgpr10 = SI_RESTORE_S32_FROM_VGPR $vgpr63, 0
+ ; WWM-REGALLOC-NEXT: $sgpr10 = S_MOV_B32 1
+ ; WWM-REGALLOC-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr10, 0, $vgpr63
+ ; WWM-REGALLOC-NEXT: SI_SPILL_WWM_V32_SAVE killed $vgpr63, %stack.1, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+ ; WWM-REGALLOC-NEXT: S_BRANCH %bb.3
+ ; WWM-REGALLOC-NEXT: {{ $}}
+ ; WWM-REGALLOC-NEXT: bb.5:
+ ; WWM-REGALLOC-NEXT: liveins: $sgpr30_sgpr31
+ ; WWM-REGALLOC-NEXT: {{ $}}
+ ; WWM-REGALLOC-NEXT: S_SETPC_B64 killed $sgpr30_sgpr31
+ bb.0:
+ liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ S_CMP_EQ_U32 $sgpr11, 0, implicit-def $scc
+ S_CBRANCH_SCC1 %bb.1, implicit killed $scc
+ S_BRANCH %bb.4
+ bb.1:
+ liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ S_NOP 0
+ S_BRANCH %bb.2
+ bb.4:
+ liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ S_NOP 0
+ S_BRANCH %bb.3
+ bb.2:
+ liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+ S_CMP_EQ_U32 $sgpr11, 0, implicit-def $scc
+ SI_SPILL_S32_SAVE killed $sgpr10, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+ S_CBRANCH_SCC1 %bb.3, implicit killed $scc
+ S_BRANCH %bb.5
+ bb.3:
+ liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
+ renamable $sgpr10 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+ $sgpr10 = S_MOV_B32 1
+ SI_SPILL_S32_SAVE killed $sgpr10, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32
+ S_BRANCH %bb.2
+ bb.5:
+ liveins: $sgpr30_sgpr31
+ S_SETPC_B64 $sgpr30_sgpr31
+## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
diff --git a/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir
index fa3fd3bc6da5b..bb47603647733 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir
@@ -241,13 +241,14 @@ body: |
; GCN-NEXT: liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
; GCN-NEXT: {{ $}}
; GCN-NEXT: S_NOP 0
+ ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: S_BRANCH %bb.3
; GCN-NEXT: {{ $}}
; GCN-NEXT: bb.1:
; GCN-NEXT: successors: %bb.2(0x80000000)
; GCN-NEXT: liveins: $sgpr10, $sgpr30_sgpr31
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR %0, 0
+ ; GCN-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
; GCN-NEXT: $sgpr10 = S_ADD_I32 $sgpr10, 15, implicit-def dead $scc
; GCN-NEXT: S_BRANCH %bb.2
; GCN-NEXT: {{ $}}
@@ -255,7 +256,7 @@ body: |
; GCN-NEXT: successors: %bb.3(0x80000000)
; GCN-NEXT: liveins: $sgpr10, $sgpr30_sgpr31
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR %0, 0
+ ; GCN-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR [[DEF]], 0
; GCN-NEXT: $sgpr10 = S_ADD_I32 $sgpr10, 20, implicit-def dead $scc
; GCN-NEXT: S_BRANCH %bb.3
; GCN-NEXT: {{ $}}
@@ -264,7 +265,6 @@ body: |
; GCN-NEXT: liveins: $sgpr10, $sgpr11, $sgpr30_sgpr31
; GCN-NEXT: {{ $}}
; GCN-NEXT: $sgpr10 = S_MOV_B32 10
- ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[DEF]]
; GCN-NEXT: S_CMP_EQ_U32 $sgpr11, 0, implicit-def $scc
; GCN-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc
diff --git a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll
index 47b976abf12ff..1e68c4b6a190a 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll
+++ b/llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll
@@ -13,6 +13,7 @@ define void @test() {
; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
+; CHECK-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
; CHECK-NEXT: .LBB0_1: ; %bb.1
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: s_cbranch_scc1 .LBB0_3
@@ -20,6 +21,9 @@ define void @test() {
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: .LBB0_3: ; %bb.3
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: s_or_saveexec_b64 s[10:11], -1
+; CHECK-NEXT: v_accvgpr_read_b32 v1, a0 ; Reload Reuse
+; CHECK-NEXT: s_mov_b64 exec, s[10:11]
; CHECK-NEXT: ; implicit-def: $sgpr4
; CHECK-NEXT: v_mov_b32_e32 v0, s4
; CHECK-NEXT: v_readfirstlane_b32 s6, v0
@@ -29,7 +33,6 @@ define void @test() {
; CHECK-NEXT: s_cselect_b64 s[6:7], -1, 0
; CHECK-NEXT: s_xor_b64 s[6:7], s[6:7], s[4:5]
; CHECK-NEXT: s_and_b64 vcc, exec, s[6:7]
-; CHECK-NEXT: ; implicit-def: $vgpr1 : SGPR spill to VGPR lane
; CHECK-NEXT: v_writelane_b32 v1, s4, 0
; CHECK-NEXT: v_writelane_b32 v1, s5, 1
; CHECK-NEXT: s_or_saveexec_b64 s[10:11], -1
More information about the llvm-commits
mailing list