[llvm] [Draft] Support save/restore point splitting in shrink-wrap (PR #119359)
Elizaveta Noskova via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 1 01:01:53 PST 2025
https://github.com/enoskova-sc updated https://github.com/llvm/llvm-project/pull/119359
>From e7c0fb90f0ba2e7661a15298d78fabea46c2ccf0 Mon Sep 17 00:00:00 2001
From: ens-sc <elizaveta.noskova at syntacore.com>
Date: Mon, 16 Sep 2024 18:48:13 +0300
Subject: [PATCH 1/2] [llvm] support multiple save/restore points in
prolog-epilog
With this patch the possibility to store multiple Save and Restore points in MachineFrameInfo appears.
As a logical consequence, the notions "Save point" / "Restore point"
are no longer synonyms for "Prolog" / "Epilog". Currently, "Prolog" / "Epilog"
is the place for stack allocation / deallocation and
"Save point" / "Restore point" is the place for register spills and restores.
So, now we need to store in MachineFrameInfo not only the vectors of Save and Restore blocks,
but also the Prolog and Epilog blocks.
As we may have multiple Save and Restore points, we need to know the list of registers
we store / restore in each point. Therefore our SavePoint becomes a pair <MachineBasicBlock, std::vector<Register>>.
The full support for operating with multiple Save / Restore points is supported only in RISCV backend.
---
llvm/include/llvm/CodeGen/MachineFrameInfo.h | 49 +++-
llvm/lib/CodeGen/MIRPrinter.cpp | 1 +
llvm/lib/CodeGen/MachineFrameInfo.cpp | 1 +
llvm/lib/CodeGen/PrologEpilogInserter.cpp | 233 +++++++++++++-----
llvm/lib/CodeGen/ShrinkWrap.cpp | 1 +
llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 131 ++++++++--
llvm/lib/Target/RISCV/RISCVFrameLowering.h | 4 +
llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 54 ++++
.../CodeGen/AArch64/live-debugvalues-sve.mir | 12 +-
.../CodeGen/PowerPC/common-chain-aix32.ll | 6 +-
llvm/test/CodeGen/PowerPC/common-chain.ll | 8 +-
.../PowerPC/loop-instr-form-prepare.ll | 28 +--
.../CodeGen/PowerPC/lsr-profitable-chain.ll | 32 +--
.../CodeGen/PowerPC/more-dq-form-prepare.ll | 81 +++++-
llvm/test/CodeGen/PowerPC/pr43527.ll | 2 +-
llvm/test/CodeGen/PowerPC/shrink-wrap.ll | 204 +++++++--------
16 files changed, 600 insertions(+), 247 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
index 50ce93104ab53..95f0c5d40dab6 100644
--- a/llvm/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineFrameInfo.h
@@ -335,9 +335,16 @@ class MachineFrameInfo {
/// stack objects like arguments so we can't treat them as immutable.
bool HasTailCall = false;
- /// Not empty, if shrink-wrapping found a better place for the prologue.
+ /// Not null, if shrink-wrapping found a better place for the prologue.
+ MachineBasicBlock *Prolog = nullptr;
+ /// Not null, if shrink-wrapping found a better place for the epilogue.
+ MachineBasicBlock *Epilog = nullptr;
+
+ /// Not empty, if shrink-wrapping found a better place for saving
+ /// callee-saved registers.
SaveRestorePoints SavePoints;
- /// Not empty, if shrink-wrapping found a better place for the epilogue.
+ /// Not empty, if shrink-wrapping found a better place for restoring
+ /// callee-saved registers.
SaveRestorePoints RestorePoints;
/// Size of the UnsafeStack Frame
@@ -828,6 +835,39 @@ class MachineFrameInfo {
/// \copydoc getCalleeSavedInfo()
std::vector<CalleeSavedInfo> &getCalleeSavedInfo() { return CSInfo; }
+ /// Returns callee saved info vector for provided save point in
+ /// the current function.
+ std::vector<CalleeSavedInfo> getSaveCSInfo(MachineBasicBlock *MBB) const {
+ return SavePoints.lookup(MBB);
+ }
+
+ /// Returns callee saved info vector for provided restore point
+ /// in the current function.
+ const std::vector<CalleeSavedInfo>
+ getRestoreCSInfo(MachineBasicBlock *MBB) const {
+ return RestorePoints.lookup(MBB);
+ }
+
+ MachineBasicBlock *findSpilledIn(const CalleeSavedInfo &Match) const {
+ for (auto [BB, CSIV] : SavePoints) {
+ for (auto &CSI : CSIV) {
+ if (CSI.getReg() == Match.getReg())
+ return BB;
+ }
+ }
+ return nullptr;
+ }
+
+ MachineBasicBlock *findRestoredIn(const CalleeSavedInfo &Match) const {
+ for (auto [BB, CSIV] : RestorePoints) {
+ for (auto &CSI : CSIV) {
+ if (CSI.getReg() == Match.getReg())
+ return BB;
+ }
+ }
+ return nullptr;
+ }
+
/// Used by prolog/epilog inserter to set the function's callee saved
/// information.
void setCalleeSavedInfo(std::vector<CalleeSavedInfo> CSI) {
@@ -851,6 +891,11 @@ class MachineFrameInfo {
RestorePoints = std::move(NewRestorePoints);
}
+ MachineBasicBlock *getProlog() const { return Prolog; }
+ void setProlog(MachineBasicBlock *BB) { Prolog = BB; }
+ MachineBasicBlock *getEpilog() const { return Epilog; }
+ void setEpilog(MachineBasicBlock *BB) { Epilog = BB; }
+
void clearSavePoints() { SavePoints.clear(); }
void clearRestorePoints() { RestorePoints.clear(); }
diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp
index 1d54d72336860..a62bd23824eb5 100644
--- a/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -152,6 +152,7 @@ convertSRPoints(ModuleSlotTracker &MST,
std::vector<yaml::SaveRestorePointEntry> &YamlSRPoints,
const llvm::SaveRestorePoints &SRPoints,
const TargetRegisterInfo *TRI);
+
static void convertStackObjects(yaml::MachineFunction &YMF,
const MachineFunction &MF,
ModuleSlotTracker &MST, MFPrintState &State);
diff --git a/llvm/lib/CodeGen/MachineFrameInfo.cpp b/llvm/lib/CodeGen/MachineFrameInfo.cpp
index aed68afb4eb1b..c6658d2e9eba8 100644
--- a/llvm/lib/CodeGen/MachineFrameInfo.cpp
+++ b/llvm/lib/CodeGen/MachineFrameInfo.cpp
@@ -244,6 +244,7 @@ void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
}
OS << "\n";
}
+
OS << "save/restore points:\n";
if (!SavePoints.empty()) {
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 41efe622417c8..ea9d830d37193 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -85,8 +85,12 @@ class PEIImpl {
unsigned MinCSFrameIndex = std::numeric_limits<unsigned>::max();
unsigned MaxCSFrameIndex = 0;
- // Save and Restore blocks of the current function. Typically there is a
- // single save block, unless Windows EH funclets are involved.
+ // Prolog and Epilog blocks of the current function. Typically there is a
+ // single Prolog block, unless Windows EH funclets are involved.
+ MBBVector PrologBlocks;
+ MBBVector EpilogBlocks;
+
+ // Save and Restore blocks of the current function.
MBBVector SaveBlocks;
MBBVector RestoreBlocks;
@@ -104,6 +108,7 @@ class PEIImpl {
void calculateCallFrameInfo(MachineFunction &MF);
void calculateSaveRestoreBlocks(MachineFunction &MF);
+ void calculatePrologEpilogBlocks(MachineFunction &MF);
void spillCalleeSavedRegs(MachineFunction &MF);
void calculateFrameObjectOffsets(MachineFunction &MF);
@@ -236,14 +241,17 @@ bool PEIImpl::run(MachineFunction &MF) {
// information. Also eliminates call frame pseudo instructions.
calculateCallFrameInfo(MF);
- // Determine placement of CSR spill/restore code and prolog/epilog code:
+ // Determine placement of CSR spill/restore code:
// place all spills in the entry block, all restores in return blocks.
calculateSaveRestoreBlocks(MF);
+ // Determine placement of prolog/epilog code.
+ calculatePrologEpilogBlocks(MF);
+
// Stash away DBG_VALUEs that should not be moved by insertion of prolog code.
SavedDbgValuesMap EntryDbgValues;
- for (MachineBasicBlock *SaveBlock : SaveBlocks)
- stashEntryDbgValues(*SaveBlock, EntryDbgValues);
+ for (MachineBasicBlock *PrologBlock : PrologBlocks)
+ stashEntryDbgValues(*PrologBlock, EntryDbgValues);
// Handle CSR spilling and restoring, for targets that need it.
if (MF.getTarget().usesPhysRegsForValues())
@@ -351,6 +359,8 @@ bool PEIImpl::run(MachineFunction &MF) {
delete RS;
SaveBlocks.clear();
RestoreBlocks.clear();
+ PrologBlocks.clear();
+ EpilogBlocks.clear();
MFI.clearSavePoints();
MFI.clearRestorePoints();
return true;
@@ -419,6 +429,25 @@ void PEIImpl::calculateCallFrameInfo(MachineFunction &MF) {
}
}
+/// Compute two sets of blocks for placing prolog and epilog code respectively.
+void PEIImpl::calculatePrologEpilogBlocks(MachineFunction &MF) {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ MachineBasicBlock *Prolog = MFI.getProlog();
+ MachineBasicBlock *Epilog = MFI.getEpilog();
+
+ if (Prolog)
+ PrologBlocks.push_back(Prolog);
+
+ if (Epilog)
+ EpilogBlocks.push_back(Epilog);
+
+ if (!Prolog && !SaveBlocks.empty())
+ PrologBlocks = SaveBlocks;
+
+ if (!Epilog && !RestoreBlocks.empty())
+ EpilogBlocks = RestoreBlocks;
+}
+
/// Compute the sets of entry and return blocks for saving and restoring
/// callee-saved registers, and placing prolog and epilog code.
void PEIImpl::calculateSaveRestoreBlocks(MachineFunction &MF) {
@@ -550,8 +579,8 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F,
/// Helper function to update the liveness information for the callee-saved
/// registers.
-static void updateLiveness(MachineFunction &MF) {
- MachineFrameInfo &MFI = MF.getFrameInfo();
+static void updateLiveness(MachineFunction &MF, MachineBasicBlock *Save,
+ MachineBasicBlock *Restore, CalleeSavedInfo &Info) {
// Visited will contain all the basic blocks that are in the region
// where the callee saved registers are alive:
// - Anything that is not Save or Restore -> LiveThrough.
@@ -563,12 +592,6 @@ static void updateLiveness(MachineFunction &MF) {
SmallVector<MachineBasicBlock *, 8> WorkList;
MachineBasicBlock *Entry = &MF.front();
- assert(MFI.getSavePoints().size() < 2 &&
- "Multiple save points not yet supported!");
- MachineBasicBlock *Save = MFI.getSavePoints().empty()
- ? nullptr
- : (*MFI.getSavePoints().begin()).first;
-
if (!Save)
Save = Entry;
@@ -578,11 +601,6 @@ static void updateLiveness(MachineFunction &MF) {
}
Visited.insert(Save);
- assert(MFI.getRestorePoints().size() < 2 &&
- "Multiple restore points not yet supported!");
- MachineBasicBlock *Restore = MFI.getRestorePoints().empty()
- ? nullptr
- : (*MFI.getRestorePoints().begin()).first;
if (Restore)
// By construction Restore cannot be visited, otherwise it
// means there exists a path to Restore that does not go
@@ -602,30 +620,26 @@ static void updateLiveness(MachineFunction &MF) {
WorkList.push_back(SuccBB);
}
- const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
-
MachineRegisterInfo &MRI = MF.getRegInfo();
- for (const CalleeSavedInfo &I : CSI) {
- for (MachineBasicBlock *MBB : Visited) {
- MCRegister Reg = I.getReg();
- // Add the callee-saved register as live-in.
- // It's killed at the spill.
- if (!MRI.isReserved(Reg) && !MBB->isLiveIn(Reg))
- MBB->addLiveIn(Reg);
- }
- // If callee-saved register is spilled to another register rather than
- // spilling to stack, the destination register has to be marked as live for
- // each MBB between the prologue and epilogue so that it is not clobbered
- // before it is reloaded in the epilogue. The Visited set contains all
- // blocks outside of the region delimited by prologue/epilogue.
- if (I.isSpilledToReg()) {
- for (MachineBasicBlock &MBB : MF) {
- if (Visited.count(&MBB))
- continue;
- MCRegister DstReg = I.getDstReg();
- if (!MBB.isLiveIn(DstReg))
- MBB.addLiveIn(DstReg);
- }
+ for (MachineBasicBlock *MBB : Visited) {
+ MCPhysReg Reg = Info.getReg();
+ // Add the callee-saved register as live-in.
+ // It's killed at the spill.
+ if (!MRI.isReserved(Reg) && !MBB->isLiveIn(Reg))
+ MBB->addLiveIn(Reg);
+ }
+ // If callee-saved register is spilled to another register rather than
+ // spilling to stack, the destination register has to be marked as live for
+ // each MBB between the save and restore point so that it is not clobbered
+ // before it is reloaded in the restore point. The Visited set contains all
+ // blocks outside of the region delimited by save/restore.
+ if (Info.isSpilledToReg()) {
+ for (MachineBasicBlock &MBB : MF) {
+ if (Visited.count(&MBB))
+ continue;
+ MCPhysReg DstReg = Info.getDstReg();
+ if (!MBB.isLiveIn(DstReg))
+ MBB.addLiveIn(DstReg);
}
}
}
@@ -648,7 +662,7 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock,
/// Insert restore code for the callee-saved registers used in the function.
static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
- std::vector<CalleeSavedInfo> &CSI) {
+ std::vector<CalleeSavedInfo> CSI) {
MachineFunction &MF = *RestoreBlock.getParent();
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
@@ -665,6 +679,64 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
}
}
+static void fillCSInfoPerBB(MachineFrameInfo &MFI,
+ DenseMap<MCRegister, CalleeSavedInfo *> &RegToInfo,
+ MBBVector &PrologEpilogBlocks, bool isSave) {
+ // Global CalleeSavedInfo list aggregating CSIVs for all points
+ std::vector<CalleeSavedInfo> GCSIV;
+ const SaveRestorePoints &SRPoints =
+ isSave ? MFI.getSavePoints() : MFI.getRestorePoints();
+ SaveRestorePoints Inner;
+ for (auto [BB, Regs] : SRPoints) {
+ // CalleeSavedInfo list for each point
+ std::vector<CalleeSavedInfo> CSIV;
+ for (auto &Reg : Regs) {
+ auto It = RegToInfo.find(Reg.getReg());
+ if (It == RegToInfo.end())
+ continue;
+ CSIV.push_back(*RegToInfo.at(Reg.getReg()));
+ GCSIV.push_back(*RegToInfo.at(Reg.getReg()));
+ }
+ // We need to sort CSIV, because AArch64 expects the CSI list to come sorted
+ // by frame index.
+ sort(CSIV, [](const CalleeSavedInfo &Lhs, const CalleeSavedInfo &Rhs) {
+ return Lhs.getFrameIdx() < Rhs.getFrameIdx();
+ });
+ Inner.try_emplace(BB, std::move(CSIV));
+ }
+
+ // If not all CSRs listed in MFI.getCalleeSavedInfo are in the
+ // list of spilled/restored registers (for example the AArch64 backend adds VG
+ // registers to the list of CalleeSavedRegs during spill slot assignment), we
+ // should add them to this list and spill/restore them in Prolog/Epilog.
+ if (GCSIV.size() < RegToInfo.size()) {
+ for (auto &RTI : RegToInfo) {
+ if (count_if(GCSIV, [&RTI](const CalleeSavedInfo &CSI) {
+ return CSI.getReg() == RTI.first;
+ }))
+ continue;
+ for (MachineBasicBlock *BB : PrologEpilogBlocks) {
+ if (auto Entry = Inner.find(BB); Entry != Inner.end()) {
+ auto &CSI = Entry->second;
+ CSI.push_back(*RTI.second);
+ sort(CSI, [](const CalleeSavedInfo &Lhs, const CalleeSavedInfo &Rhs) {
+ return Lhs.getFrameIdx() < Rhs.getFrameIdx();
+ });
+ continue;
+ }
+ // CalleeSavedInfo list for each point
+ Inner.try_emplace(BB,
+ std::initializer_list<CalleeSavedInfo>{*RTI.second});
+ }
+ }
+ }
+
+ if (isSave)
+ MFI.setSavePoints(Inner);
+ else
+ MFI.setRestorePoints(Inner);
+}
+
void PEIImpl::spillCalleeSavedRegs(MachineFunction &MF) {
// We can't list this requirement in getRequiredProperties because some
// targets (WebAssembly) use virtual registers past this point, and the pass
@@ -691,18 +763,18 @@ void PEIImpl::spillCalleeSavedRegs(MachineFunction &MF) {
MFI.setCalleeSavedInfoValid(true);
std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+ DenseMap<MCRegister, CalleeSavedInfo *> RegToInfo;
+ for (auto &CS : CSI)
+ RegToInfo.insert({CS.getReg(), &CS});
- // Fill SavePoints and RestorePoints with CalleeSavedRegisters
- if (!MFI.getSavePoints().empty()) {
- SaveRestorePoints SaveRestorePts;
- for (const auto &SavePoint : MFI.getSavePoints())
- SaveRestorePts.insert({SavePoint.first, CSI});
- MFI.setSavePoints(std::move(SaveRestorePts));
-
- SaveRestorePts.clear();
- for (const auto &RestorePoint : MFI.getRestorePoints())
- SaveRestorePts.insert({RestorePoint.first, CSI});
- MFI.setRestorePoints(std::move(SaveRestorePts));
+ if (MFI.getSavePoints().size() > 1) {
+ fillCSInfoPerBB(MFI, RegToInfo, PrologBlocks, /*isSave=*/true);
+ fillCSInfoPerBB(MFI, RegToInfo, EpilogBlocks, /*isSave=*/false);
+ } else {
+ SaveRestorePoints SavePts;
+ for (MachineBasicBlock *PrologBlock : PrologBlocks)
+ SavePts.insert({PrologBlock, MFI.getCalleeSavedInfo()});
+ MFI.setSavePoints(std::move(SavePts));
}
if (!CSI.empty()) {
@@ -710,13 +782,38 @@ void PEIImpl::spillCalleeSavedRegs(MachineFunction &MF) {
NumLeafFuncWithSpills++;
for (MachineBasicBlock *SaveBlock : SaveBlocks)
- insertCSRSaves(*SaveBlock, CSI);
+ insertCSRSaves(*SaveBlock, MFI.getSavePoints().empty()
+ ? CSI
+ : MFI.getSaveCSInfo(SaveBlock));
+
+ MachineBasicBlock *Save = nullptr;
+ MachineBasicBlock *Restore = nullptr;
+ for (auto &CS : CSI) {
+ if (!MFI.getSavePoints().empty()) {
+ if (auto BB = MFI.findSpilledIn(CS))
+ Save = BB;
+
+ if (auto BB = MFI.findRestoredIn(CS))
+ Restore = BB;
+ }
+ // Update the live-in information of all the blocks up to the save
+ // point.
+ updateLiveness(MF, Save, Restore, CS);
+ }
- // Update the live-in information of all the blocks up to the save point.
- updateLiveness(MF);
+ if (MFI.getRestorePoints().size() <= 1) {
+ SaveRestorePoints RestorePts;
+ for (MachineBasicBlock *EpilogBlock : EpilogBlocks)
+ RestorePts.insert({EpilogBlock, MFI.getCalleeSavedInfo()});
+ MFI.setRestorePoints(std::move(RestorePts));
+ }
- for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
- insertCSRRestores(*RestoreBlock, CSI);
+ for (MachineBasicBlock *RestoreBlock : RestoreBlocks) {
+ insertCSRRestores(*RestoreBlock,
+ MFI.getRestorePoints().empty()
+ ? CSI
+ : MFI.getRestoreCSInfo(RestoreBlock));
+ }
}
}
}
@@ -1189,26 +1286,26 @@ void PEIImpl::insertPrologEpilogCode(MachineFunction &MF) {
const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
// Add prologue to the function...
- for (MachineBasicBlock *SaveBlock : SaveBlocks)
- TFI.emitPrologue(MF, *SaveBlock);
+ for (MachineBasicBlock *PrologBlock : PrologBlocks)
+ TFI.emitPrologue(MF, *PrologBlock);
// Add epilogue to restore the callee-save registers in each exiting block.
- for (MachineBasicBlock *RestoreBlock : RestoreBlocks)
- TFI.emitEpilogue(MF, *RestoreBlock);
+ for (MachineBasicBlock *EpilogBlock : EpilogBlocks)
+ TFI.emitEpilogue(MF, *EpilogBlock);
// Zero call used registers before restoring callee-saved registers.
insertZeroCallUsedRegs(MF);
- for (MachineBasicBlock *SaveBlock : SaveBlocks)
- TFI.inlineStackProbe(MF, *SaveBlock);
+ for (MachineBasicBlock *PrologBlock : PrologBlocks)
+ TFI.inlineStackProbe(MF, *PrologBlock);
// Emit additional code that is required to support segmented stacks, if
// we've been asked for it. This, when linked with a runtime with support
// for segmented stacks (libgcc is one), will result in allocating stack
// space in small chunks instead of one large contiguous block.
if (MF.shouldSplitStack()) {
- for (MachineBasicBlock *SaveBlock : SaveBlocks)
- TFI.adjustForSegmentedStacks(MF, *SaveBlock);
+ for (MachineBasicBlock *PrologBlock : PrologBlocks)
+ TFI.adjustForSegmentedStacks(MF, *PrologBlock);
}
// Emit additional code that is required to explicitly handle the stack in
@@ -1217,8 +1314,8 @@ void PEIImpl::insertPrologEpilogCode(MachineFunction &MF) {
// different conditional check and another BIF for allocating more stack
// space.
if (MF.getFunction().getCallingConv() == CallingConv::HiPE)
- for (MachineBasicBlock *SaveBlock : SaveBlocks)
- TFI.adjustForHiPEPrologue(MF, *SaveBlock);
+ for (MachineBasicBlock *PrologBlock : PrologBlocks)
+ TFI.adjustForHiPEPrologue(MF, *PrologBlock);
}
/// insertZeroCallUsedRegs - Zero out call used registers.
diff --git a/llvm/lib/CodeGen/ShrinkWrap.cpp b/llvm/lib/CodeGen/ShrinkWrap.cpp
index 83581052560cb..5ea4ab17a3124 100644
--- a/llvm/lib/CodeGen/ShrinkWrap.cpp
+++ b/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -957,6 +957,7 @@ bool ShrinkWrapImpl::run(MachineFunction &MF) {
bool HasCandidate = performShrinkWrapping(RPOT, RS.get());
StackAddressUsedBlockInfo.clear();
Changed = postShrinkWrapping(HasCandidate, MF, RS.get());
+
if (!HasCandidate && !Changed)
return false;
if (!ArePointsInteresting())
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 75e7cf347e461..76a121aa4aa96 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -547,9 +547,8 @@ uint64_t RISCVFrameLowering::getStackSizeWithRVVPadding(
return alignTo(MFI.getStackSize() + RVFI->getRVVPadding(), getStackAlign());
}
-static SmallVector<CalleeSavedInfo, 8>
-getUnmanagedCSI(const MachineFunction &MF,
- const std::vector<CalleeSavedInfo> &CSI) {
+SmallVector<CalleeSavedInfo, 8> RISCVFrameLowering::getUnmanagedCSI(
+ const MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
SmallVector<CalleeSavedInfo, 8> NonLibcallCSI;
@@ -948,15 +947,22 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
// Determine the correct frame layout
determineFrameLayout(MF);
- const auto &CSI = MFI.getCalleeSavedInfo();
+ const auto &CSI = MFI.getSaveCSInfo(&MBB);
// Skip to before the spills of scalar callee-saved registers
// FIXME: assumes exactly one instruction is used to restore each
// callee-saved register.
- MBBI = std::prev(MBBI, getRVVCalleeSavedInfo(MF, CSI).size() +
- getUnmanagedCSI(MF, CSI).size());
CFIInstBuilder CFIBuilder(MBB, MBBI, MachineInstr::FrameSetup);
bool NeedsDwarfCFI = needsDwarfCFI(MF);
+ // For scalar register spills we skip 2 instrs at once, because right after
+ // spills there are cfi instructions. At the moment of prolog emission they
+ // are already inserted for scalar instructions, but not for vector
+ // instructions.
+ int ScalarDistance = getUnmanagedCSI(MF, CSI).size();
+ if (NeedsDwarfCFI)
+ ScalarDistance *= 2;
+ int VectorDistance = getRVVCalleeSavedInfo(MF, CSI).size();
+ MBBI = std::prev(MBBI, VectorDistance + ScalarDistance);
// If libcalls are used to spill and restore callee-saved registers, the frame
// has two sections; the opaque section managed by the libcalls, and the
@@ -1045,6 +1051,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
StackSize -= StackAdj;
if (NeedsDwarfCFI) {
+ CFIBuilder.setInsertPoint(MBBI);
CFIBuilder.buildDefCFAOffset(RealStackSize - StackSize);
for (const CalleeSavedInfo &CS : getPushOrLibCallsSavedInfo(MF, CSI))
CFIBuilder.buildOffset(CS.getReg(),
@@ -1073,15 +1080,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
// to the stack, not before.
// FIXME: assumes exactly one instruction is used to save each callee-saved
// register.
- std::advance(MBBI, getUnmanagedCSI(MF, CSI).size());
- CFIBuilder.setInsertPoint(MBBI);
-
- // Iterate over list of callee-saved registers and emit .cfi_offset
- // directives.
- if (NeedsDwarfCFI)
- for (const CalleeSavedInfo &CS : getUnmanagedCSI(MF, CSI))
- CFIBuilder.buildOffset(CS.getReg(),
- MFI.getObjectOffset(CS.getFrameIdx()));
+ std::advance(MBBI, ScalarDistance);
// Generate new FP.
if (hasFP(MF)) {
@@ -1100,8 +1099,10 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
MachineInstr::FrameSetup, getStackAlign());
}
- if (NeedsDwarfCFI)
+ if (NeedsDwarfCFI) {
+ CFIBuilder.setInsertPoint(MBBI);
CFIBuilder.buildDefCFA(FPReg, RVFI->getVarArgsSaveSize());
+ }
}
uint64_t SecondSPAdjustAmount = 0;
@@ -1131,6 +1132,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
}
if (NeedsDwarfCFI && !hasFP(MF)) {
+ CFIBuilder.setInsertPoint(MBBI);
// Emit .cfi_def_cfa_expression "sp + StackSize + RVVStackSize * vlenb".
CFIBuilder.insertCFIInst(createDefCFAExpression(
*RI, SPReg, getStackSizeWithRVVPadding(MF), RVVStackSize / 8));
@@ -1238,7 +1240,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
--MBBI;
}
- const auto &CSI = MFI.getCalleeSavedInfo();
+ const auto &CSI = MFI.getRestoreCSInfo(&MBB);
// Skip to before the restores of scalar callee-saved registers
// FIXME: assumes exactly one instruction is used to restore each
@@ -1315,8 +1317,11 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
// Skip to after the restores of scalar callee-saved registers
// FIXME: assumes exactly one instruction is used to restore each
// callee-saved register.
- MBBI = std::next(FirstScalarCSRRestoreInsn, getUnmanagedCSI(MF, CSI).size());
- CFIBuilder.setInsertPoint(MBBI);
+ // Skip CSR restore instructions + corresponding cfi restore instructions
+ int ScalarDistance = getUnmanagedCSI(MF, CSI).size();
+ if (NeedsDwarfCFI)
+ ScalarDistance *= 2;
+ MBBI = std::next(FirstScalarCSRRestoreInsn, ScalarDistance);
if (getLibCallID(MF, CSI) != -1) {
// tail __riscv_restore_[0-12] instruction is considered as a terminator,
@@ -1331,11 +1336,6 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
return;
}
- // Recover callee-saved registers.
- if (NeedsDwarfCFI)
- for (const CalleeSavedInfo &CS : getUnmanagedCSI(MF, CSI))
- CFIBuilder.buildRestore(CS.getReg());
-
if (RVFI->isPushable(MF) && MBBI != MBB.end() && isPop(MBBI->getOpcode())) {
// Use available stack adjustment in pop instruction to deallocate stack
// space. Align the stack size down to a multiple of 16. This is needed for
@@ -2110,6 +2110,34 @@ bool RISCVFrameLowering::assignCalleeSavedSpillSlots(
return true;
}
+static int64_t calculateCSRSpillOffsets(MachineFrameInfo &MFI,
+ const TargetFrameLowering *TFI,
+ int MinCSFI, int FrameIdx) {
+ int LocalAreaOffset = -TFI->getOffsetOfLocalArea();
+ Align MaxAlign = MFI.getMaxAlign();
+ Align Alignment = MFI.getObjectAlign(FrameIdx);
+ MaxAlign = std::max(MaxAlign, Alignment);
+ int64_t Offset = LocalAreaOffset;
+
+ for (int i = MFI.getObjectIndexBegin(); i != 0; ++i) {
+ // Only allocate objects on the default stack.
+ if (MFI.getStackID(i) != TargetStackID::Default)
+ continue;
+
+ int64_t FixedOff;
+ FixedOff = -MFI.getObjectOffset(i);
+ if (FixedOff > Offset)
+ Offset = FixedOff;
+ }
+
+ for (int i = MinCSFI; i <= FrameIdx; ++i) {
+ Offset += MFI.getObjectSize(i);
+ }
+
+ Offset = alignTo(Offset, Alignment);
+ return -Offset;
+}
+
bool RISCVFrameLowering::spillCalleeSavedRegisters(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
@@ -2137,6 +2165,7 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters(
MBB.addLiveIn(Reg);
}
+ // Emit CM.PUSH with base SPimm & evaluate Push stack
if (RVFI->isPushable(*MF)) {
// Emit CM.PUSH with base StackAdj & evaluate Push stack
unsigned PushedRegNum = RVFI->getRVPushRegs();
@@ -2179,7 +2208,52 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters(
MachineInstr::FrameSetup);
}
};
+
storeRegsToStackSlots(UnmanagedCSI);
+
+ bool NeedsDwarfCFI = needsDwarfCFI(*MF);
+ // Iterate over list of callee-saved registers and emit .cfi_offset
+ // directives.
+ if (NeedsDwarfCFI) {
+ CFIInstBuilder CFIBuilder(MBB, MI, MachineInstr::FrameSetup);
+ MachineFrameInfo &MFI = MF->getFrameInfo();
+
+ for (const CalleeSavedInfo &CS : UnmanagedCSI) {
+ int FrameIdx = CS.getFrameIdx();
+ if (FrameIdx < 0 ||
+ MFI.getStackID(FrameIdx) != TargetStackID::ScalableVector) {
+ int64_t Offset = 0;
+
+ auto *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
+ std::vector<CalleeSavedInfo> GCSI = MFI.getCalleeSavedInfo();
+ unsigned MinCSFI = std::numeric_limits<unsigned>::max();
+ for (auto CS : GCSI) {
+ if (CS.getFrameIdx() >= 0 && CS.getFrameIdx() < MinCSFI)
+ MinCSFI = CS.getFrameIdx();
+ }
+ if (MinCSFI == std::numeric_limits<unsigned>::max())
+ MinCSFI = 0;
+
+ if (RVFI->isSiFivePreemptibleInterrupt(*MF)) {
+ for (int I = 0; I < 2; ++I) {
+ int FI = RVFI->getInterruptCSRFrameIndex(I);
+ MinCSFI = std::min<unsigned>(MinCSFI, FI);
+ }
+ }
+
+ if (FrameIdx < 0 &&
+ (RVFI->isPushable(*MF) || RVFI->useSaveRestoreLibCalls(*MF))) {
+ Offset = MFI.getObjectOffset(FrameIdx);
+ } else {
+ const TargetFrameLowering *TFI =
+ MF->getSubtarget().getFrameLowering();
+ Offset = calculateCSRSpillOffsets(MFI, TFI, MinCSFI, FrameIdx);
+ }
+ CFIBuilder.buildOffset(CS.getReg(), Offset);
+ }
+ }
+ }
+
storeRegsToStackSlots(RVVCSI);
return true;
@@ -2272,8 +2346,17 @@ bool RISCVFrameLowering::restoreCalleeSavedRegisters(
}
};
loadRegFromStackSlot(RVVCSI);
+
loadRegFromStackSlot(UnmanagedCSI);
+ bool NeedsDwarfCFI = needsDwarfCFI(*MF);
+ // Recover callee-saved registers.
+ if (NeedsDwarfCFI) {
+ CFIInstBuilder CFIBuilder(MBB, MI, MachineInstr::FrameDestroy);
+ for (const CalleeSavedInfo &CS : UnmanagedCSI)
+ CFIBuilder.buildRestore(CS.getReg());
+ }
+
RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
if (RVFI->useQCIInterrupt(*MF)) {
// Don't emit anything here because restoration is handled by
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
index 87980dfb09f96..9c88c49fa45de 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
@@ -31,6 +31,10 @@ class RISCVFrameLowering : public TargetFrameLowering {
uint64_t getStackSizeWithRVVPadding(const MachineFunction &MF) const;
+ SmallVector<CalleeSavedInfo, 8>
+ getUnmanagedCSI(const MachineFunction &MF,
+ const std::vector<CalleeSavedInfo> &CSI) const;
+
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
Register &FrameReg) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 263d6a1fc2220..94980947274ca 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -511,12 +511,66 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
DebugLoc DL = MI.getDebugLoc();
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
Register FrameReg;
StackOffset Offset =
getFrameLowering(MF)->getFrameIndexReference(MF, FrameIndex, FrameReg);
+
+ const auto &CSI =
+ getFrameLowering(MF)->getUnmanagedCSI(MF, MFI.getCalleeSavedInfo());
+
+ if (!CSI.empty()) {
+ int MinCSFI = CSI.front().getFrameIdx();
+ int MaxCSFI = CSI.back().getFrameIdx();
+
+ // If our FrameIndex is a CSI FrameIndex, we sometimes need an additional
+ // adjustment.
+ if (FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI) {
+ MachineBasicBlock *SpilledIn = nullptr;
+ MachineBasicBlock *RestoredIn = nullptr;
+ auto It = std::find_if(CSI.begin(), CSI.end(), [FrameIndex](auto &CS) {
+ return CS.getFrameIdx() == FrameIndex;
+ });
+
+ assert(It != CSI.end() &&
+ "Did't find CalleeSavedInfo for CalleeSaved FrameIndex");
+
+ assert(!(MI.mayLoad() && MI.mayStore()) &&
+ "Instruction with frame index operand may load and store "
+ "simultaneously!");
+
+ if (MI.mayStore())
+ SpilledIn = MFI.findSpilledIn(*It);
+ else if (MI.mayLoad())
+ RestoredIn = MFI.findRestoredIn(*It);
+ else
+ llvm_unreachable(
+ "Instruction with frame index operand neither loads nor stores!");
+
+ bool SpilledRestoredInPrologEpilog = true;
+ // If we didn't manage to find the NCD (NCPD) for the list of Save (Restore)
+ // blocks, the spill (restore) will be placed unconditionally in Prolog (Epilog)
+ if (MI.mayStore() && MFI.getProlog())
+ SpilledRestoredInPrologEpilog = SpilledIn == MFI.getProlog();
+ else if (MI.mayLoad() && MFI.getEpilog())
+ SpilledRestoredInPrologEpilog = RestoredIn == MFI.getEpilog();
+
+ // For spills/restores performed outside Prolog/Epilog we need to add the full
+ // SP offset, despite the SPAdjustment optimization, because at the end of
+ // the Prolog or at the start of the Epilog SP has its maximum offset.
+ uint64_t FirstSPAdjustAmount =
+ getFrameLowering(MF)->getFirstSPAdjustAmount(MF);
+ if (FirstSPAdjustAmount && !SpilledRestoredInPrologEpilog) {
+ Offset += StackOffset::getFixed(
+ getFrameLowering(MF)->getStackSizeWithRVVPadding(MF) -
+ FirstSPAdjustAmount);
+ }
+ }
+ }
+
bool IsRVVSpill = RISCV::isRVVSpill(MI);
if (!IsRVVSpill)
Offset += StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm());
diff --git a/llvm/test/CodeGen/AArch64/live-debugvalues-sve.mir b/llvm/test/CodeGen/AArch64/live-debugvalues-sve.mir
index 1e14e7149817a..89f4e8eb161ce 100644
--- a/llvm/test/CodeGen/AArch64/live-debugvalues-sve.mir
+++ b/llvm/test/CodeGen/AArch64/live-debugvalues-sve.mir
@@ -121,9 +121,17 @@ frameInfo:
hasCalls: true
maxCallFrameSize: 0
savePoint:
- - point: '%bb.1'
+ - point: '%bb.1'
+ registers:
+ - '$fp'
+ - '$lr'
+ - '$x28'
restorePoint:
- - point: '%bb.1'
+ - point: '%bb.1'
+ registers:
+ - '$fp'
+ - '$lr'
+ - '$x28'
stack:
- { id: 0, size: 16, alignment: 16, stack-id: scalable-vector }
machineFunctionInfo: {}
diff --git a/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll b/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll
index 35ddcfd9ba6d6..a61d669b014b5 100644
--- a/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll
+++ b/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll
@@ -49,9 +49,9 @@ define i64 @two_chain_same_offset_succ_i32(ptr %p, i32 %offset, i32 %base1, i64
; CHECK-NEXT: slwi r8, r4, 1
; CHECK-NEXT: li r10, 0
; CHECK-NEXT: li r11, 0
-; CHECK-NEXT: stw r30, -8(r1) # 4-byte Folded Spill
-; CHECK-NEXT: add r8, r4, r8
; CHECK-NEXT: stw r31, -4(r1) # 4-byte Folded Spill
+; CHECK-NEXT: add r8, r4, r8
+; CHECK-NEXT: stw r30, -8(r1) # 4-byte Folded Spill
; CHECK-NEXT: add r9, r5, r8
; CHECK-NEXT: add r5, r5, r4
; CHECK-NEXT: add r8, r3, r5
@@ -84,8 +84,8 @@ define i64 @two_chain_same_offset_succ_i32(ptr %p, i32 %offset, i32 %base1, i64
; CHECK-NEXT: crand 4*cr5+lt, eq, 4*cr1+lt
; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_3
; CHECK-NEXT: # %bb.5:
-; CHECK-NEXT: lwz r31, -4(r1) # 4-byte Folded Reload
; CHECK-NEXT: lwz r30, -8(r1) # 4-byte Folded Reload
+; CHECK-NEXT: lwz r31, -4(r1) # 4-byte Folded Reload
; CHECK-NEXT: mr r4, r5
; CHECK-NEXT: blr
; CHECK-NEXT: L..BB0_6:
diff --git a/llvm/test/CodeGen/PowerPC/common-chain.ll b/llvm/test/CodeGen/PowerPC/common-chain.ll
index 8283e7bac3457..85f27f7923fea 100644
--- a/llvm/test/CodeGen/PowerPC/common-chain.ll
+++ b/llvm/test/CodeGen/PowerPC/common-chain.ll
@@ -426,11 +426,11 @@ define i64 @not_same_offset_fail(ptr %p, i64 %offset, i64 %base1, i64 %n) {
; CHECK-NEXT: cmpdi r6, 0
; CHECK-NEXT: ble cr0, .LBB4_4
; CHECK-NEXT: # %bb.1: # %for.body.preheader
-; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill
; CHECK-NEXT: add r5, r3, r5
; CHECK-NEXT: li r3, 0
-; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill
; CHECK-NEXT: mtctr r6
; CHECK-NEXT: mulli r11, r4, 10
; CHECK-NEXT: sldi r8, r4, 2
@@ -455,9 +455,9 @@ define i64 @not_same_offset_fail(ptr %p, i64 %offset, i64 %base1, i64 %n) {
; CHECK-NEXT: maddld r3, r6, r28, r3
; CHECK-NEXT: bdnz .LBB4_2
; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB4_4:
; CHECK-NEXT: li r3, 0
diff --git a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll
index cc38e250f183f..00627b8434beb 100644
--- a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll
+++ b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll
@@ -189,8 +189,8 @@ define i64 @test_max_number_reminder(ptr %arg, i32 signext %arg1) {
; CHECK-NEXT: cmplwi r4, 0
; CHECK-NEXT: beq cr0, .LBB2_4
; CHECK-NEXT: # %bb.1: # %bb3.preheader
-; CHECK-NEXT: std r25, -56(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r26, -48(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill
; CHECK-NEXT: addi r10, r3, 4002
; CHECK-NEXT: li r3, 0
; CHECK-NEXT: li r5, -1
@@ -198,10 +198,10 @@ define i64 @test_max_number_reminder(ptr %arg, i32 signext %arg1) {
; CHECK-NEXT: li r7, 3
; CHECK-NEXT: li r8, 5
; CHECK-NEXT: li r9, 9
-; CHECK-NEXT: std r27, -40(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill
-; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r27, -40(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r26, -48(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r25, -56(r1) # 8-byte Folded Spill
; CHECK-NEXT: mtctr r4
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB2_2: # %bb3
@@ -226,13 +226,13 @@ define i64 @test_max_number_reminder(ptr %arg, i32 signext %arg1) {
; CHECK-NEXT: maddld r3, r11, r25, r3
; CHECK-NEXT: bdnz .LBB2_2
; CHECK-NEXT: # %bb.3:
-; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r25, -56(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r26, -48(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
; CHECK-NEXT: add r3, r3, r4
-; CHECK-NEXT: ld r26, -48(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r25, -56(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB2_4:
; CHECK-NEXT: addi r3, r4, 0
@@ -583,10 +583,10 @@ define i64 @test_ds_cross_basic_blocks(ptr %arg, i32 signext %arg1) {
; CHECK-NEXT: beq cr0, .LBB6_9
; CHECK-NEXT: # %bb.1: # %bb3
; CHECK-NEXT: addis r5, r2, .LC0 at toc@ha
-; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill
; CHECK-NEXT: ld r5, .LC0 at toc@l(r5)
-; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill
; CHECK-NEXT: addi r6, r3, 4009
; CHECK-NEXT: li r3, 0
; CHECK-NEXT: li r7, -7
@@ -649,9 +649,9 @@ define i64 @test_ds_cross_basic_blocks(ptr %arg, i32 signext %arg1) {
; CHECK-NEXT: add r4, r30, r4
; CHECK-NEXT: b .LBB6_3
; CHECK-NEXT: .LBB6_8:
-; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
-; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
+; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: blr
; CHECK-NEXT: .LBB6_9:
; CHECK-NEXT: li r3, 0
diff --git a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll
index 79f2ef3e3746a..3de0fe239021c 100644
--- a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll
+++ b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll
@@ -8,22 +8,22 @@ define void @foo(ptr readonly %0, ptr %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6
; CHECK-NEXT: cmpd 5, 7
; CHECK-NEXT: bgelr 0
; CHECK-NEXT: # %bb.1: # %.preheader
-; CHECK-NEXT: std 27, -40(1) # 8-byte Folded Spill
-; CHECK-NEXT: addi 27, 5, 2
-; CHECK-NEXT: std 28, -32(1) # 8-byte Folded Spill
-; CHECK-NEXT: addi 28, 5, 3
; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill
; CHECK-NEXT: addi 30, 5, 1
+; CHECK-NEXT: std 28, -32(1) # 8-byte Folded Spill
+; CHECK-NEXT: addi 28, 5, 3
+; CHECK-NEXT: std 27, -40(1) # 8-byte Folded Spill
+; CHECK-NEXT: addi 27, 5, 2
; CHECK-NEXT: mulld 12, 8, 5
; CHECK-NEXT: mulld 0, 9, 8
; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill
; CHECK-NEXT: addi 29, 3, 16
; CHECK-NEXT: sldi 11, 10, 3
-; CHECK-NEXT: std 22, -80(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 23, -72(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 24, -64(1) # 8-byte Folded Spill
-; CHECK-NEXT: std 25, -56(1) # 8-byte Folded Spill
; CHECK-NEXT: std 26, -48(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 25, -56(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 24, -64(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 23, -72(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 22, -80(1) # 8-byte Folded Spill
; CHECK-NEXT: mulld 30, 8, 30
; CHECK-NEXT: mulld 28, 8, 28
; CHECK-NEXT: mulld 8, 8, 27
@@ -104,15 +104,15 @@ define void @foo(ptr readonly %0, ptr %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6
; CHECK-NEXT: blt 0, .LBB0_5
; CHECK-NEXT: b .LBB0_2
; CHECK-NEXT: .LBB0_6:
-; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 29, -24(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 28, -32(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 27, -40(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 26, -48(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 25, -56(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 24, -64(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 23, -72(1) # 8-byte Folded Reload
; CHECK-NEXT: ld 22, -80(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 23, -72(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 24, -64(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 25, -56(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 26, -48(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 27, -40(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 28, -32(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 29, -24(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 30, -16(1) # 8-byte Folded Reload
; CHECK-NEXT: blr
%9 = icmp slt i64 %2, %4
br i1 %9, label %10, label %97
diff --git a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
index af0942e99182d..6a6c24fdfbeee 100644
--- a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
+++ b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
@@ -93,7 +93,6 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %.
; CHECK-NEXT: std 17, 424(1) # 8-byte Folded Spill
; CHECK-NEXT: ld 23, 784(1)
; CHECK-NEXT: ld 20, 776(1)
-; CHECK-NEXT: std 24, 480(1) # 8-byte Folded Spill
; CHECK-NEXT: std 25, 488(1) # 8-byte Folded Spill
; CHECK-NEXT: ld 25, 800(1)
; CHECK-NEXT: ld 24, 792(1)
@@ -267,6 +266,7 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %.
; CHECK-NEXT: cmpld 28, 4
; CHECK-NEXT: ble 0, .LBB0_3
; CHECK-NEXT: # %bb.6: # %_loop_1_loopHeader_._return_bb_crit_edge.loopexit
+<<<<<<< HEAD
; CHECK-NEXT: ld 3, 48(1) # 8-byte Folded Reload
; CHECK-NEXT: lxv 63, 384(1) # 16-byte Folded Reload
; CHECK-NEXT: stxv 1, 0(3)
@@ -283,18 +283,28 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %.
; CHECK-NEXT: lxv 53, 224(1) # 16-byte Folded Reload
; CHECK-NEXT: lxv 52, 208(1) # 16-byte Folded Reload
; CHECK-NEXT: stxv 0, 0(3)
+=======
+; CHECK-NEXT: ld 3, 56(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 14, 400(1) # 8-byte Folded Reload
+; CHECK-NEXT: stxv 39, 0(3)
+; CHECK-NEXT: ld 3, 64(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 15, 408(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 16, 416(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 17, 424(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 18, 432(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 19, 440(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 20, 448(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 21, 456(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 22, 464(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 23, 472(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 24, 480(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 25, 488(1) # 8-byte Folded Reload
+; CHECK-NEXT: stxv 38, 0(3)
+>>>>>>> a1ce745b0c3d ([llvm] support multiple save/restore points in prolog-epilog)
; CHECK-NEXT: ld 3, 72(1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd 31, 584(1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd 30, 576(1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd 29, 568(1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd 28, 560(1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd 27, 552(1) # 8-byte Folded Reload
-; CHECK-NEXT: lfd 26, 544(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 31, 536(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 30, 528(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 29, 520(1) # 8-byte Folded Reload
-; CHECK-NEXT: ld 28, 512(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 26, 496(1) # 8-byte Folded Reload
; CHECK-NEXT: ld 27, 504(1) # 8-byte Folded Reload
+<<<<<<< HEAD
; CHECK-NEXT: stxv 5, 0(3)
; CHECK-NEXT: ld 3, 32(1) # 8-byte Folded Reload
; CHECK-NEXT: ld 26, 496(1) # 8-byte Folded Reload
@@ -312,6 +322,55 @@ define void @foo(ptr %.m, ptr %.n, ptr %.a, ptr %.x, ptr %.l, ptr %.vy01, ptr %.
; CHECK-NEXT: ld 3, 40(1) # 8-byte Folded Reload
; CHECK-NEXT: ld 15, 408(1) # 8-byte Folded Reload
; CHECK-NEXT: ld 14, 400(1) # 8-byte Folded Reload
+=======
+; CHECK-NEXT: ld 28, 512(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 29, 520(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 30, 528(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 31, 536(1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd 26, 544(1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd 27, 552(1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd 28, 560(1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd 29, 568(1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd 30, 576(1) # 8-byte Folded Reload
+; CHECK-NEXT: stxv 33, 0(3)
+; CHECK-NEXT: ld 3, 40(1) # 8-byte Folded Reload
+; CHECK-NEXT: lfd 31, 584(1) # 8-byte Folded Reload
+; CHECK-NEXT: lxv 52, 208(1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv 53, 224(1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv 54, 240(1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv 55, 256(1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv 56, 272(1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv 57, 288(1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv 58, 304(1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv 59, 320(1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv 60, 336(1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv 61, 352(1) # 16-byte Folded Reload
+; CHECK-NEXT: stxv 32, 0(3)
+; CHECK-NEXT: ld 3, 48(1) # 8-byte Folded Reload
+; CHECK-NEXT: stxv 37, 0(10)
+; CHECK-NEXT: stxv 36, 0(9)
+; CHECK-NEXT: stxv 13, 0(8)
+; CHECK-NEXT: lxv 62, 368(1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv 63, 384(1) # 16-byte Folded Reload
+; CHECK-NEXT: stxv 12, 0(3)
+; CHECK-NEXT: ld 3, 80(1) # 8-byte Folded Reload
+; CHECK-NEXT: stxv 11, 0(3)
+; CHECK-NEXT: ld 3, 88(1) # 8-byte Folded Reload
+; CHECK-NEXT: stxv 10, 0(3)
+; CHECK-NEXT: ld 3, 96(1) # 8-byte Folded Reload
+; CHECK-NEXT: stxv 9, 0(3)
+; CHECK-NEXT: ld 3, 104(1) # 8-byte Folded Reload
+; CHECK-NEXT: stxv 8, 0(3)
+; CHECK-NEXT: ld 3, 112(1) # 8-byte Folded Reload
+; CHECK-NEXT: stxv 7, 0(3)
+; CHECK-NEXT: ld 3, 120(1) # 8-byte Folded Reload
+; CHECK-NEXT: stxv 6, 0(3)
+; CHECK-NEXT: ld 3, 128(1) # 8-byte Folded Reload
+; CHECK-NEXT: stxv 5, 0(3)
+; CHECK-NEXT: ld 3, 136(1) # 8-byte Folded Reload
+; CHECK-NEXT: stxv 4, 0(3)
+; CHECK-NEXT: ld 3, 144(1) # 8-byte Folded Reload
+>>>>>>> a1ce745b0c3d ([llvm] support multiple save/restore points in prolog-epilog)
; CHECK-NEXT: stxv 3, 0(3)
; CHECK-NEXT: ld 3, 56(1) # 8-byte Folded Reload
; CHECK-NEXT: stxv 2, 0(8)
diff --git a/llvm/test/CodeGen/PowerPC/pr43527.ll b/llvm/test/CodeGen/PowerPC/pr43527.ll
index adfea51077a0b..e175ea63d059c 100644
--- a/llvm/test/CodeGen/PowerPC/pr43527.ll
+++ b/llvm/test/CodeGen/PowerPC/pr43527.ll
@@ -18,6 +18,7 @@ define dso_local void @test(i64 %arg, i64 %arg1, ptr %arg2) {
; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill
; CHECK-NEXT: stdu r1, -64(r1)
; CHECK-NEXT: mr r30, r5
; CHECK-NEXT: sub r29, r4, r3
@@ -36,7 +37,6 @@ define dso_local void @test(i64 %arg, i64 %arg1, ptr %arg2) {
; CHECK-NEXT: # %bb.4: # %bb15
; CHECK-NEXT: addi r1, r1, 64
; CHECK-NEXT: ld r0, 16(r1)
-; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload
; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr r0
diff --git a/llvm/test/CodeGen/PowerPC/shrink-wrap.ll b/llvm/test/CodeGen/PowerPC/shrink-wrap.ll
index 12d0b056ca886..44215ce8c4dff 100644
--- a/llvm/test/CodeGen/PowerPC/shrink-wrap.ll
+++ b/llvm/test/CodeGen/PowerPC/shrink-wrap.ll
@@ -10,26 +10,26 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) {
; POWERPC64-NEXT: ble 0, .LBB0_4
; POWERPC64-NEXT: # %bb.1: # %for.body.preheader
; POWERPC64-NEXT: addi 4, 4, -1
-; POWERPC64-NEXT: std 14, -144(1) # 8-byte Folded Spill
-; POWERPC64-NEXT: std 15, -136(1) # 8-byte Folded Spill
-; POWERPC64-NEXT: std 16, -128(1) # 8-byte Folded Spill
-; POWERPC64-NEXT: std 17, -120(1) # 8-byte Folded Spill
-; POWERPC64-NEXT: std 18, -112(1) # 8-byte Folded Spill
-; POWERPC64-NEXT: std 19, -104(1) # 8-byte Folded Spill
-; POWERPC64-NEXT: std 20, -96(1) # 8-byte Folded Spill
-; POWERPC64-NEXT: std 21, -88(1) # 8-byte Folded Spill
-; POWERPC64-NEXT: std 22, -80(1) # 8-byte Folded Spill
-; POWERPC64-NEXT: std 23, -72(1) # 8-byte Folded Spill
-; POWERPC64-NEXT: std 24, -64(1) # 8-byte Folded Spill
-; POWERPC64-NEXT: std 25, -56(1) # 8-byte Folded Spill
-; POWERPC64-NEXT: std 26, -48(1) # 8-byte Folded Spill
-; POWERPC64-NEXT: std 27, -40(1) # 8-byte Folded Spill
+; POWERPC64-NEXT: std 31, -8(1) # 8-byte Folded Spill
+; POWERPC64-NEXT: std 30, -16(1) # 8-byte Folded Spill
+; POWERPC64-NEXT: std 29, -24(1) # 8-byte Folded Spill
; POWERPC64-NEXT: std 28, -32(1) # 8-byte Folded Spill
+; POWERPC64-NEXT: std 27, -40(1) # 8-byte Folded Spill
+; POWERPC64-NEXT: std 26, -48(1) # 8-byte Folded Spill
+; POWERPC64-NEXT: std 25, -56(1) # 8-byte Folded Spill
+; POWERPC64-NEXT: std 24, -64(1) # 8-byte Folded Spill
+; POWERPC64-NEXT: std 23, -72(1) # 8-byte Folded Spill
+; POWERPC64-NEXT: std 22, -80(1) # 8-byte Folded Spill
+; POWERPC64-NEXT: std 21, -88(1) # 8-byte Folded Spill
+; POWERPC64-NEXT: std 20, -96(1) # 8-byte Folded Spill
+; POWERPC64-NEXT: std 19, -104(1) # 8-byte Folded Spill
+; POWERPC64-NEXT: std 18, -112(1) # 8-byte Folded Spill
+; POWERPC64-NEXT: std 17, -120(1) # 8-byte Folded Spill
; POWERPC64-NEXT: clrldi 4, 4, 32
; POWERPC64-NEXT: addi 4, 4, 1
-; POWERPC64-NEXT: std 29, -24(1) # 8-byte Folded Spill
-; POWERPC64-NEXT: std 30, -16(1) # 8-byte Folded Spill
-; POWERPC64-NEXT: std 31, -8(1) # 8-byte Folded Spill
+; POWERPC64-NEXT: std 16, -128(1) # 8-byte Folded Spill
+; POWERPC64-NEXT: std 15, -136(1) # 8-byte Folded Spill
+; POWERPC64-NEXT: std 14, -144(1) # 8-byte Folded Spill
; POWERPC64-NEXT: mtctr 4
; POWERPC64-NEXT: li 4, 0
; POWERPC64-NEXT: .p2align 4
@@ -40,25 +40,25 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) {
; POWERPC64-NEXT: #NO_APP
; POWERPC64-NEXT: bdnz .LBB0_2
; POWERPC64-NEXT: # %bb.3:
-; POWERPC64-NEXT: ld 31, -8(1) # 8-byte Folded Reload
-; POWERPC64-NEXT: ld 30, -16(1) # 8-byte Folded Reload
-; POWERPC64-NEXT: ld 29, -24(1) # 8-byte Folded Reload
-; POWERPC64-NEXT: ld 28, -32(1) # 8-byte Folded Reload
+; POWERPC64-NEXT: ld 14, -144(1) # 8-byte Folded Reload
+; POWERPC64-NEXT: ld 15, -136(1) # 8-byte Folded Reload
+; POWERPC64-NEXT: ld 16, -128(1) # 8-byte Folded Reload
+; POWERPC64-NEXT: ld 17, -120(1) # 8-byte Folded Reload
; POWERPC64-NEXT: extsw 3, 4
-; POWERPC64-NEXT: ld 27, -40(1) # 8-byte Folded Reload
-; POWERPC64-NEXT: ld 26, -48(1) # 8-byte Folded Reload
-; POWERPC64-NEXT: ld 25, -56(1) # 8-byte Folded Reload
-; POWERPC64-NEXT: ld 24, -64(1) # 8-byte Folded Reload
-; POWERPC64-NEXT: ld 23, -72(1) # 8-byte Folded Reload
-; POWERPC64-NEXT: ld 22, -80(1) # 8-byte Folded Reload
-; POWERPC64-NEXT: ld 21, -88(1) # 8-byte Folded Reload
-; POWERPC64-NEXT: ld 20, -96(1) # 8-byte Folded Reload
-; POWERPC64-NEXT: ld 19, -104(1) # 8-byte Folded Reload
; POWERPC64-NEXT: ld 18, -112(1) # 8-byte Folded Reload
-; POWERPC64-NEXT: ld 17, -120(1) # 8-byte Folded Reload
-; POWERPC64-NEXT: ld 16, -128(1) # 8-byte Folded Reload
-; POWERPC64-NEXT: ld 15, -136(1) # 8-byte Folded Reload
-; POWERPC64-NEXT: ld 14, -144(1) # 8-byte Folded Reload
+; POWERPC64-NEXT: ld 19, -104(1) # 8-byte Folded Reload
+; POWERPC64-NEXT: ld 20, -96(1) # 8-byte Folded Reload
+; POWERPC64-NEXT: ld 21, -88(1) # 8-byte Folded Reload
+; POWERPC64-NEXT: ld 22, -80(1) # 8-byte Folded Reload
+; POWERPC64-NEXT: ld 23, -72(1) # 8-byte Folded Reload
+; POWERPC64-NEXT: ld 24, -64(1) # 8-byte Folded Reload
+; POWERPC64-NEXT: ld 25, -56(1) # 8-byte Folded Reload
+; POWERPC64-NEXT: ld 26, -48(1) # 8-byte Folded Reload
+; POWERPC64-NEXT: ld 27, -40(1) # 8-byte Folded Reload
+; POWERPC64-NEXT: ld 28, -32(1) # 8-byte Folded Reload
+; POWERPC64-NEXT: ld 29, -24(1) # 8-byte Folded Reload
+; POWERPC64-NEXT: ld 30, -16(1) # 8-byte Folded Reload
+; POWERPC64-NEXT: ld 31, -8(1) # 8-byte Folded Reload
; POWERPC64-NEXT: blr
; POWERPC64-NEXT: .LBB0_4:
; POWERPC64-NEXT: li 4, 0
@@ -70,24 +70,24 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) {
; POWERPC32-AIX-NEXT: cmpwi 4, 0
; POWERPC32-AIX-NEXT: ble 0, L..BB0_4
; POWERPC32-AIX-NEXT: # %bb.1: # %for.body.preheader
-; POWERPC32-AIX-NEXT: stw 14, -72(1) # 4-byte Folded Spill
-; POWERPC32-AIX-NEXT: stw 15, -68(1) # 4-byte Folded Spill
-; POWERPC32-AIX-NEXT: stw 16, -64(1) # 4-byte Folded Spill
-; POWERPC32-AIX-NEXT: stw 17, -60(1) # 4-byte Folded Spill
-; POWERPC32-AIX-NEXT: stw 18, -56(1) # 4-byte Folded Spill
-; POWERPC32-AIX-NEXT: stw 19, -52(1) # 4-byte Folded Spill
-; POWERPC32-AIX-NEXT: stw 20, -48(1) # 4-byte Folded Spill
-; POWERPC32-AIX-NEXT: stw 21, -44(1) # 4-byte Folded Spill
-; POWERPC32-AIX-NEXT: stw 22, -40(1) # 4-byte Folded Spill
-; POWERPC32-AIX-NEXT: stw 23, -36(1) # 4-byte Folded Spill
-; POWERPC32-AIX-NEXT: stw 24, -32(1) # 4-byte Folded Spill
-; POWERPC32-AIX-NEXT: stw 25, -28(1) # 4-byte Folded Spill
-; POWERPC32-AIX-NEXT: stw 26, -24(1) # 4-byte Folded Spill
-; POWERPC32-AIX-NEXT: stw 27, -20(1) # 4-byte Folded Spill
-; POWERPC32-AIX-NEXT: stw 28, -16(1) # 4-byte Folded Spill
-; POWERPC32-AIX-NEXT: stw 29, -12(1) # 4-byte Folded Spill
-; POWERPC32-AIX-NEXT: stw 30, -8(1) # 4-byte Folded Spill
; POWERPC32-AIX-NEXT: stw 31, -4(1) # 4-byte Folded Spill
+; POWERPC32-AIX-NEXT: stw 30, -8(1) # 4-byte Folded Spill
+; POWERPC32-AIX-NEXT: stw 29, -12(1) # 4-byte Folded Spill
+; POWERPC32-AIX-NEXT: stw 28, -16(1) # 4-byte Folded Spill
+; POWERPC32-AIX-NEXT: stw 27, -20(1) # 4-byte Folded Spill
+; POWERPC32-AIX-NEXT: stw 26, -24(1) # 4-byte Folded Spill
+; POWERPC32-AIX-NEXT: stw 25, -28(1) # 4-byte Folded Spill
+; POWERPC32-AIX-NEXT: stw 24, -32(1) # 4-byte Folded Spill
+; POWERPC32-AIX-NEXT: stw 23, -36(1) # 4-byte Folded Spill
+; POWERPC32-AIX-NEXT: stw 22, -40(1) # 4-byte Folded Spill
+; POWERPC32-AIX-NEXT: stw 21, -44(1) # 4-byte Folded Spill
+; POWERPC32-AIX-NEXT: stw 20, -48(1) # 4-byte Folded Spill
+; POWERPC32-AIX-NEXT: stw 19, -52(1) # 4-byte Folded Spill
+; POWERPC32-AIX-NEXT: stw 18, -56(1) # 4-byte Folded Spill
+; POWERPC32-AIX-NEXT: stw 17, -60(1) # 4-byte Folded Spill
+; POWERPC32-AIX-NEXT: stw 16, -64(1) # 4-byte Folded Spill
+; POWERPC32-AIX-NEXT: stw 15, -68(1) # 4-byte Folded Spill
+; POWERPC32-AIX-NEXT: stw 14, -72(1) # 4-byte Folded Spill
; POWERPC32-AIX-NEXT: mtctr 4
; POWERPC32-AIX-NEXT: li 4, 0
; POWERPC32-AIX-NEXT: .align 4
@@ -98,25 +98,25 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) {
; POWERPC32-AIX-NEXT: #NO_APP
; POWERPC32-AIX-NEXT: bdnz L..BB0_2
; POWERPC32-AIX-NEXT: # %bb.3:
-; POWERPC32-AIX-NEXT: lwz 31, -4(1) # 4-byte Folded Reload
-; POWERPC32-AIX-NEXT: lwz 30, -8(1) # 4-byte Folded Reload
-; POWERPC32-AIX-NEXT: lwz 29, -12(1) # 4-byte Folded Reload
-; POWERPC32-AIX-NEXT: lwz 28, -16(1) # 4-byte Folded Reload
+; POWERPC32-AIX-NEXT: lwz 14, -72(1) # 4-byte Folded Reload
+; POWERPC32-AIX-NEXT: lwz 15, -68(1) # 4-byte Folded Reload
+; POWERPC32-AIX-NEXT: lwz 16, -64(1) # 4-byte Folded Reload
+; POWERPC32-AIX-NEXT: lwz 17, -60(1) # 4-byte Folded Reload
; POWERPC32-AIX-NEXT: mr 3, 4
-; POWERPC32-AIX-NEXT: lwz 27, -20(1) # 4-byte Folded Reload
-; POWERPC32-AIX-NEXT: lwz 26, -24(1) # 4-byte Folded Reload
-; POWERPC32-AIX-NEXT: lwz 25, -28(1) # 4-byte Folded Reload
-; POWERPC32-AIX-NEXT: lwz 24, -32(1) # 4-byte Folded Reload
-; POWERPC32-AIX-NEXT: lwz 23, -36(1) # 4-byte Folded Reload
-; POWERPC32-AIX-NEXT: lwz 22, -40(1) # 4-byte Folded Reload
-; POWERPC32-AIX-NEXT: lwz 21, -44(1) # 4-byte Folded Reload
-; POWERPC32-AIX-NEXT: lwz 20, -48(1) # 4-byte Folded Reload
-; POWERPC32-AIX-NEXT: lwz 19, -52(1) # 4-byte Folded Reload
; POWERPC32-AIX-NEXT: lwz 18, -56(1) # 4-byte Folded Reload
-; POWERPC32-AIX-NEXT: lwz 17, -60(1) # 4-byte Folded Reload
-; POWERPC32-AIX-NEXT: lwz 16, -64(1) # 4-byte Folded Reload
-; POWERPC32-AIX-NEXT: lwz 15, -68(1) # 4-byte Folded Reload
-; POWERPC32-AIX-NEXT: lwz 14, -72(1) # 4-byte Folded Reload
+; POWERPC32-AIX-NEXT: lwz 19, -52(1) # 4-byte Folded Reload
+; POWERPC32-AIX-NEXT: lwz 20, -48(1) # 4-byte Folded Reload
+; POWERPC32-AIX-NEXT: lwz 21, -44(1) # 4-byte Folded Reload
+; POWERPC32-AIX-NEXT: lwz 22, -40(1) # 4-byte Folded Reload
+; POWERPC32-AIX-NEXT: lwz 23, -36(1) # 4-byte Folded Reload
+; POWERPC32-AIX-NEXT: lwz 24, -32(1) # 4-byte Folded Reload
+; POWERPC32-AIX-NEXT: lwz 25, -28(1) # 4-byte Folded Reload
+; POWERPC32-AIX-NEXT: lwz 26, -24(1) # 4-byte Folded Reload
+; POWERPC32-AIX-NEXT: lwz 27, -20(1) # 4-byte Folded Reload
+; POWERPC32-AIX-NEXT: lwz 28, -16(1) # 4-byte Folded Reload
+; POWERPC32-AIX-NEXT: lwz 29, -12(1) # 4-byte Folded Reload
+; POWERPC32-AIX-NEXT: lwz 30, -8(1) # 4-byte Folded Reload
+; POWERPC32-AIX-NEXT: lwz 31, -4(1) # 4-byte Folded Reload
; POWERPC32-AIX-NEXT: blr
; POWERPC32-AIX-NEXT: L..BB0_4:
; POWERPC32-AIX-NEXT: li 3, 0
@@ -128,26 +128,26 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) {
; POWERPC64-AIX-NEXT: blt 0, L..BB0_4
; POWERPC64-AIX-NEXT: # %bb.1: # %for.body.preheader
; POWERPC64-AIX-NEXT: addi 4, 4, -1
-; POWERPC64-AIX-NEXT: std 14, -144(1) # 8-byte Folded Spill
-; POWERPC64-AIX-NEXT: std 15, -136(1) # 8-byte Folded Spill
-; POWERPC64-AIX-NEXT: std 16, -128(1) # 8-byte Folded Spill
-; POWERPC64-AIX-NEXT: std 17, -120(1) # 8-byte Folded Spill
-; POWERPC64-AIX-NEXT: std 18, -112(1) # 8-byte Folded Spill
-; POWERPC64-AIX-NEXT: std 19, -104(1) # 8-byte Folded Spill
-; POWERPC64-AIX-NEXT: std 20, -96(1) # 8-byte Folded Spill
-; POWERPC64-AIX-NEXT: std 21, -88(1) # 8-byte Folded Spill
-; POWERPC64-AIX-NEXT: std 22, -80(1) # 8-byte Folded Spill
-; POWERPC64-AIX-NEXT: std 23, -72(1) # 8-byte Folded Spill
-; POWERPC64-AIX-NEXT: std 24, -64(1) # 8-byte Folded Spill
-; POWERPC64-AIX-NEXT: std 25, -56(1) # 8-byte Folded Spill
-; POWERPC64-AIX-NEXT: std 26, -48(1) # 8-byte Folded Spill
-; POWERPC64-AIX-NEXT: std 27, -40(1) # 8-byte Folded Spill
+; POWERPC64-AIX-NEXT: std 31, -8(1) # 8-byte Folded Spill
+; POWERPC64-AIX-NEXT: std 30, -16(1) # 8-byte Folded Spill
+; POWERPC64-AIX-NEXT: std 29, -24(1) # 8-byte Folded Spill
; POWERPC64-AIX-NEXT: std 28, -32(1) # 8-byte Folded Spill
+; POWERPC64-AIX-NEXT: std 27, -40(1) # 8-byte Folded Spill
+; POWERPC64-AIX-NEXT: std 26, -48(1) # 8-byte Folded Spill
+; POWERPC64-AIX-NEXT: std 25, -56(1) # 8-byte Folded Spill
+; POWERPC64-AIX-NEXT: std 24, -64(1) # 8-byte Folded Spill
+; POWERPC64-AIX-NEXT: std 23, -72(1) # 8-byte Folded Spill
+; POWERPC64-AIX-NEXT: std 22, -80(1) # 8-byte Folded Spill
+; POWERPC64-AIX-NEXT: std 21, -88(1) # 8-byte Folded Spill
+; POWERPC64-AIX-NEXT: std 20, -96(1) # 8-byte Folded Spill
+; POWERPC64-AIX-NEXT: std 19, -104(1) # 8-byte Folded Spill
+; POWERPC64-AIX-NEXT: std 18, -112(1) # 8-byte Folded Spill
+; POWERPC64-AIX-NEXT: std 17, -120(1) # 8-byte Folded Spill
; POWERPC64-AIX-NEXT: clrldi 4, 4, 32
; POWERPC64-AIX-NEXT: addi 4, 4, 1
-; POWERPC64-AIX-NEXT: std 29, -24(1) # 8-byte Folded Spill
-; POWERPC64-AIX-NEXT: std 30, -16(1) # 8-byte Folded Spill
-; POWERPC64-AIX-NEXT: std 31, -8(1) # 8-byte Folded Spill
+; POWERPC64-AIX-NEXT: std 16, -128(1) # 8-byte Folded Spill
+; POWERPC64-AIX-NEXT: std 15, -136(1) # 8-byte Folded Spill
+; POWERPC64-AIX-NEXT: std 14, -144(1) # 8-byte Folded Spill
; POWERPC64-AIX-NEXT: mtctr 4
; POWERPC64-AIX-NEXT: li 4, 0
; POWERPC64-AIX-NEXT: .align 4
@@ -158,25 +158,25 @@ define signext i32 @shrinkwrapme(i32 signext %a, i32 signext %lim) {
; POWERPC64-AIX-NEXT: #NO_APP
; POWERPC64-AIX-NEXT: bdnz L..BB0_2
; POWERPC64-AIX-NEXT: # %bb.3:
-; POWERPC64-AIX-NEXT: ld 31, -8(1) # 8-byte Folded Reload
-; POWERPC64-AIX-NEXT: ld 30, -16(1) # 8-byte Folded Reload
-; POWERPC64-AIX-NEXT: ld 29, -24(1) # 8-byte Folded Reload
-; POWERPC64-AIX-NEXT: ld 28, -32(1) # 8-byte Folded Reload
+; POWERPC64-AIX-NEXT: ld 14, -144(1) # 8-byte Folded Reload
+; POWERPC64-AIX-NEXT: ld 15, -136(1) # 8-byte Folded Reload
+; POWERPC64-AIX-NEXT: ld 16, -128(1) # 8-byte Folded Reload
+; POWERPC64-AIX-NEXT: ld 17, -120(1) # 8-byte Folded Reload
; POWERPC64-AIX-NEXT: extsw 3, 4
-; POWERPC64-AIX-NEXT: ld 27, -40(1) # 8-byte Folded Reload
-; POWERPC64-AIX-NEXT: ld 26, -48(1) # 8-byte Folded Reload
-; POWERPC64-AIX-NEXT: ld 25, -56(1) # 8-byte Folded Reload
-; POWERPC64-AIX-NEXT: ld 24, -64(1) # 8-byte Folded Reload
-; POWERPC64-AIX-NEXT: ld 23, -72(1) # 8-byte Folded Reload
-; POWERPC64-AIX-NEXT: ld 22, -80(1) # 8-byte Folded Reload
-; POWERPC64-AIX-NEXT: ld 21, -88(1) # 8-byte Folded Reload
-; POWERPC64-AIX-NEXT: ld 20, -96(1) # 8-byte Folded Reload
-; POWERPC64-AIX-NEXT: ld 19, -104(1) # 8-byte Folded Reload
; POWERPC64-AIX-NEXT: ld 18, -112(1) # 8-byte Folded Reload
-; POWERPC64-AIX-NEXT: ld 17, -120(1) # 8-byte Folded Reload
-; POWERPC64-AIX-NEXT: ld 16, -128(1) # 8-byte Folded Reload
-; POWERPC64-AIX-NEXT: ld 15, -136(1) # 8-byte Folded Reload
-; POWERPC64-AIX-NEXT: ld 14, -144(1) # 8-byte Folded Reload
+; POWERPC64-AIX-NEXT: ld 19, -104(1) # 8-byte Folded Reload
+; POWERPC64-AIX-NEXT: ld 20, -96(1) # 8-byte Folded Reload
+; POWERPC64-AIX-NEXT: ld 21, -88(1) # 8-byte Folded Reload
+; POWERPC64-AIX-NEXT: ld 22, -80(1) # 8-byte Folded Reload
+; POWERPC64-AIX-NEXT: ld 23, -72(1) # 8-byte Folded Reload
+; POWERPC64-AIX-NEXT: ld 24, -64(1) # 8-byte Folded Reload
+; POWERPC64-AIX-NEXT: ld 25, -56(1) # 8-byte Folded Reload
+; POWERPC64-AIX-NEXT: ld 26, -48(1) # 8-byte Folded Reload
+; POWERPC64-AIX-NEXT: ld 27, -40(1) # 8-byte Folded Reload
+; POWERPC64-AIX-NEXT: ld 28, -32(1) # 8-byte Folded Reload
+; POWERPC64-AIX-NEXT: ld 29, -24(1) # 8-byte Folded Reload
+; POWERPC64-AIX-NEXT: ld 30, -16(1) # 8-byte Folded Reload
+; POWERPC64-AIX-NEXT: ld 31, -8(1) # 8-byte Folded Reload
; POWERPC64-AIX-NEXT: blr
; POWERPC64-AIX-NEXT: L..BB0_4:
; POWERPC64-AIX-NEXT: li 4, 0
>From a1e733ca8165e7932fc31f1ae8d3f69383fc1ba6 Mon Sep 17 00:00:00 2001
From: ens-sc <elizaveta.noskova at syntacore.com>
Date: Tue, 17 Sep 2024 16:46:57 +0300
Subject: [PATCH 2/2] [llvm] support save/restore point splitting in
shrink-wrap
This patch introduces the "-enable-shrink-wrap-into-multiple-points"
option, which enables splitting Save and Restore points during the ShrinkWrap pass, i.e.
inserting register saves and restores as close as possible to their usage.
The current algorithm disables Save / Restore point splitting for
functions with instructions that have FrameIndex operands,
with EHPads, and with any stack accesses, because it is difficult to prove its safety.
This patch also adds support for multiple Save / Restore points only for the RISCV backend.
Now ShrinkWrap produces:
- list of SavePoint + Registers
- list of RestorePoint + Registers
- Prolog (NCD of Save points)
- Epilog (NCPD of Restore points)
---
.../llvm/CodeGen/TargetFrameLowering.h | 4 +
llvm/lib/CodeGen/PrologEpilogInserter.cpp | 30 +-
llvm/lib/CodeGen/ShrinkWrap.cpp | 513 ++++++++++++++----
llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp | 27 +-
llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 2 -
llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 18 +-
llvm/lib/Target/RISCV/RISCVFrameLowering.h | 2 +
llvm/test/CodeGen/RISCV/shrinkwrap-split.mir | 290 ++++++++++
llvm/tools/llvm-reduce/ReducerWorkItem.cpp | 6 -
9 files changed, 738 insertions(+), 154 deletions(-)
create mode 100644 llvm/test/CodeGen/RISCV/shrinkwrap-split.mir
diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
index 75696faf114cc..a0cd7df6a4bde 100644
--- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
@@ -201,6 +201,10 @@ class LLVM_ABI TargetFrameLowering {
return false;
}
+ /// enableCSRSaveRestorePointsSplit - Returns true if the target supports
+ /// multiple save/restore points in shrink wrapping.
+ virtual bool enableCSRSaveRestorePointsSplit() const { return false; }
+
/// Returns true if the stack slot holes in the fixed and callee-save stack
/// area should be used when allocating other stack locations to reduce stack
/// size.
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index ea9d830d37193..4427ec64e1e9f 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -458,19 +458,19 @@ void PEIImpl::calculateSaveRestoreBlocks(MachineFunction &MF) {
// Use the points found by shrink-wrapping, if any.
if (!MFI.getSavePoints().empty()) {
- assert(MFI.getSavePoints().size() == 1 &&
- "Multiple save points are not yet supported!");
- const auto &SavePoint = *MFI.getSavePoints().begin();
- SaveBlocks.push_back(SavePoint.first);
- assert(MFI.getRestorePoints().size() == 1 &&
- "Multiple restore points are not yet supported!");
- const auto &RestorePoint = *MFI.getRestorePoints().begin();
- MachineBasicBlock *RestoreBlock = RestorePoint.first;
- // If RestoreBlock does not have any successor and is not a return block
- // then the end point is unreachable and we do not need to insert any
- // epilogue.
- if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
- RestoreBlocks.push_back(RestoreBlock);
+ assert(!MFI.getRestorePoints().empty() &&
+ "Both restore and save must be set");
+ for (auto &item : MFI.getSavePoints())
+ SaveBlocks.push_back(item.first);
+
+ for (auto &item : MFI.getRestorePoints()) {
+ MachineBasicBlock *RestoreBlock = item.first;
+ // If RestoreBlock does not have any successor and is not a return block
+ // then the end point is unreachable and we do not need to insert any
+ // epilogue.
+ if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
+ RestoreBlocks.push_back(RestoreBlock);
+ }
return;
}
@@ -767,7 +767,7 @@ void PEIImpl::spillCalleeSavedRegs(MachineFunction &MF) {
for (auto &CS : CSI)
RegToInfo.insert({CS.getReg(), &CS});
- if (MFI.getSavePoints().size() > 1) {
+ if (!MFI.getSavePoints().empty()) {
fillCSInfoPerBB(MFI, RegToInfo, PrologBlocks, /*isSave=*/true);
fillCSInfoPerBB(MFI, RegToInfo, EpilogBlocks, /*isSave=*/false);
} else {
@@ -801,7 +801,7 @@ void PEIImpl::spillCalleeSavedRegs(MachineFunction &MF) {
updateLiveness(MF, Save, Restore, CS);
}
- if (MFI.getRestorePoints().size() <= 1) {
+ if (MFI.getRestorePoints().empty()) {
SaveRestorePoints RestorePts;
for (MachineBasicBlock *EpilogBlock : EpilogBlocks)
RestorePts.insert({EpilogBlock, MFI.getCalleeSavedInfo()});
diff --git a/llvm/lib/CodeGen/ShrinkWrap.cpp b/llvm/lib/CodeGen/ShrinkWrap.cpp
index 5ea4ab17a3124..a388d73690607 100644
--- a/llvm/lib/CodeGen/ShrinkWrap.cpp
+++ b/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -94,13 +94,20 @@ STATISTIC(NumFunc, "Number of functions");
STATISTIC(NumCandidates, "Number of shrink-wrapping candidates");
STATISTIC(NumCandidatesDropped,
"Number of shrink-wrapping candidates dropped because of frequency");
+STATISTIC(
+ NumFuncWithSplitting,
+ "Number of functions, for which we managed to split Save/Restore points");
static cl::opt<cl::boolOrDefault>
EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden,
cl::desc("enable the shrink-wrapping pass"));
static cl::opt<bool> EnablePostShrinkWrapOpt(
- "enable-shrink-wrap-region-split", cl::init(true), cl::Hidden,
- cl::desc("enable splitting of the restore block if possible"));
+ "enable-post-shrink-wrap-restore-split", cl::init(true), cl::Hidden,
+ cl::desc(
+ "enable after-shrink-wrap splitting of the restore block if possible"));
+static cl::opt<bool> EnableShrinkWrapSplitOpt(
+ "enable-shrink-wrap-into-multiple-points", cl::init(false), cl::Hidden,
+ cl::desc("enable splitting of the save and restore blocks if possible"));
namespace {
@@ -117,15 +124,80 @@ class ShrinkWrapImpl {
MachineDominatorTree *MDT = nullptr;
MachinePostDominatorTree *MPDT = nullptr;
- /// Current safe point found for the prologue.
- /// The prologue will be inserted before the first instruction
- /// in this basic block.
- MachineBasicBlock *Save = nullptr;
+ /// Hash table, mapping register with its corresponding spill and restore
+ /// basic block.
+ DenseMap<Register, std::pair<MachineBasicBlock *, MachineBasicBlock *>>
+ SavedRegs;
- /// Current safe point found for the epilogue.
- /// The epilogue will be inserted before the first terminator instruction
- /// in this basic block.
- MachineBasicBlock *Restore = nullptr;
+ class SaveRestorePoints {
+ llvm::SaveRestorePoints SRPoints;
+
+ public:
+ llvm::SaveRestorePoints &get() { return SRPoints; }
+
+ void set(llvm::SaveRestorePoints &Rhs) { SRPoints = std::move(Rhs); }
+
+ void clear() { SRPoints.clear(); }
+
+ bool areMultiple() const { return SRPoints.size() > 1; }
+
+ MachineBasicBlock *getFirst() {
+ return SRPoints.empty() ? nullptr : SRPoints.begin()->first;
+ }
+
+ void insert(const std::pair<MachineBasicBlock *,
+ std::vector<CalleeSavedInfo>> &Point) {
+ SRPoints.insert(Point);
+ }
+
+ void insert(
+ std::pair<MachineBasicBlock *, std::vector<CalleeSavedInfo>> &&Point) {
+ SRPoints.insert(Point);
+ }
+
+ void insertReg(
+ Register Reg, MachineBasicBlock *MBB,
+ std::optional<std::vector<MachineBasicBlock *>> SaveRestoreBlockList) {
+ assert(MBB && "MBB is nullptr");
+ if (SRPoints.contains(MBB)) {
+ SRPoints[MBB].push_back(CalleeSavedInfo(Reg));
+ return;
+ }
+ std::vector CSInfos{CalleeSavedInfo(Reg)};
+ SRPoints.insert(std::make_pair(MBB, CSInfos));
+ if (SaveRestoreBlockList.has_value())
+ SaveRestoreBlockList->push_back(MBB);
+ }
+
+ void print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
+ for (auto [BB, CSIV] : SRPoints) {
+ OS << printMBBReference(*BB) << ": ";
+ for (auto &CSI : CSIV) {
+ OS << printReg(CSI.getReg(), TRI) << " ";
+ }
+ OS << "\n";
+ }
+ }
+
+ void dump(const TargetRegisterInfo *TRI) const { print(dbgs(), TRI); }
+ };
+
+ /// Wrapper around a hash table that maps each safe point found for register
+ /// spills to the list of corresponding registers. Register spills will be
+ /// inserted before the first instruction in the mapped basic block.
+ SaveRestorePoints SavePoints;
+
+ /// Wrapper around a hash table that maps each safe point found for register
+ /// restores to the list of corresponding registers. Register restores will
+ /// be inserted before the first terminator instruction in the mapped basic
+ /// block.
+ SaveRestorePoints RestorePoints;
+
+ std::vector<MachineBasicBlock *> SaveBlocks;
+ std::vector<MachineBasicBlock *> RestoreBlocks;
+
+ MachineBasicBlock *Prolog = nullptr;
+ MachineBasicBlock *Epilog = nullptr;
/// Hold the information of the basic block frequency.
/// Use to check the profitability of the new points.
@@ -168,11 +240,17 @@ class ShrinkWrapImpl {
/// therefore this approach is fair.
BitVector StackAddressUsedBlockInfo;
- /// Check if \p MI uses or defines a callee-saved register or
- /// a frame index. If this is the case, this means \p MI must happen
+ /// Check if \p MI uses or defines a frame index.
+ /// If this is the case, this means \p MI must happen
+ /// after Save and before Restore.
+ bool useOrDefFI(const MachineInstr &MI, RegScavenger *RS,
+ bool StackAddressUsed) const;
+
+ /// Check if \p MI uses or defines a callee-saved register.
+ /// If this is the case, this means \p MI must happen
/// after Save and before Restore.
- bool useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS,
- bool StackAddressUsed) const;
+ bool useOrDefCSR(const MachineInstr &MI, RegScavenger *RS,
+ std::set<Register> *RegsToSave) const;
const SetOfRegs &getCurrentCSRs(RegScavenger *RS) const {
if (CurrentCSRs.empty()) {
@@ -189,12 +267,29 @@ class ShrinkWrapImpl {
return CurrentCSRs;
}
+ std::vector<CalleeSavedInfo> getTargetCSIList(MachineFunction &MF) {
+ const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
+ std::vector<CalleeSavedInfo> TargetCSRs;
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ TargetCSRs.push_back(CalleeSavedInfo(CSRegs[i]));
+ return TargetCSRs;
+ }
+
+ void setupSaveRestorePoints(MachineFunction &MF);
+
+ void performSimpleShrinkWrap(RegScavenger *RS, MachineBasicBlock &SavePoint);
+
+ bool canSplitSaveRestorePoints(
+ const ReversePostOrderTraversal<MachineBasicBlock *> &RPOT,
+ RegScavenger *RS);
+
/// Update the Save and Restore points such that \p MBB is in
/// the region that is dominated by Save and post-dominated by Restore
/// and Save and Restore still match the safe point definition.
/// Such point may not exist and Save and/or Restore may be null after
/// this call.
- void updateSaveRestorePoints(MachineBasicBlock &MBB, RegScavenger *RS);
+ void updateSaveRestorePoints(MachineBasicBlock &MBB, Register Reg,
+ RegScavenger *RS);
// Try to find safe point based on dominance and block frequency without
// any change in IR.
@@ -205,7 +300,8 @@ class ShrinkWrapImpl {
/// This function tries to split the restore point if doing so can shrink the
/// save point further. \return True if restore point is split.
bool postShrinkWrapping(bool HasCandidate, MachineFunction &MF,
- RegScavenger *RS);
+ RegScavenger *RS, MachineBasicBlock *Save,
+ MachineBasicBlock *Restore);
/// This function analyzes if the restore point can split to create a new
/// restore point. This function collects
@@ -225,8 +321,13 @@ class ShrinkWrapImpl {
/// Initialize the pass for \p MF.
void init(MachineFunction &MF) {
RCI.runOnMachineFunction(MF);
- Save = nullptr;
- Restore = nullptr;
+ SavedRegs.clear();
+ SavePoints.clear();
+ RestorePoints.clear();
+ Prolog = nullptr;
+ Epilog = nullptr;
+ SaveBlocks.clear();
+ RestoreBlocks.clear();
EntryFreq = MBFI->getEntryFreq();
const TargetSubtargetInfo &Subtarget = MF.getSubtarget();
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
@@ -242,7 +343,22 @@ class ShrinkWrapImpl {
/// Check whether or not Save and Restore points are still interesting for
/// shrink-wrapping.
- bool ArePointsInteresting() const { return Save != Entry && Save && Restore; }
+ bool AreCandidatesFound(bool splitEnabled) const {
+ if (SavedRegs.empty())
+ return false;
+
+ auto Cond = [splitEnabled, this](auto &RegEntry) {
+ auto [Save, Restore] = RegEntry.second;
+ return (Save && Restore && Save != Entry) == splitEnabled;
+ };
+
+ auto It = std::find_if(begin(SavedRegs), end(SavedRegs), Cond);
+
+ if (It == SavedRegs.end())
+ return !splitEnabled;
+
+ return splitEnabled;
+ }
public:
ShrinkWrapImpl(MachineDominatorTree *MDT, MachinePostDominatorTree *MPDT,
@@ -301,8 +417,8 @@ INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
INITIALIZE_PASS_END(ShrinkWrapLegacy, DEBUG_TYPE, "Shrink Wrap Pass", false,
false)
-bool ShrinkWrapImpl::useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS,
- bool StackAddressUsed) const {
+bool ShrinkWrapImpl::useOrDefFI(const MachineInstr &MI, RegScavenger *RS,
+ bool StackAddressUsed) const {
/// Check if \p Op is known to access an address not on the function's stack .
/// At the moment, accesses where the underlying object is a global, function
/// argument, or jump table are considered non-stack accesses. Note that the
@@ -334,10 +450,28 @@ bool ShrinkWrapImpl::useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS,
LLVM_DEBUG(dbgs() << "Frame instruction: " << MI << '\n');
return true;
}
+
+ if (MI.isDebugValue())
+ return false;
+
+ const auto &Ops = MI.operands();
+
+ auto FIOpIt = std::find_if(Ops.begin(), Ops.end(),
+ [](const auto &MO) { return MO.isFI(); });
+ if (FIOpIt == Ops.end())
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Use or define FI( " << FIOpIt->isFI() << "): " << MI
+ << '\n');
+
+ return true;
+}
+
+bool ShrinkWrapImpl::useOrDefCSR(const MachineInstr &MI, RegScavenger *RS,
+ std::set<Register> *RegsToSave) const {
const MachineFunction *MF = MI.getParent()->getParent();
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
for (const MachineOperand &MO : MI.operands()) {
- bool UseOrDefCSR = false;
if (MO.isReg()) {
// Ignore instructions like DBG_VALUE which don't read/def the register.
if (!MO.isDef() && !MO.readsReg())
@@ -358,26 +492,33 @@ bool ShrinkWrapImpl::useOrDefCSROrFI(const MachineInstr &MI, RegScavenger *RS,
// PPC's Frame pointer (FP) is also not described as a callee-saved
// register. Until the FP is assigned a Physical Register PPC's FP needs
// to be checked separately.
- UseOrDefCSR = (!MI.isCall() && PhysReg == SP) ||
- RCI.getLastCalleeSavedAlias(PhysReg) ||
- (!MI.isReturn() &&
- TRI->isNonallocatableRegisterCalleeSave(PhysReg)) ||
- TRI->isVirtualFrameRegister(PhysReg);
+ if ((!MI.isCall() && PhysReg == SP) ||
+ RCI.getLastCalleeSavedAlias(PhysReg) ||
+ (!MI.isReturn() &&
+ TRI->isNonallocatableRegisterCalleeSave(PhysReg)) ||
+ TRI->isVirtualFrameRegister(PhysReg)) {
+ LLVM_DEBUG(dbgs() << MI << " uses or defines CSR: "
+ << RCI.getLastCalleeSavedAlias(PhysReg) << "\n");
+ if (!RegsToSave)
+ return true;
+
+ RegsToSave->insert(PhysReg);
+ }
} else if (MO.isRegMask()) {
// Check if this regmask clobbers any of the CSRs.
for (unsigned Reg : getCurrentCSRs(RS)) {
if (MO.clobbersPhysReg(Reg)) {
- UseOrDefCSR = true;
- break;
+ if (!RegsToSave)
+ return true;
+ RegsToSave->insert(Reg);
}
}
}
- // Skip FrameIndex operands in DBG_VALUE instructions.
- if (UseOrDefCSR || (MO.isFI() && !MI.isDebugValue())) {
- LLVM_DEBUG(dbgs() << "Use or define CSR(" << UseOrDefCSR << ") or FI("
- << MO.isFI() << "): " << MI << '\n');
- return true;
- }
+ }
+
+ // A non-empty RegsToSave set means MI uses or defines at least one CSR.
+ if (RegsToSave && !RegsToSave->empty()) {
+ return true;
}
return false;
}
@@ -563,7 +704,8 @@ bool ShrinkWrapImpl::checkIfRestoreSplittable(
SmallVectorImpl<MachineBasicBlock *> &CleanPreds,
const TargetInstrInfo *TII, RegScavenger *RS) {
for (const MachineInstr &MI : *CurRestore)
- if (useOrDefCSROrFI(MI, RS, /*StackAddressUsed=*/true))
+ if (useOrDefFI(MI, RS, /*StackAddressUsed=*/true) ||
+ useOrDefCSR(MI, RS, nullptr))
return false;
for (MachineBasicBlock *PredBB : CurRestore->predecessors()) {
@@ -580,7 +722,9 @@ bool ShrinkWrapImpl::checkIfRestoreSplittable(
}
bool ShrinkWrapImpl::postShrinkWrapping(bool HasCandidate, MachineFunction &MF,
- RegScavenger *RS) {
+ RegScavenger *RS,
+ MachineBasicBlock *Save,
+ MachineBasicBlock *Restore) {
if (!EnablePostShrinkWrapOpt)
return false;
@@ -623,7 +767,8 @@ bool ShrinkWrapImpl::postShrinkWrapping(bool HasCandidate, MachineFunction &MF,
continue;
}
for (const MachineInstr &MI : MBB)
- if (useOrDefCSROrFI(MI, RS, /*StackAddressUsed=*/true)) {
+ if (useOrDefFI(MI, RS, /*StackAddressUsed=*/true) ||
+ useOrDefCSR(MI, RS, nullptr)) {
DirtyBBs.insert(&MBB);
break;
}
@@ -683,34 +828,51 @@ bool ShrinkWrapImpl::postShrinkWrapping(bool HasCandidate, MachineFunction &MF,
assert((EntryFreq >= MBFI->getBlockFreq(Save) &&
EntryFreq >= MBFI->getBlockFreq(Restore)) &&
"Incorrect save or restore point based on block frequency");
+
+ SavePoints.clear();
+ RestorePoints.clear();
+
+ std::vector<CalleeSavedInfo> CSIV = getTargetCSIList(MF);
+ SavePoints.insert(std::make_pair(Save, CSIV));
+ RestorePoints.insert(std::make_pair(Restore, CSIV));
+ Prolog = Save;
+ Epilog = Restore;
return true;
}
void ShrinkWrapImpl::updateSaveRestorePoints(MachineBasicBlock &MBB,
- RegScavenger *RS) {
+ Register Reg, RegScavenger *RS) {
+ MachineBasicBlock *Save = nullptr;
+ MachineBasicBlock *Restore = nullptr;
+
// Get rid of the easy cases first.
- if (!Save)
- Save = &MBB;
- else
+ if (SavedRegs.contains(Reg) && (Save = SavedRegs.at(Reg).first))
Save = MDT->findNearestCommonDominator(Save, &MBB);
- assert(Save);
+ else {
+ auto Pos =
+ SavedRegs.insert(std::make_pair(Reg, std::make_pair(&MBB, nullptr)));
+ Save = Pos.first->second.first;
+ }
+
+ assert(SavedRegs.contains(Reg) && Save);
+ Restore = SavedRegs.at(Reg).second;
if (!Restore)
Restore = &MBB;
- else if (MPDT->getNode(&MBB)) // If the block is not in the post dom tree, it
- // means the block never returns. If that's the
- // case, we don't want to call
+ else if (MPDT->getNode(&MBB)) // If the block is not in the post dom tree,
+ // it means the block never returns. If
+ // that's the case, we don't want to call
// `findNearestCommonDominator`, which will
- // return `Restore`.
+ // return `Restore` and RestoreBlock for
+ // this register will be null.
Restore = MPDT->findNearestCommonDominator(Restore, &MBB);
- else
- Restore = nullptr; // Abort, we can't find a restore point in this case.
// Make sure we would be able to insert the restore code before the
// terminator.
if (Restore == &MBB) {
for (const MachineInstr &Terminator : MBB.terminators()) {
- if (!useOrDefCSROrFI(Terminator, RS, /*StackAddressUsed=*/true))
+ if (!useOrDefFI(Terminator, RS, /*StackAddressUsed=*/true) &&
+ !useOrDefCSR(Terminator, RS, nullptr))
continue;
// One of the terminator needs to happen before the restore point.
if (MBB.succ_empty()) {
@@ -725,8 +887,10 @@ void ShrinkWrapImpl::updateSaveRestorePoints(MachineBasicBlock &MBB,
}
if (!Restore) {
- LLVM_DEBUG(
- dbgs() << "Restore point needs to be spanned on several blocks\n");
+ SavedRegs[Reg].first = Save;
+ SavedRegs[Reg].second = nullptr;
+ LLVM_DEBUG(dbgs() << "Restore point needs to be spanned on several blocks "
+ << Reg << "\n");
return;
}
@@ -802,6 +966,8 @@ void ShrinkWrapImpl::updateSaveRestorePoints(MachineBasicBlock &MBB,
}
}
}
+ SavedRegs[Reg].first = Save;
+ SavedRegs[Reg].second = Restore;
}
static bool giveUpWithRemarks(MachineOptimizationRemarkEmitter *ORE,
@@ -817,9 +983,88 @@ static bool giveUpWithRemarks(MachineOptimizationRemarkEmitter *ORE,
return false;
}
+void ShrinkWrapImpl::setupSaveRestorePoints(MachineFunction &MF) {
+ for (CalleeSavedInfo CSI : getTargetCSIList(MF)) {
+ auto Reg = CSI.getReg();
+ auto [Save, Restore] = SavedRegs[Reg];
+ if (SavedRegs.contains(Reg) && Save && Restore)
+ continue;
+
+ SavePoints.insertReg(Reg, &MF.front(), SaveBlocks);
+ for (MachineBasicBlock &MBB : MF) {
+ if (MBB.isEHFuncletEntry())
+ SavePoints.insertReg(Reg, &MBB, SaveBlocks);
+ if (MBB.isReturnBlock())
+ RestorePoints.insertReg(Reg, &MBB, RestoreBlocks);
+ }
+ }
+
+ for (auto [Reg, SaveRestoreBlocks] : SavedRegs) {
+ auto [Save, Restore] = SaveRestoreBlocks;
+ if (Save && Restore) {
+ SavePoints.insertReg(Reg, Save, SaveBlocks);
+ if (!Restore->succ_empty() || Restore->isReturnBlock())
+ RestorePoints.insertReg(Reg, Restore, RestoreBlocks);
+ else
+ RestorePoints.insertReg(Reg, Restore, std::nullopt);
+ }
+ }
+}
+
+bool ShrinkWrapImpl::canSplitSaveRestorePoints(
+ const ReversePostOrderTraversal<MachineBasicBlock *> &RPOT,
+ RegScavenger *RS) {
+ for (MachineBasicBlock *MBB : RPOT) {
+ if (MBB->isEHPad() || MBB->isInlineAsmBrIndirectTarget())
+ return false;
+
+ // Check if we found any stack accesses in the predecessors. We are not
+ // doing a full dataflow analysis here to keep things simple but just
+ // rely on a reverse postorder traversal (RPOT) to guarantee predecessors
+ // are already processed except for loops (and accept the conservative
+ // result for loops).
+ bool StackAddressUsed = any_of(MBB->predecessors(), [&](auto *Pred) {
+ return StackAddressUsedBlockInfo.test(Pred->getNumber());
+ });
+
+ for (const MachineInstr &MI : *MBB) {
+ if (useOrDefFI(MI, RS, StackAddressUsed))
+ return false;
+
+ if (useOrDefCSR(MI, RS, nullptr))
+ StackAddressUsed = true;
+ }
+
+ StackAddressUsedBlockInfo[MBB->getNumber()] = StackAddressUsed;
+ }
+ return true;
+}
+
+void ShrinkWrapImpl::performSimpleShrinkWrap(RegScavenger *RS,
+ MachineBasicBlock &SavePoint) {
+ auto MF = SavePoint.getParent();
+ auto CSIV = getTargetCSIList(*MF);
+ if (!CSIV.empty()) {
+ for (CalleeSavedInfo CSI : CSIV) {
+ auto Reg = CSI.getReg();
+ if (SavedRegs.contains(Reg) &&
+ (!SavedRegs[Reg].first || !SavedRegs[Reg].second))
+ continue;
+ updateSaveRestorePoints(SavePoint, Reg, RS);
+ }
+ } else
+ updateSaveRestorePoints(SavePoint, MCRegister::NoRegister, RS);
+}
+
bool ShrinkWrapImpl::performShrinkWrapping(
const ReversePostOrderTraversal<MachineBasicBlock *> &RPOT,
RegScavenger *RS) {
+ const TargetFrameLowering *TFI =
+ MachineFunc->getSubtarget().getFrameLowering();
+
+ bool canSplit = canSplitSaveRestorePoints(RPOT, RS);
+ StackAddressUsedBlockInfo.set();
+
for (MachineBasicBlock *MBB : RPOT) {
LLVM_DEBUG(dbgs() << "Look into: " << printMBBReference(*MBB) << '\n');
@@ -834,8 +1079,9 @@ bool ShrinkWrapImpl::performShrinkWrapping(
// are at least at the boundary of the save and restore points. The
// problem is that a basic block can jump out from the middle in these
// cases, which we do not handle.
- updateSaveRestorePoints(*MBB, RS);
- if (!ArePointsInteresting()) {
+ performSimpleShrinkWrap(RS, *MBB);
+
+ if (!AreCandidatesFound(false /* splitEnabled */)) {
LLVM_DEBUG(dbgs() << "EHPad/inlineasm_br prevents shrink-wrapping\n");
return false;
}
@@ -855,30 +1101,40 @@ bool ShrinkWrapImpl::performShrinkWrapping(
}
}
+ std::set<Register> RegsToSave;
+
for (const MachineInstr &MI : *MBB) {
- if (useOrDefCSROrFI(MI, RS, StackAddressUsed)) {
- // Save (resp. restore) point must dominate (resp. post dominate)
- // MI. Look for the proper basic block for those.
- updateSaveRestorePoints(*MBB, RS);
- // If we are at a point where we cannot improve the placement of
- // save/restore instructions, just give up.
- if (!ArePointsInteresting()) {
- LLVM_DEBUG(dbgs() << "No Shrink wrap candidate found\n");
+ RegsToSave.clear();
+ if (useOrDefFI(MI, RS, StackAddressUsed)) {
+ performSimpleShrinkWrap(RS, *MBB);
+ if (!AreCandidatesFound(false /* splitEnabled */)) {
+ LLVM_DEBUG(dbgs() << "No Shrink wrap candidate found!~\n");
return false;
}
- // No need to look for other instructions, this basic block
- // will already be part of the handled region.
StackAddressUsed = true;
- break;
+ continue;
+ }
+
+ if (useOrDefCSR(MI, RS, &RegsToSave)) {
+ if (!EnableShrinkWrapSplitOpt ||
+ !TFI->enableCSRSaveRestorePointsSplit() || !canSplit)
+ performSimpleShrinkWrap(RS, *MBB);
+ else {
+ for (auto Reg : RegsToSave) {
+ // Save (resp. restore) point must dominate (resp. post dominate)
+ // MI. Look for the proper basic block for those.
+ updateSaveRestorePoints(*MBB, Reg, RS);
+ }
+ }
+ StackAddressUsed = true;
}
}
StackAddressUsedBlockInfo[MBB->getNumber()] = StackAddressUsed;
}
- if (!ArePointsInteresting()) {
+ if (!AreCandidatesFound(true /* splitEnabled */)) {
// If the points are not interesting at this point, then they must be null
// because it means we did not encounter any frame/CSR related code.
// Otherwise, we would have returned from the previous loop.
- assert(!Save && !Restore && "We miss a shrink-wrap opportunity?!");
LLVM_DEBUG(dbgs() << "Nothing to shrink-wrap\n");
return false;
}
@@ -886,40 +1142,44 @@ bool ShrinkWrapImpl::performShrinkWrapping(
LLVM_DEBUG(dbgs() << "\n ** Results **\nFrequency of the Entry: "
<< EntryFreq.getFrequency() << '\n');
- const TargetFrameLowering *TFI =
- MachineFunc->getSubtarget().getFrameLowering();
- do {
- LLVM_DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: "
- << printMBBReference(*Save) << ' '
- << printBlockFreq(*MBFI, *Save)
- << "\nRestore: " << printMBBReference(*Restore) << ' '
- << printBlockFreq(*MBFI, *Restore) << '\n');
-
- bool IsSaveCheap, TargetCanUseSaveAsPrologue = false;
- if (((IsSaveCheap = EntryFreq >= MBFI->getBlockFreq(Save)) &&
- EntryFreq >= MBFI->getBlockFreq(Restore)) &&
- ((TargetCanUseSaveAsPrologue = TFI->canUseAsPrologue(*Save)) &&
- TFI->canUseAsEpilogue(*Restore)))
- break;
- LLVM_DEBUG(
- dbgs() << "New points are too expensive or invalid for the target\n");
- MachineBasicBlock *NewBB;
- if (!IsSaveCheap || !TargetCanUseSaveAsPrologue) {
- Save = FindIDom<>(*Save, Save->predecessors(), *MDT);
- if (!Save)
- break;
- NewBB = Save;
- } else {
- // Restore is expensive.
- Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT);
- if (!Restore)
+ for (auto [Reg, SaveRestoreBlocks] : SavedRegs) {
+ auto [Save, Restore] = SaveRestoreBlocks;
+ if (!Save || !Restore)
+ continue;
+
+ do {
+ LLVM_DEBUG(dbgs() << "Shrink wrap candidates (#, Name, Freq):\nSave: "
+ << printMBBReference(*Save) << ' '
+ << printBlockFreq(*MBFI, *Save)
+ << "\nRestore: " << printMBBReference(*Restore) << ' '
+ << printBlockFreq(*MBFI, *Restore) << '\n');
+
+ bool IsSaveCheap, TargetCanUseSaveAsPrologue = false;
+ if (((IsSaveCheap = EntryFreq >= MBFI->getBlockFreq(Save)) &&
+ EntryFreq >= MBFI->getBlockFreq(Restore)) &&
+ ((TargetCanUseSaveAsPrologue = TFI->canUseAsPrologue(*Save)) &&
+ TFI->canUseAsEpilogue(*Restore)))
break;
- NewBB = Restore;
- }
- updateSaveRestorePoints(*NewBB, RS);
- } while (Save && Restore);
+ LLVM_DEBUG(
+ dbgs() << "New points are too expensive or invalid for the target\n");
+ MachineBasicBlock *NewBB;
+ if (!IsSaveCheap || !TargetCanUseSaveAsPrologue) {
+ Save = FindIDom<>(*Save, Save->predecessors(), *MDT);
+ if (!Save)
+ break;
+ NewBB = Save;
+ } else {
+ // Restore is expensive.
+ Restore = FindIDom<>(*Restore, Restore->successors(), *MPDT);
+ if (!Restore)
+ break;
+ NewBB = Restore;
+ }
+ updateSaveRestorePoints(*NewBB, Reg, RS);
+ } while (Save && Restore);
+ }
- if (!ArePointsInteresting()) {
+ if (!AreCandidatesFound(true /* splitEnabled */)) {
++NumCandidatesDropped;
return false;
}
@@ -954,28 +1214,51 @@ bool ShrinkWrapImpl::run(MachineFunction &MF) {
// basic block and change the state only for those basic blocks for which we
// were able to prove the opposite.
StackAddressUsedBlockInfo.resize(MF.getNumBlockIDs(), true);
- bool HasCandidate = performShrinkWrapping(RPOT, RS.get());
+ bool HasCandidates = performShrinkWrapping(RPOT, RS.get());
StackAddressUsedBlockInfo.clear();
- Changed = postShrinkWrapping(HasCandidate, MF, RS.get());
- if (!HasCandidate && !Changed)
- return false;
- if (!ArePointsInteresting())
- return Changed;
+ if (HasCandidates) {
+ setupSaveRestorePoints(MF);
+ Prolog = SaveBlocks.empty()
+ ? nullptr
+ : MDT->findNearestCommonDominator(iterator_range(SaveBlocks));
+ Epilog =
+ RestoreBlocks.empty()
+ ? nullptr
+ : MPDT->findNearestCommonDominator(iterator_range(RestoreBlocks));
+ }
- LLVM_DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: "
- << printMBBReference(*Save) << ' '
- << "\nRestore: " << printMBBReference(*Restore) << '\n');
+ if (!HasCandidates ||
+ (!SavePoints.areMultiple() && !RestorePoints.areMultiple())) {
+ Changed =
+ postShrinkWrapping(HasCandidates, MF, RS.get(), SavePoints.getFirst(),
+ RestorePoints.getFirst());
+ if (!HasCandidates && !Changed)
+ return false;
- MachineFrameInfo &MFI = MF.getFrameInfo();
+ if ((!SavePoints.getFirst()) || (!RestorePoints.getFirst()) ||
+ (SavePoints.getFirst() == Entry))
+ return Changed;
+ }
+
+ if (SavePoints.areMultiple() || RestorePoints.areMultiple()) {
+ ++NumFuncWithSplitting;
+ }
- // List of CalleeSavedInfo for registers will be added during prologepilog
- // pass
- SaveRestorePoints SavePoints({{Save, {}}});
- SaveRestorePoints RestorePoints({{Restore, {}}});
+ LLVM_DEBUG(dbgs() << "Final shrink wrap candidates:\n");
+
+ LLVM_DEBUG(dbgs() << "SavePoints:\n");
+ LLVM_DEBUG(SavePoints.dump(TRI));
+
+ LLVM_DEBUG(dbgs() << "RestorePoints:\n");
+ LLVM_DEBUG(RestorePoints.dump(TRI));
+
+ MachineFrameInfo &MFI = MF.getFrameInfo();
- MFI.setSavePoints(SavePoints);
- MFI.setRestorePoints(RestorePoints);
+ MFI.setProlog(Prolog);
+ MFI.setEpilog(Epilog);
+ MFI.setSavePoints(SavePoints.get());
+ MFI.setRestorePoints(RestorePoints.get());
++NumCandidates;
return Changed;
}
diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
index cbd08f0fb5dff..0b9b9910e9dc7 100644
--- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp
@@ -209,20 +209,21 @@ void SILowerSGPRSpills::calculateSaveRestoreBlocks(MachineFunction &MF) {
// So set the save points for those.
// Use the points found by shrink-wrapping, if any.
+
if (!MFI.getSavePoints().empty()) {
- assert(MFI.getSavePoints().size() == 1 &&
- "Multiple save points not yet supported!");
- const auto &SavePoint = *MFI.getSavePoints().begin();
- SaveBlocks.push_back(SavePoint.first);
- assert(MFI.getRestorePoints().size() == 1 &&
- "Multiple restore points not yet supported!");
- const auto &RestorePoint = *MFI.getRestorePoints().begin();
- MachineBasicBlock *RestoreBlock = RestorePoint.first;
- // If RestoreBlock does not have any successor and is not a return block
- // then the end point is unreachable and we do not need to insert any
- // epilogue.
- if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
- RestoreBlocks.push_back(RestoreBlock);
+ assert(!MFI.getRestorePoints().empty() &&
+ "Both restores and saves must be set");
+ for (auto &item : MFI.getSavePoints())
+ SaveBlocks.push_back(item.first);
+
+ for (auto &item : MFI.getRestorePoints()) {
+ MachineBasicBlock *RestoreBlock = item.first;
+ // If RestoreBlock does not have any successor and is not a return block
+ // then the end point is unreachable and we do not need to insert any
+ // epilogue.
+ if (!RestoreBlock->succ_empty() || RestoreBlock->isReturnBlock())
+ RestoreBlocks.push_back(RestoreBlock);
+ }
return;
}
diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index aae3e49f6c70b..0fa9a1037744b 100644
--- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -2079,8 +2079,6 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
// won't be generated by emitEpilogue(), because shrink-wrap has chosen new
// RestoreBlock. So we handle this case here.
if (!MFI.getSavePoints().empty() && MFI.hasTailCall()) {
- assert(MFI.getRestorePoints().size() < 2 &&
- "MFI can't contain multiple restore points!");
for (MachineBasicBlock &MBB : MF) {
if (MBB.isReturnBlock() && (!MFI.getRestorePoints().contains(&MBB)))
createTailCallBranchInstr(MBB);
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 76a121aa4aa96..58307fe55163e 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -566,7 +566,6 @@ getRVVCalleeSavedInfo(const MachineFunction &MF,
const std::vector<CalleeSavedInfo> &CSI) {
const MachineFrameInfo &MFI = MF.getFrameInfo();
SmallVector<CalleeSavedInfo, 8> RVVCSI;
-
for (auto &CS : CSI) {
int FI = CS.getFrameIdx();
if (FI >= 0 && MFI.getStackID(FI) == TargetStackID::ScalableVector)
@@ -2228,8 +2227,10 @@ bool RISCVFrameLowering::spillCalleeSavedRegisters(
std::vector<CalleeSavedInfo> GCSI = MFI.getCalleeSavedInfo();
unsigned MinCSFI = std::numeric_limits<unsigned>::max();
for (auto CS : GCSI) {
- if (CS.getFrameIdx() >= 0 && CS.getFrameIdx() < MinCSFI)
- MinCSFI = CS.getFrameIdx();
+ unsigned NonNegCSFI =
+ CS.getFrameIdx() >= 0 ? CS.getFrameIdx() : MinCSFI;
+ if (NonNegCSFI < MinCSFI)
+ MinCSFI = NonNegCSFI;
}
if (MinCSFI == std::numeric_limits<unsigned>::max())
MinCSFI = 0;
@@ -2599,3 +2600,14 @@ Register
RISCVFrameLowering::getInitialCFARegister(const MachineFunction &MF) const {
return RISCV::X2;
}
+
+bool RISCVFrameLowering::enableCSRSaveRestorePointsSplit() const {
+ // The Zcmp extension introduces cm.push and cm.pop instructions, which
+ // perform all spills and restores in a single instruction. This
+ // contradicts the idea of splitting Save Restore points. "-msave-restore"
+ // does the same, not via new instructions but via save/restore libcalls.
+ if (!STI.hasStdExtZcmp() && !STI.enableSaveRestore() &&
+ !STI.hasVendorXqccmp())
+ return true;
+ return false;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.h b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
index 9c88c49fa45de..d4ce5b152a402 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.h
@@ -91,6 +91,8 @@ class RISCVFrameLowering : public TargetFrameLowering {
uint64_t ProbeSize, bool DynAllocation,
MachineInstr::MIFlag Flag) const;
+ bool enableCSRSaveRestorePointsSplit() const override;
+
protected:
const RISCVSubtarget &STI;
diff --git a/llvm/test/CodeGen/RISCV/shrinkwrap-split.mir b/llvm/test/CodeGen/RISCV/shrinkwrap-split.mir
new file mode 100644
index 0000000000000..60e0a3fab16e1
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/shrinkwrap-split.mir
@@ -0,0 +1,290 @@
+# RUN: llc -march=riscv64 -run-pass shrink-wrap -enable-shrink-wrap-into-multiple-points=true %s -o - | FileCheck %s
+
+# CHECK: savePoint:
+# CHECK-NEXT: - point: '%bb.0'
+# CHECK-NEXT: registers:
+# CHECK-NEXT: - '$x1'
+# CHECK-NEXT: - '$x26'
+# CHECK-NEXT: - '$x27'
+# CHECK-NEXT: - '$x9'
+# CHECK-NEXT: - point: '%bb.2'
+# CHECK-NEXT: registers:
+# CHECK-NEXT: - '$x18'
+# CHECK-NEXT: - '$x19'
+# CHECK-NEXT: - '$x20'
+# CHECK-NEXT: - '$x21'
+# CHECK-NEXT: - '$x22'
+# CHECK-NEXT: - '$x23'
+# CHECK-NEXT: - '$x24'
+# CHECK-NEXT: - '$x25'
+# CHECK-NEXT: - '$x8'
+# CHECK-NEXT: restorePoint:
+# CHECK-NEXT: - point: '%bb.7'
+# CHECK-NEXT: registers:
+# CHECK-NEXT: - '$x18'
+# CHECK-NEXT: - '$x19'
+# CHECK-NEXT: - '$x20'
+# CHECK-NEXT: - '$x21'
+# CHECK-NEXT: - '$x22'
+# CHECK-NEXT: - '$x23'
+# CHECK-NEXT: - '$x24'
+# CHECK-NEXT: - '$x25'
+# CHECK-NEXT: - '$x8'
+# CHECK-NEXT: - point: '%bb.8'
+# CHECK-NEXT: registers:
+# CHECK-NEXT: - '$x1'
+# CHECK-NEXT: - '$x26'
+# CHECK-NEXT: - '$x27'
+# CHECK-NEXT: - '$x9'
+
+--- |
+ ; ModuleID = 'shrinkwrap-split.ll'
+ %struct.task = type { i32, i32, [20 x i32] }
+
+ define i32 @test(ptr %t, i32 %i, i1 %cond) {
+ entry:
+ %arr = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2
+ %0 = load i32, ptr %arr, align 4
+ %arrayidx2 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 1
+ %1 = load i32, ptr %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 2
+ %2 = load i32, ptr %arrayidx4, align 4
+ %arrayidx6 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 3
+ %3 = load i32, ptr %arrayidx6, align 4
+ %arrayidx8 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 4
+ %4 = load i32, ptr %arrayidx8, align 4
+ %arrayidx10 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 5
+ %5 = load i32, ptr %arrayidx10, align 4
+ %arrayidx12 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 6
+ %6 = load i32, ptr %arrayidx12, align 4
+ %arrayidx14 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 7
+ %7 = load i32, ptr %arrayidx14, align 4
+ %arrayidx16 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 8
+ %8 = load i32, ptr %arrayidx16, align 4
+ %arrayidx18 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 9
+ %9 = load i32, ptr %arrayidx18, align 4
+ %arrayidx20 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 10
+ %10 = load i32, ptr %arrayidx20, align 4
+ %arrayidx22 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 11
+ %11 = load i32, ptr %arrayidx22, align 4
+ %arrayidx24 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 12
+ %12 = load i32, ptr %arrayidx24, align 4
+ %arrayidx26 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 13
+ %13 = load i32, ptr %arrayidx26, align 4
+ %arrayidx28 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 14
+ %14 = load i32, ptr %arrayidx28, align 4
+ %arrayidx30 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 15
+ %15 = load i32, ptr %arrayidx30, align 4
+ %arrayidx32 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 16
+ %16 = load i32, ptr %arrayidx32, align 4
+ %arrayidx34 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 17
+ %17 = load i32, ptr %arrayidx34, align 4
+ %arrayidx36 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 18
+ %18 = load i32, ptr %arrayidx36, align 4
+ %arrayidx38 = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 2, i64 19
+ %19 = load i32, ptr %arrayidx38, align 4
+ %20 = load i32, ptr %t, align 4
+ %add = add i32 %10, %0
+ %add39 = add i32 %add, %20
+ %cmp = icmp slt i32 %add39, %i
+ br i1 %cmp, label %for.cond.preheader, label %cleanup
+
+ for.cond.preheader: ; preds = %entry
+ %y = getelementptr inbounds %struct.task, ptr %t, i64 0, i32 1
+ %21 = load i32, ptr %y, align 4
+ %cmp40.not119 = icmp eq i32 %21, 0
+ br i1 %cmp40.not119, label %for.cond.cleanup, label %for.body.preheader
+
+ for.body.preheader: ; preds = %for.cond.preheader
+ %22 = add i32 %21, -1
+ %cond41 = select i1 %cond, i32 %22, i32 %i
+ store i32 %cond41, ptr %t, align 4
+ br label %for.cond.cleanup
+
+ for.cond.cleanup: ; preds = %for.body.preheader, %for.cond.preheader
+ %23 = phi i32 [ %cond41, %for.body.preheader ], [ %20, %for.cond.preheader ]
+ %tobool44 = icmp ne i32 %21, 0
+ %conv = zext i1 %tobool44 to i32
+ %add48 = add i32 %1, %0
+ %add49 = add i32 %add48, %2
+ %add50 = add i32 %add49, %3
+ %add51 = add i32 %add50, %4
+ %add52 = add i32 %add51, %5
+ %add53 = add i32 %add52, %6
+ %add54 = add i32 %add53, %7
+ %add55 = add i32 %add54, %8
+ %add56 = add i32 %add55, %9
+ %add57 = add i32 %add56, %10
+ %add58 = add i32 %add57, %11
+ %add59 = add i32 %add58, %12
+ %add60 = add i32 %add59, %13
+ %add61 = add i32 %add60, %14
+ %add62 = add i32 %add61, %15
+ %add63 = add i32 %add62, %16
+ %add64 = add i32 %add63, %17
+ %add65 = add i32 %add64, %18
+ %add66 = add i32 %add65, %19
+ %add67 = add i32 %add66, %conv
+ %add68 = add i32 %add67, %23
+ br label %cleanup
+
+ cleanup: ; preds = %for.cond.cleanup, %entry
+ %retval.0 = phi i32 [ %add68, %for.cond.cleanup ], [ %i, %entry ]
+ ret i32 %retval.0
+ }
+...
+---
+name: test
+alignment: 2
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+callsEHReturn: false
+callsUnwindInit: false
+hasEHScopes: false
+hasEHFunclets: false
+isOutlined: false
+debugInstrRef: false
+failsVerification: false
+tracksDebugUserValues: true
+registers: []
+liveins:
+ - { reg: '$x10', virtual-reg: '' }
+ - { reg: '$x11', virtual-reg: '' }
+ - { reg: '$x12', virtual-reg: '' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 1
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ functionContext: ''
+ maxCallFrameSize: 4294967295
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ hasTailCall: false
+ localFrameSize: 0
+fixedStack: []
+stack: []
+entry_values: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo:
+ varArgsFrameIndex: 0
+ varArgsSaveSize: 0
+body: |
+ bb.0.entry:
+ successors: %bb.1(0x40000000), %bb.8(0x40000000)
+ liveins: $x10, $x11, $x12
+
+ renamable $x13 = COPY $x10
+ renamable $x9 = LW $x10, 8 :: (load (s32) from %ir.arr)
+ renamable $x16 = LW $x10, 48 :: (load (s32) from %ir.arrayidx20)
+ renamable $x15 = LW $x10, 0 :: (load (s32) from %ir.t)
+ renamable $x14 = ADD renamable $x16, renamable $x9
+ renamable $x10 = COPY $x11
+ renamable $x14 = ADDW killed renamable $x14, renamable $x15
+ BLT killed renamable $x14, $x11, %bb.1
+
+ bb.8:
+ successors: %bb.7(0x80000000)
+ liveins: $x10
+
+ PseudoBR %bb.7
+
+ bb.1.for.cond.preheader:
+ successors: %bb.6(0x30000000), %bb.2(0x50000000)
+ liveins: $x9, $x10, $x12, $x13, $x15, $x16
+
+ renamable $x20 = LW renamable $x13, 12 :: (load (s32) from %ir.arrayidx2)
+ renamable $x30 = LW renamable $x13, 16 :: (load (s32) from %ir.arrayidx4)
+ renamable $x24 = LW renamable $x13, 20 :: (load (s32) from %ir.arrayidx6)
+ renamable $x31 = LW renamable $x13, 24 :: (load (s32) from %ir.arrayidx8)
+ renamable $x25 = LW renamable $x13, 28 :: (load (s32) from %ir.arrayidx10)
+ renamable $x7 = LW renamable $x13, 32 :: (load (s32) from %ir.arrayidx12)
+ renamable $x21 = LW renamable $x13, 36 :: (load (s32) from %ir.arrayidx14)
+ renamable $x14 = LW renamable $x13, 40 :: (load (s32) from %ir.arrayidx16)
+ renamable $x23 = LW renamable $x13, 44 :: (load (s32) from %ir.arrayidx18)
+ renamable $x29 = LW renamable $x13, 52 :: (load (s32) from %ir.arrayidx22)
+ renamable $x22 = LW renamable $x13, 56 :: (load (s32) from %ir.arrayidx24)
+ renamable $x6 = LW renamable $x13, 60 :: (load (s32) from %ir.arrayidx26)
+ renamable $x17 = LW renamable $x13, 64 :: (load (s32) from %ir.arrayidx28)
+ renamable $x5 = LW renamable $x13, 68 :: (load (s32) from %ir.arrayidx30)
+ renamable $x28 = LW renamable $x13, 72 :: (load (s32) from %ir.arrayidx32)
+ renamable $x18 = LW renamable $x13, 76 :: (load (s32) from %ir.arrayidx34)
+ renamable $x19 = LW renamable $x13, 80 :: (load (s32) from %ir.arrayidx36)
+ renamable $x8 = LW renamable $x13, 4 :: (load (s32) from %ir.y)
+ renamable $x11 = LW renamable $x13, 84 :: (load (s32) from %ir.arrayidx38)
+ BEQ renamable $x8, $x0, %bb.6
+ PseudoBR %bb.2
+
+ bb.2.for.body.preheader:
+ successors: %bb.4(0x40000000), %bb.3(0x40000000)
+ liveins: $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x12, $x13, $x14, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x28, $x29, $x30, $x31
+
+ BEQ killed renamable $x12, $x0, %bb.4
+
+ bb.3:
+ successors: %bb.5(0x80000000)
+ liveins: $x5, $x6, $x7, $x8, $x9, $x11, $x13, $x14, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x28, $x29, $x30, $x31
+
+ renamable $x10 = ADDIW renamable $x8, -1
+ PseudoBR %bb.5
+
+ bb.4.for.body.preheader:
+ successors: %bb.5(0x80000000)
+ liveins: $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x13, $x14, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x28, $x29, $x30, $x31
+
+
+ bb.5.for.body.preheader:
+ successors: %bb.6(0x80000000)
+ liveins: $x5, $x6, $x7, $x8, $x9, $x10, $x11, $x13, $x14, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x28, $x29, $x30, $x31
+
+ SW renamable $x10, killed renamable $x13, 0 :: (store (s32) into %ir.t)
+ renamable $x15 = COPY killed renamable $x10
+
+ bb.6.for.cond.cleanup:
+ successors: %bb.7(0x80000000)
+ liveins: $x5, $x6, $x7, $x8, $x9, $x11, $x14, $x15, $x16, $x17, $x18, $x19, $x20, $x21, $x22, $x23, $x24, $x25, $x28, $x29, $x30, $x31
+
+ renamable $x9 = ADD killed renamable $x20, killed renamable $x9
+ renamable $x30 = ADD killed renamable $x30, killed renamable $x24
+ renamable $x30 = ADD killed renamable $x9, killed renamable $x30
+ renamable $x31 = ADD killed renamable $x31, killed renamable $x25
+ renamable $x14 = ADD killed renamable $x21, killed renamable $x14
+ renamable $x7 = ADD killed renamable $x31, killed renamable $x7
+ renamable $x14 = ADD killed renamable $x14, killed renamable $x23
+ renamable $x7 = ADD killed renamable $x30, killed renamable $x7
+ renamable $x14 = ADD killed renamable $x14, killed renamable $x16
+ renamable $x10 = SLTU $x0, killed renamable $x8
+ renamable $x14 = ADD killed renamable $x7, killed renamable $x14
+ renamable $x29 = ADD killed renamable $x29, killed renamable $x22
+ renamable $x28 = ADD killed renamable $x28, killed renamable $x18
+ renamable $x6 = ADD killed renamable $x29, killed renamable $x6
+ renamable $x28 = ADD killed renamable $x28, killed renamable $x19
+ renamable $x17 = ADD killed renamable $x6, killed renamable $x17
+ renamable $x11 = ADD killed renamable $x28, killed renamable $x11
+ renamable $x17 = ADD killed renamable $x17, killed renamable $x5
+ renamable $x10 = ADD killed renamable $x11, killed renamable $x10
+ renamable $x14 = ADD killed renamable $x14, killed renamable $x17
+ renamable $x10 = ADD killed renamable $x10, killed renamable $x15
+ renamable $x10 = ADDW killed renamable $x14, killed renamable $x10
+
+ bb.7.cleanup:
+ liveins: $x10
+
+ PseudoRET implicit $x10
+
+...
diff --git a/llvm/tools/llvm-reduce/ReducerWorkItem.cpp b/llvm/tools/llvm-reduce/ReducerWorkItem.cpp
index c479233a712e7..548e5071405f0 100644
--- a/llvm/tools/llvm-reduce/ReducerWorkItem.cpp
+++ b/llvm/tools/llvm-reduce/ReducerWorkItem.cpp
@@ -101,15 +101,9 @@ static void cloneFrameInfo(
DstMFI.setCVBytesOfCalleeSavedRegisters(
SrcMFI.getCVBytesOfCalleeSavedRegisters());
- assert(SrcMFI.getSavePoints().size() < 2 &&
- "Multiple restore points not yet supported!");
-
DstMFI.setSavePoints(
constructSaveRestorePoints(SrcMFI.getSavePoints(), Src2DstMBB));
- assert(SrcMFI.getRestorePoints().size() < 2 &&
- "Multiple restore points not yet supported!");
-
DstMFI.setRestorePoints(
constructSaveRestorePoints(SrcMFI.getRestorePoints(), Src2DstMBB));
More information about the llvm-commits
mailing list