[llvm] 1569b36 - [CodeGen][ShrinkWrap] Split restore point
via llvm-commits
llvm-commits at lists.llvm.org
Thu May 11 05:23:32 PDT 2023
Author: sgokhale
Date: 2023-05-11T17:51:48+05:30
New Revision: 1569b36ee9dda0fd8b23964ce8a969919f174f99
URL: https://github.com/llvm/llvm-project/commit/1569b36ee9dda0fd8b23964ce8a969919f174f99
DIFF: https://github.com/llvm/llvm-project/commit/1569b36ee9dda0fd8b23964ce8a969919f174f99.diff
LOG: [CodeGen][ShrinkWrap] Split restore point
Land D42600 with the optimisation disabled by default, gated behind the 'enable-shrink-wrap-region-split' option.
This reduces the effort involved in patching and relanding the whole change each time an issue is detected.
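Since the new option is a plain cl::opt<bool> defaulting to false, the splitting can be exercised explicitly while testing; a hypothetical invocation (the file names are placeholders, not part of this commit) would look like:

    llc -enable-shrink-wrap-region-split=true input.ll -o out.s
    clang -O2 -mllvm -enable-shrink-wrap-region-split=true input.c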
Added:
Modified:
llvm/lib/CodeGen/ShrinkWrap.cpp
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/ShrinkWrap.cpp b/llvm/lib/CodeGen/ShrinkWrap.cpp
index b219b83bbc2f..a040bc7c89c6 100644
--- a/llvm/lib/CodeGen/ShrinkWrap.cpp
+++ b/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -98,6 +98,9 @@ STATISTIC(NumCandidatesDropped,
static cl::opt<cl::boolOrDefault>
EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden,
cl::desc("enable the shrink-wrapping pass"));
+static cl::opt<bool> EnablePostShrinkWrapOpt(
+ "enable-shrink-wrap-region-split", cl::init(false), cl::Hidden,
+ cl::desc("enable splitting of the restore block if possible"));
namespace {
@@ -185,6 +188,30 @@ class ShrinkWrap : public MachineFunctionPass {
/// this call.
void updateSaveRestorePoints(MachineBasicBlock &MBB, RegScavenger *RS);
+ // Try to find a safe point based on dominance and block frequency without
+ // any change in the IR.
+ bool performShrinkWrapping(MachineFunction &MF, RegScavenger *RS);
+
+ /// This function tries to split the restore point if doing so can shrink the
+ /// save point further. \return True if the restore point is split.
+ bool postShrinkWrapping(bool HasCandidate, MachineFunction &MF,
+ RegScavenger *RS);
+
+ /// This function analyzes if the restore point can be split to create a new
+ /// restore point. This function collects
+ /// 1. Any preds of the current restore that are reachable from callee-save/FI
+ /// blocks
+ /// - indicated by DirtyPreds
+ /// 2. Any preds of the current restore that are not DirtyPreds - indicated by
+ /// CleanPreds
+ /// Both sets should be non-empty to consider splitting the restore point.
+ bool checkIfRestoreSplittable(
+ const MachineBasicBlock *CurRestore,
+ const DenseSet<const MachineBasicBlock *> &ReachableByDirty,
+ SmallVectorImpl<MachineBasicBlock *> &DirtyPreds,
+ SmallVectorImpl<MachineBasicBlock *> &CleanPreds,
+ const TargetInstrInfo *TII, RegScavenger *RS);
+
/// Initialize the pass for \p MF.
void init(MachineFunction &MF) {
RCI.runOnMachineFunction(MF);
@@ -338,18 +365,311 @@ bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,
/// Helper function to find the immediate (post) dominator.
template <typename ListOfBBs, typename DominanceAnalysis>
static MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs,
- DominanceAnalysis &Dom) {
+ DominanceAnalysis &Dom, bool Strict = true) {
MachineBasicBlock *IDom = &Block;
for (MachineBasicBlock *BB : BBs) {
IDom = Dom.findNearestCommonDominator(IDom, BB);
if (!IDom)
break;
}
- if (IDom == &Block)
+ if (Strict && IDom == &Block)
return nullptr;
return IDom;
}
+static bool isAnalyzableBB(const TargetInstrInfo &TII,
+ MachineBasicBlock &Entry) {
+ // Check if the block is analyzable.
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ return !TII.analyzeBranch(Entry, TBB, FBB, Cond);
+}
+
+/// Determines if any predecessor of MBB is on the path from a block that has a
+/// use or def of CSRs/FI to MBB.
+/// ReachableByDirty: All blocks reachable from a block that has a use or def
+/// of CSR/FI.
+static bool
+hasDirtyPred(const DenseSet<const MachineBasicBlock *> &ReachableByDirty,
+ const MachineBasicBlock &MBB) {
+ for (const MachineBasicBlock *PredBB : MBB.predecessors())
+ if (ReachableByDirty.count(PredBB))
+ return true;
+ return false;
+}
+
+/// Derives the list of all the basic blocks reachable from MBB.
+static void markAllReachable(DenseSet<const MachineBasicBlock *> &Visited,
+ const MachineBasicBlock &MBB) {
+ SmallVector<MachineBasicBlock *, 4> Worklist(MBB.succ_begin(),
+ MBB.succ_end());
+ Visited.insert(&MBB);
+ while (!Worklist.empty()) {
+ MachineBasicBlock *SuccMBB = Worklist.pop_back_val();
+ if (!Visited.insert(SuccMBB).second)
+ continue;
+ Worklist.append(SuccMBB->succ_begin(), SuccMBB->succ_end());
+ }
+}
+
+/// Collect blocks reachable by use or def of CSRs/FI.
+static void collectBlocksReachableByDirty(
+ const DenseSet<const MachineBasicBlock *> &DirtyBBs,
+ DenseSet<const MachineBasicBlock *> &ReachableByDirty) {
+ for (const MachineBasicBlock *MBB : DirtyBBs) {
+ if (ReachableByDirty.count(MBB))
+ continue;
+ // Mark all descendants as reachable.
+ markAllReachable(ReachableByDirty, *MBB);
+ }
+}
+
+/// \return true if there is a clean path from SavePoint to the original
+/// Restore.
+static bool
+isSaveReachableThroughClean(const MachineBasicBlock *SavePoint,
+ ArrayRef<MachineBasicBlock *> CleanPreds) {
+ DenseSet<const MachineBasicBlock *> Visited;
+ SmallVector<MachineBasicBlock *, 4> Worklist(CleanPreds.begin(),
+ CleanPreds.end());
+ while (!Worklist.empty()) {
+ MachineBasicBlock *CleanBB = Worklist.pop_back_val();
+ if (CleanBB == SavePoint)
+ return true;
+ if (!Visited.insert(CleanBB).second || !CleanBB->pred_size())
+ continue;
+ Worklist.append(CleanBB->pred_begin(), CleanBB->pred_end());
+ }
+ return false;
+}
+
+/// This function updates the branches after the restore point split.
+///
+/// Restore point has been split.
+/// Old restore point: MBB
+/// New restore point: NMBB
+/// Any basic block (say BBToUpdate) which previously had a fallthrough to MBB
+/// should
+/// 1. Fall through to NMBB iff NMBB is inserted immediately above MBB in the
+/// block layout OR
+/// 2. Branch unconditionally to NMBB iff NMBB is inserted at any other place.
+static void updateTerminator(MachineBasicBlock *BBToUpdate,
+ MachineBasicBlock *NMBB,
+ const TargetInstrInfo *TII) {
+ DebugLoc DL = BBToUpdate->findBranchDebugLoc();
+ // If NMBB isn't the new layout successor for BBToUpdate, insert an
+ // unconditional branch to it.
+ if (!BBToUpdate->isLayoutSuccessor(NMBB))
+ TII->insertUnconditionalBranch(*BBToUpdate, NMBB, DL);
+}
+
+/// This function splits the restore point and returns the new restore block.
+///
+/// DirtyPreds: Predecessors of \p MBB that are ReachableByDirty
+///
+/// Decision has been made to split the restore point.
+/// Old restore point: \p MBB
+/// New restore point: NMBB (the returned block)
+/// This function makes the necessary block layout changes so that
+/// 1. NMBB points to \p MBB unconditionally
+/// 2. All DirtyPreds that previously pointed to \p MBB point to NMBB
+static MachineBasicBlock *
+tryToSplitRestore(MachineBasicBlock *MBB,
+ ArrayRef<MachineBasicBlock *> DirtyPreds,
+ const TargetInstrInfo *TII) {
+ MachineFunction *MF = MBB->getParent();
+
+ // Get the list of DirtyPreds which have a fallthrough to MBB before the
+ // block layout change. This ensures that, if NMBB is inserted anywhere other
+ // than immediately before MBB, an unconditional branch is created from the
+ // DirtyPred to NMBB.
+ SmallPtrSet<MachineBasicBlock *, 8> MBBFallthrough;
+ for (MachineBasicBlock *BB : DirtyPreds)
+ if (BB->getFallThrough(false) == MBB)
+ MBBFallthrough.insert(BB);
+
+ MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
+ // Insert this block at the end of the function. Inserting in between may
+ // interfere with control flow optimizer decisions.
+ MF->insert(MF->end(), NMBB);
+
+ for (const MachineBasicBlock::RegisterMaskPair &LI : MBB->liveins())
+ NMBB->addLiveIn(LI.PhysReg);
+
+ TII->insertUnconditionalBranch(*NMBB, MBB, DebugLoc());
+
+ // After splitting, all predecessors of the restore point should be dirty
+ // blocks.
+ for (MachineBasicBlock *SuccBB : DirtyPreds)
+ SuccBB->ReplaceUsesOfBlockWith(MBB, NMBB);
+
+ NMBB->addSuccessor(MBB);
+
+ for (MachineBasicBlock *BBToUpdate : MBBFallthrough)
+ updateTerminator(BBToUpdate, NMBB, TII);
+
+ return NMBB;
+}
+
+/// This function undoes the restore point split done earlier.
+///
+/// DirtyPreds: All predecessors of \p NMBB that are ReachableByDirty.
+///
+/// Restore point was split and the change needs to be rolled back. Make the
+/// necessary changes to reset the restore point from \p NMBB to \p MBB.
+static void rollbackRestoreSplit(MachineFunction &MF, MachineBasicBlock *NMBB,
+ MachineBasicBlock *MBB,
+ ArrayRef<MachineBasicBlock *> DirtyPreds,
+ const TargetInstrInfo *TII) {
+ // For a BB, if NMBB is the fallthrough in the current layout, then in the
+ // new layout a. BB should fall through to MBB OR b. BB should
+ // unconditionally branch to MBB.
+ SmallPtrSet<MachineBasicBlock *, 8> NMBBFallthrough;
+ for (MachineBasicBlock *BB : DirtyPreds)
+ if (BB->getFallThrough(false) == NMBB)
+ NMBBFallthrough.insert(BB);
+
+ NMBB->removeSuccessor(MBB);
+ for (MachineBasicBlock *SuccBB : DirtyPreds)
+ SuccBB->ReplaceUsesOfBlockWith(NMBB, MBB);
+
+ NMBB->erase(NMBB->begin(), NMBB->end());
+ NMBB->eraseFromParent();
+
+ for (MachineBasicBlock *BBToUpdate : NMBBFallthrough)
+ updateTerminator(BBToUpdate, MBB, TII);
+}
+
+// A block is deemed fit for restore point split iff there exist
+// 1. DirtyPreds - preds of CurRestore reachable from a use or def of CSR/FI
+// 2. CleanPreds - preds of CurRestore that aren't DirtyPreds
+bool ShrinkWrap::checkIfRestoreSplittable(
+ const MachineBasicBlock *CurRestore,
+ const DenseSet<const MachineBasicBlock *> &ReachableByDirty,
+ SmallVectorImpl<MachineBasicBlock *> &DirtyPreds,
+ SmallVectorImpl<MachineBasicBlock *> &CleanPreds,
+ const TargetInstrInfo *TII, RegScavenger *RS) {
+ for (const MachineInstr &MI : *CurRestore)
+ if (useOrDefCSROrFI(MI, RS))
+ return false;
+
+ for (MachineBasicBlock *PredBB : CurRestore->predecessors()) {
+ if (!isAnalyzableBB(*TII, *PredBB))
+ return false;
+
+ if (ReachableByDirty.count(PredBB))
+ DirtyPreds.push_back(PredBB);
+ else
+ CleanPreds.push_back(PredBB);
+ }
+
+ return !(CleanPreds.empty() || DirtyPreds.empty());
+}
+
+bool ShrinkWrap::postShrinkWrapping(bool HasCandidate, MachineFunction &MF,
+ RegScavenger *RS) {
+ if (!EnablePostShrinkWrapOpt)
+ return false;
+
+ MachineBasicBlock *InitSave = nullptr;
+ MachineBasicBlock *InitRestore = nullptr;
+
+ if (HasCandidate) {
+ InitSave = Save;
+ InitRestore = Restore;
+ } else {
+ InitRestore = nullptr;
+ InitSave = &MF.front();
+ for (MachineBasicBlock &MBB : MF) {
+ if (MBB.isEHFuncletEntry())
+ return false;
+ if (MBB.isReturnBlock()) {
+ // Do not support multiple restore points.
+ if (InitRestore)
+ return false;
+ InitRestore = &MBB;
+ }
+ }
+ }
+
+ if (!InitSave || !InitRestore || InitRestore == InitSave ||
+ !MDT->dominates(InitSave, InitRestore) ||
+ !MPDT->dominates(InitRestore, InitSave))
+ return false;
+
+ // Bail out of the optimization if any basic block is the target of an
+ // INLINEASM_BR instruction.
+ for (MachineBasicBlock &MBB : MF)
+ if (MBB.isInlineAsmBrIndirectTarget())
+ return false;
+
+ DenseSet<const MachineBasicBlock *> DirtyBBs;
+ for (MachineBasicBlock &MBB : MF) {
+ if (MBB.isEHPad()) {
+ DirtyBBs.insert(&MBB);
+ continue;
+ }
+ for (const MachineInstr &MI : MBB)
+ if (useOrDefCSROrFI(MI, RS)) {
+ DirtyBBs.insert(&MBB);
+ break;
+ }
+ }
+
+ // Find blocks reachable from the use or def of CSRs/FI.
+ DenseSet<const MachineBasicBlock *> ReachableByDirty;
+ collectBlocksReachableByDirty(DirtyBBs, ReachableByDirty);
+
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ SmallVector<MachineBasicBlock *, 2> DirtyPreds;
+ SmallVector<MachineBasicBlock *, 2> CleanPreds;
+ if (!checkIfRestoreSplittable(InitRestore, ReachableByDirty, DirtyPreds,
+ CleanPreds, TII, RS))
+ return false;
+
+ // Try to find a new save point which dominates all dirty blocks.
+ MachineBasicBlock *NewSave =
+ FindIDom<>(**DirtyPreds.begin(), DirtyPreds, *MDT, false);
+
+ while (NewSave && (hasDirtyPred(ReachableByDirty, *NewSave) ||
+ EntryFreq < MBFI->getBlockFreq(NewSave).getFrequency()))
+ NewSave = FindIDom<>(**NewSave->pred_begin(), NewSave->predecessors(), *MDT,
+ false);
+
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+ if (!NewSave || NewSave == InitSave ||
+ isSaveReachableThroughClean(NewSave, CleanPreds) ||
+ !TFI->canUseAsPrologue(*NewSave))
+ return false;
+
+ // Now we know that splitting the restore point can isolate it from the
+ // clean blocks, and doing so can shrink the save point.
+ MachineBasicBlock *NewRestore =
+ tryToSplitRestore(InitRestore, DirtyPreds, TII);
+
+ // Make sure the new restore point is valid as an epilogue, as determined by
+ // the target.
+ if (!TFI->canUseAsEpilogue(*NewRestore)) {
+ rollbackRestoreSplit(MF, NewRestore, InitRestore, DirtyPreds, TII);
+ return false;
+ }
+
+ Save = NewSave;
+ Restore = NewRestore;
+
+ MDT->runOnMachineFunction(MF);
+ MPDT->runOnMachineFunction(MF);
+
+ assert((MDT->dominates(Save, Restore) && MPDT->dominates(Restore, Save)) &&
+ "Incorrect save or restore point due to dominance relations");
+ assert((!MLI->getLoopFor(Save) && !MLI->getLoopFor(Restore)) &&
+ "Unexpected save or restore point in a loop");
+ assert((EntryFreq >= MBFI->getBlockFreq(Save).getFrequency() &&
+ EntryFreq >= MBFI->getBlockFreq(Restore).getFrequency()) &&
+ "Incorrect save or restore point based on block frequency");
+ return true;
+}
+
void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
RegScavenger *RS) {
// Get rid of the easy cases first.
@@ -481,31 +801,7 @@ static bool giveUpWithRemarks(MachineOptimizationRemarkEmitter *ORE,
return false;
}
-bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF))
- return false;
-
- LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
-
- init(MF);
-
- ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
- if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *MLI)) {
- // If MF is irreducible, a block may be in a loop without
- // MachineLoopInfo reporting it. I.e., we may use the
- // post-dominance property in loops, which lead to incorrect
- // results. Moreover, we may miss that the prologue and
- // epilogue are not in the same loop, leading to unbalanced
- // construction/deconstruction of the stack frame.
- return giveUpWithRemarks(ORE, "UnsupportedIrreducibleCFG",
- "Irreducible CFGs are not supported yet.",
- MF.getFunction().getSubprogram(), &MF.front());
- }
-
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- std::unique_ptr<RegScavenger> RS(
- TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr);
-
+bool ShrinkWrap::performShrinkWrapping(MachineFunction &MF, RegScavenger *RS) {
for (MachineBasicBlock &MBB : MF) {
LLVM_DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' '
<< MBB.getName() << '\n');
@@ -521,7 +817,7 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
// are at least at the boundary of the save and restore points. The
// problem is that a basic block can jump out from the middle in these
// cases, which we do not handle.
- updateSaveRestorePoints(MBB, RS.get());
+ updateSaveRestorePoints(MBB, RS);
if (!ArePointsInteresting()) {
LLVM_DEBUG(dbgs() << "EHPad/inlineasm_br prevents shrink-wrapping\n");
return false;
@@ -530,11 +826,11 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
}
for (const MachineInstr &MI : MBB) {
- if (!useOrDefCSROrFI(MI, RS.get()))
+ if (!useOrDefCSROrFI(MI, RS))
continue;
// Save (resp. restore) point must dominate (resp. post dominate)
// MI. Look for the proper basic block for those.
- updateSaveRestorePoints(MBB, RS.get());
+ updateSaveRestorePoints(MBB, RS);
// If we are at a point where we cannot improve the placement of
// save/restore instructions, just give up.
if (!ArePointsInteresting()) {
@@ -588,13 +884,49 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
break;
NewBB = Restore;
}
- updateSaveRestorePoints(*NewBB, RS.get());
+ updateSaveRestorePoints(*NewBB, RS);
} while (Save && Restore);
if (!ArePointsInteresting()) {
++NumCandidatesDropped;
return false;
}
+ return true;
+}
+
+bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
+
+ init(MF);
+
+ ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
+ if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *MLI)) {
+ // If MF is irreducible, a block may be in a loop without
+ // MachineLoopInfo reporting it. I.e., we may use the
+ // post-dominance property in loops, which lead to incorrect
+ // results. Moreover, we may miss that the prologue and
+ // epilogue are not in the same loop, leading to unbalanced
+ // construction/deconstruction of the stack frame.
+ return giveUpWithRemarks(ORE, "UnsupportedIrreducibleCFG",
+ "Irreducible CFGs are not supported yet.",
+ MF.getFunction().getSubprogram(), &MF.front());
+ }
+
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ std::unique_ptr<RegScavenger> RS(
+ TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr);
+
+ bool Changed = false;
+
+ bool HasCandidate = performShrinkWrapping(MF, RS.get());
+ Changed = postShrinkWrapping(HasCandidate, MF, RS.get());
+ if (!HasCandidate && !Changed)
+ return false;
+ if (!ArePointsInteresting())
+ return Changed;
LLVM_DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: "
<< Save->getNumber() << ' ' << Save->getName()
@@ -605,7 +937,7 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
MFI.setSavePoint(Save);
MFI.setRestorePoint(Restore);
++NumCandidates;
- return false;
+ return Changed;
}
bool ShrinkWrap::isShrinkWrapEnabled(const MachineFunction &MF) {
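For intuition, here is an editorial sketch (not part of the commit) of the transformation on a diamond CFG where only one arm touches a CSR/FI. Before the split, the restore block has both clean and dirty predecessors, which pins the save point to the entry block; tryToSplitRestore redirects the dirty predecessors to a fresh block NMBB that branches unconditionally to the old restore block, after which the save point can sink into the dirty arm:

    Before                             After
      Entry (Save)                       Entry
      /        \                         /        \
   Clean      Dirty (CSR use)         Clean      Dirty (CSR use, new Save)
      \        /                         |           |
     Return (Restore)                    |         NMBB (new Restore)
                                         \         /
                                           Return

The prologue and epilogue then execute only on the path that actually needs the callee-saved registers or the stack frame.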