[llvm] 1569b36 - [CodeGen][ShrinkWrap] Split restore point

via llvm-commits <llvm-commits at lists.llvm.org>
Thu May 11 05:23:32 PDT 2023


Author: sgokhale
Date: 2023-05-11T17:51:48+05:30
New Revision: 1569b36ee9dda0fd8b23964ce8a969919f174f99

URL: https://github.com/llvm/llvm-project/commit/1569b36ee9dda0fd8b23964ce8a969919f174f99
DIFF: https://github.com/llvm/llvm-project/commit/1569b36ee9dda0fd8b23964ce8a969919f174f99.diff

LOG: [CodeGen][ShrinkWrap] Split restore point

Land D42600 with the optimisation disabled by default, gated behind the 'enable-shrink-wrap-region-split' option.

This is just to reduce the effort involved in patching the change each time an issue is detected, instead of reverting and relanding the whole patch.
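
The split can be exercised explicitly through llc. A minimal sketch, assuming
an input test.ll and stopping after this pass (the pass name "shrink-wrap" is
this file's DEBUG_TYPE; the rest of the invocation is illustrative):

  llc -O2 -enable-shrink-wrap-region-split -stop-after=shrink-wrap test.ll -o -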

Added: 
    

Modified: 
    llvm/lib/CodeGen/ShrinkWrap.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/ShrinkWrap.cpp b/llvm/lib/CodeGen/ShrinkWrap.cpp
index b219b83bbc2f..a040bc7c89c6 100644
--- a/llvm/lib/CodeGen/ShrinkWrap.cpp
+++ b/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -98,6 +98,9 @@ STATISTIC(NumCandidatesDropped,
 static cl::opt<cl::boolOrDefault>
 EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden,
                     cl::desc("enable the shrink-wrapping pass"));
+static cl::opt<bool> EnablePostShrinkWrapOpt(
+    "enable-shrink-wrap-region-split", cl::init(false), cl::Hidden,
+    cl::desc("enable splitting of the restore block if possible"));
 
 namespace {
 
@@ -185,6 +188,30 @@ class ShrinkWrap : public MachineFunctionPass {
   /// this call.
   void updateSaveRestorePoints(MachineBasicBlock &MBB, RegScavenger *RS);
 
+  // Try to find a safe point based on dominance and block frequency without
+  // any change in the IR.
+  bool performShrinkWrapping(MachineFunction &MF, RegScavenger *RS);
+
+  /// This function tries to split the restore point if doing so can shrink the
+  /// save point further. \return True if the restore point is split.
+  bool postShrinkWrapping(bool HasCandidate, MachineFunction &MF,
+                          RegScavenger *RS);
+
+  /// This function analyzes whether the restore point can be split to create
+  /// a new restore point. It collects:
+  /// 1. Any preds of the current restore that are reachable from blocks with
+  ///    callee-save/FI uses or defs
+  ///    - indicated by DirtyPreds
+  /// 2. Any preds of the current restore that are not DirtyPreds
+  ///    - indicated by CleanPreds
+  /// Both sets must be non-empty to consider a restore point split.
+  bool checkIfRestoreSplittable(
+      const MachineBasicBlock *CurRestore,
+      const DenseSet<const MachineBasicBlock *> &ReachableByDirty,
+      SmallVectorImpl<MachineBasicBlock *> &DirtyPreds,
+      SmallVectorImpl<MachineBasicBlock *> &CleanPreds,
+      const TargetInstrInfo *TII, RegScavenger *RS);
+
   /// Initialize the pass for \p MF.
   void init(MachineFunction &MF) {
     RCI.runOnMachineFunction(MF);
@@ -338,18 +365,311 @@ bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,
 /// Helper function to find the immediate (post) dominator.
 template <typename ListOfBBs, typename DominanceAnalysis>
 static MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs,
-                                   DominanceAnalysis &Dom) {
+                                   DominanceAnalysis &Dom, bool Strict = true) {
   MachineBasicBlock *IDom = &Block;
   for (MachineBasicBlock *BB : BBs) {
     IDom = Dom.findNearestCommonDominator(IDom, BB);
     if (!IDom)
       break;
   }
-  if (IDom == &Block)
+  if (Strict && IDom == &Block)
     return nullptr;
   return IDom;
 }
 
+static bool isAnalyzableBB(const TargetInstrInfo &TII,
+                           MachineBasicBlock &Entry) {
+  // Check if the block is analyzable.
+  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+  SmallVector<MachineOperand, 4> Cond;
+  return !TII.analyzeBranch(Entry, TBB, FBB, Cond);
+}
+
+/// Determines if any predecessor of MBB is on a path from a block that has a
+/// use or def of CSRs/FI to MBB.
+/// ReachableByDirty: All blocks reachable from a block that has a use or def
+/// of CSRs/FI.
+static bool
+hasDirtyPred(const DenseSet<const MachineBasicBlock *> &ReachableByDirty,
+             const MachineBasicBlock &MBB) {
+  for (const MachineBasicBlock *PredBB : MBB.predecessors())
+    if (ReachableByDirty.count(PredBB))
+      return true;
+  return false;
+}
+
+/// Derives the set of all basic blocks reachable from MBB.
+static void markAllReachable(DenseSet<const MachineBasicBlock *> &Visited,
+                             const MachineBasicBlock &MBB) {
+  SmallVector<MachineBasicBlock *, 4> Worklist(MBB.succ_begin(),
+                                               MBB.succ_end());
+  Visited.insert(&MBB);
+  while (!Worklist.empty()) {
+    MachineBasicBlock *SuccMBB = Worklist.pop_back_val();
+    if (!Visited.insert(SuccMBB).second)
+      continue;
+    Worklist.append(SuccMBB->succ_begin(), SuccMBB->succ_end());
+  }
+}
+
+/// Collect blocks reachable by use or def of CSRs/FI.
+static void collectBlocksReachableByDirty(
+    const DenseSet<const MachineBasicBlock *> &DirtyBBs,
+    DenseSet<const MachineBasicBlock *> &ReachableByDirty) {
+  for (const MachineBasicBlock *MBB : DirtyBBs) {
+    if (ReachableByDirty.count(MBB))
+      continue;
+    // Mark all descendants as reachable.
+    markAllReachable(ReachableByDirty, *MBB);
+  }
+}
+
+/// \return true if there is a clean path from SavePoint to the original
+/// Restore.
+static bool
+isSaveReachableThroughClean(const MachineBasicBlock *SavePoint,
+                            ArrayRef<MachineBasicBlock *> CleanPreds) {
+  DenseSet<const MachineBasicBlock *> Visited;
+  SmallVector<MachineBasicBlock *, 4> Worklist(CleanPreds.begin(),
+                                               CleanPreds.end());
+  while (!Worklist.empty()) {
+    MachineBasicBlock *CleanBB = Worklist.pop_back_val();
+    if (CleanBB == SavePoint)
+      return true;
+    if (!Visited.insert(CleanBB).second || !CleanBB->pred_size())
+      continue;
+    Worklist.append(CleanBB->pred_begin(), CleanBB->pred_end());
+  }
+  return false;
+}
+
+/// This function updates the branches post restore point split.
+///
+/// The restore point has been split.
+/// Old restore point: MBB
+/// New restore point: NMBB
+/// Any basic block (say BBToUpdate) that previously had a fallthrough to MBB
+/// should
+/// 1. Fall through to NMBB if NMBB is inserted immediately above MBB in the
+///    block layout, OR
+/// 2. Branch unconditionally to NMBB if NMBB is inserted at any other place.
+static void updateTerminator(MachineBasicBlock *BBToUpdate,
+                             MachineBasicBlock *NMBB,
+                             const TargetInstrInfo *TII) {
+  DebugLoc DL = BBToUpdate->findBranchDebugLoc();
+  // If NMBB isn't the new layout successor for BBToUpdate, insert an
+  // unconditional branch to it.
+  if (!BBToUpdate->isLayoutSuccessor(NMBB))
+    TII->insertUnconditionalBranch(*BBToUpdate, NMBB, DL);
+}
+
+/// This function splits the restore point and returns the new restore block.
+///
+/// DirtyPreds: Predecessors of \p MBB that are ReachableByDirty
+///
+/// The decision has been made to split the restore point.
+/// Old restore point: \p MBB
+/// New restore point: \p NMBB
+/// This function makes the necessary block layout changes so that
+/// 1. \p NMBB branches to \p MBB unconditionally
+/// 2. All DirtyPreds that previously pointed to \p MBB point to \p NMBB
+static MachineBasicBlock *
+tryToSplitRestore(MachineBasicBlock *MBB,
+                  ArrayRef<MachineBasicBlock *> DirtyPreds,
+                  const TargetInstrInfo *TII) {
+  MachineFunction *MF = MBB->getParent();
+
+  // Get the list of DirtyPreds that have a fallthrough to MBB before the
+  // block layout change. This is just to ensure that if NMBB is inserted
+  // after MBB, then we create an unconditional branch from each such
+  // DirtyPred to NMBB.
+  SmallPtrSet<MachineBasicBlock *, 8> MBBFallthrough;
+  for (MachineBasicBlock *BB : DirtyPreds)
+    if (BB->getFallThrough(false) == MBB)
+      MBBFallthrough.insert(BB);
+
+  MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
+  // Insert this block at the end of the function. Inserting in between may
+  // interfere with control flow optimizer decisions.
+  MF->insert(MF->end(), NMBB);
+
+  for (const MachineBasicBlock::RegisterMaskPair &LI : MBB->liveins())
+    NMBB->addLiveIn(LI.PhysReg);
+
+  TII->insertUnconditionalBranch(*NMBB, MBB, DebugLoc());
+
+  // After splitting, all predecessors of the restore point should be dirty
+  // blocks.
+  for (MachineBasicBlock *SuccBB : DirtyPreds)
+    SuccBB->ReplaceUsesOfBlockWith(MBB, NMBB);
+
+  NMBB->addSuccessor(MBB);
+
+  for (MachineBasicBlock *BBToUpdate : MBBFallthrough)
+    updateTerminator(BBToUpdate, NMBB, TII);
+
+  return NMBB;
+}
+
+/// This function undoes the restore point split done earlier.
+///
+/// DirtyPreds: All predecessors of \p NMBB that are ReachableByDirty.
+///
+/// The restore point was split and the change needs to be rolled back. Make
+/// the necessary changes to reset the restore point from \p NMBB to \p MBB.
+static void rollbackRestoreSplit(MachineFunction &MF, MachineBasicBlock *NMBB,
+                                 MachineBasicBlock *MBB,
+                                 ArrayRef<MachineBasicBlock *> DirtyPreds,
+                                 const TargetInstrInfo *TII) {
+  // For a BB, if NMBB is the fallthrough in the current layout, then in the
+  // new layout a. BB should fall through to MBB, OR b. BB should
+  // unconditionally branch to MBB.
+  SmallPtrSet<MachineBasicBlock *, 8> NMBBFallthrough;
+  for (MachineBasicBlock *BB : DirtyPreds)
+    if (BB->getFallThrough(false) == NMBB)
+      NMBBFallthrough.insert(BB);
+
+  NMBB->removeSuccessor(MBB);
+  for (MachineBasicBlock *SuccBB : DirtyPreds)
+    SuccBB->ReplaceUsesOfBlockWith(NMBB, MBB);
+
+  NMBB->erase(NMBB->begin(), NMBB->end());
+  NMBB->eraseFromParent();
+
+  for (MachineBasicBlock *BBToUpdate : NMBBFallthrough)
+    updateTerminator(BBToUpdate, MBB, TII);
+}
+
+// A block is deemed fit for a restore point split iff there exist
+// 1. DirtyPreds - preds of CurRestore reachable from a use or def of CSRs/FI
+// 2. CleanPreds - preds of CurRestore that aren't DirtyPreds
+bool ShrinkWrap::checkIfRestoreSplittable(
+    const MachineBasicBlock *CurRestore,
+    const DenseSet<const MachineBasicBlock *> &ReachableByDirty,
+    SmallVectorImpl<MachineBasicBlock *> &DirtyPreds,
+    SmallVectorImpl<MachineBasicBlock *> &CleanPreds,
+    const TargetInstrInfo *TII, RegScavenger *RS) {
+  for (const MachineInstr &MI : *CurRestore)
+    if (useOrDefCSROrFI(MI, RS))
+      return false;
+
+  for (MachineBasicBlock *PredBB : CurRestore->predecessors()) {
+    if (!isAnalyzableBB(*TII, *PredBB))
+      return false;
+
+    if (ReachableByDirty.count(PredBB))
+      DirtyPreds.push_back(PredBB);
+    else
+      CleanPreds.push_back(PredBB);
+  }
+
+  return !(CleanPreds.empty() || DirtyPreds.empty());
+}
+
+bool ShrinkWrap::postShrinkWrapping(bool HasCandidate, MachineFunction &MF,
+                                    RegScavenger *RS) {
+  if (!EnablePostShrinkWrapOpt)
+    return false;
+
+  MachineBasicBlock *InitSave = nullptr;
+  MachineBasicBlock *InitRestore = nullptr;
+
+  if (HasCandidate) {
+    InitSave = Save;
+    InitRestore = Restore;
+  } else {
+    InitRestore = nullptr;
+    InitSave = &MF.front();
+    for (MachineBasicBlock &MBB : MF) {
+      if (MBB.isEHFuncletEntry())
+        return false;
+      if (MBB.isReturnBlock()) {
+        // Do not support multiple restore points.
+        if (InitRestore)
+          return false;
+        InitRestore = &MBB;
+      }
+    }
+  }
+
+  if (!InitSave || !InitRestore || InitRestore == InitSave ||
+      !MDT->dominates(InitSave, InitRestore) ||
+      !MPDT->dominates(InitRestore, InitSave))
+    return false;
+
+  // Bail out of the optimization if any basic block is the target of an
+  // INLINEASM_BR instruction.
+  for (MachineBasicBlock &MBB : MF)
+    if (MBB.isInlineAsmBrIndirectTarget())
+      return false;
+
+  DenseSet<const MachineBasicBlock *> DirtyBBs;
+  for (MachineBasicBlock &MBB : MF) {
+    if (MBB.isEHPad()) {
+      DirtyBBs.insert(&MBB);
+      continue;
+    }
+    for (const MachineInstr &MI : MBB)
+      if (useOrDefCSROrFI(MI, RS)) {
+        DirtyBBs.insert(&MBB);
+        break;
+      }
+  }
+
+  // Find blocks reachable from the use or def of CSRs/FI.
+  DenseSet<const MachineBasicBlock *> ReachableByDirty;
+  collectBlocksReachableByDirty(DirtyBBs, ReachableByDirty);
+
+  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+  SmallVector<MachineBasicBlock *, 2> DirtyPreds;
+  SmallVector<MachineBasicBlock *, 2> CleanPreds;
+  if (!checkIfRestoreSplittable(InitRestore, ReachableByDirty, DirtyPreds,
+                                CleanPreds, TII, RS))
+    return false;
+
+  // Try to find a new save point that dominates all of the dirty blocks.
+  MachineBasicBlock *NewSave =
+      FindIDom<>(**DirtyPreds.begin(), DirtyPreds, *MDT, false);
+
+  while (NewSave && (hasDirtyPred(ReachableByDirty, *NewSave) ||
+                     EntryFreq < MBFI->getBlockFreq(NewSave).getFrequency()))
+    NewSave = FindIDom<>(**NewSave->pred_begin(), NewSave->predecessors(), *MDT,
+                         false);
+
+  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+  if (!NewSave || NewSave == InitSave ||
+      isSaveReachableThroughClean(NewSave, CleanPreds) ||
+      !TFI->canUseAsPrologue(*NewSave))
+    return false;
+
+  // Now we know that splitting a restore point can isolate the restore point
+  // from clean blocks and doing so can shrink the save point.
+  MachineBasicBlock *NewRestore =
+      tryToSplitRestore(InitRestore, DirtyPreds, TII);
+
+  // Make sure the new restore point is valid as an epilogue, depending on
+  // the target.
+  if (!TFI->canUseAsEpilogue(*NewRestore)) {
+    rollbackRestoreSplit(MF, NewRestore, InitRestore, DirtyPreds, TII);
+    return false;
+  }
+
+  Save = NewSave;
+  Restore = NewRestore;
+
+  MDT->runOnMachineFunction(MF);
+  MPDT->runOnMachineFunction(MF);
+
+  assert((MDT->dominates(Save, Restore) && MPDT->dominates(Restore, Save)) &&
+         "Incorrect save or restore point due to dominance relations");
+  assert((!MLI->getLoopFor(Save) && !MLI->getLoopFor(Restore)) &&
+         "Unexpected save or restore point in a loop");
+  assert((EntryFreq >= MBFI->getBlockFreq(Save).getFrequency() &&
+          EntryFreq >= MBFI->getBlockFreq(Restore).getFrequency()) &&
+         "Incorrect save or restore point based on block frequency");
+  return true;
+}
+
 void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
                                          RegScavenger *RS) {
   // Get rid of the easy cases first.
@@ -481,31 +801,7 @@ static bool giveUpWithRemarks(MachineOptimizationRemarkEmitter *ORE,
   return false;
 }
 
-bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
-  if (skipFunction(MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF))
-    return false;
-
-  LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
-
-  init(MF);
-
-  ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
-  if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *MLI)) {
-    // If MF is irreducible, a block may be in a loop without
-    // MachineLoopInfo reporting it. I.e., we may use the
-    // post-dominance property in loops, which lead to incorrect
-    // results. Moreover, we may miss that the prologue and
-    // epilogue are not in the same loop, leading to unbalanced
-    // construction/deconstruction of the stack frame.
-    return giveUpWithRemarks(ORE, "UnsupportedIrreducibleCFG",
-                             "Irreducible CFGs are not supported yet.",
-                             MF.getFunction().getSubprogram(), &MF.front());
-  }
-
-  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
-  std::unique_ptr<RegScavenger> RS(
-      TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr);
-
+bool ShrinkWrap::performShrinkWrapping(MachineFunction &MF, RegScavenger *RS) {
   for (MachineBasicBlock &MBB : MF) {
     LLVM_DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' '
                       << MBB.getName() << '\n');
@@ -521,7 +817,7 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
       // are at least at the boundary of the save and restore points.  The
       // problem is that a basic block can jump out from the middle in these
       // cases, which we do not handle.
-      updateSaveRestorePoints(MBB, RS.get());
+      updateSaveRestorePoints(MBB, RS);
       if (!ArePointsInteresting()) {
         LLVM_DEBUG(dbgs() << "EHPad/inlineasm_br prevents shrink-wrapping\n");
         return false;
@@ -530,11 +826,11 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
     }
 
     for (const MachineInstr &MI : MBB) {
-      if (!useOrDefCSROrFI(MI, RS.get()))
+      if (!useOrDefCSROrFI(MI, RS))
         continue;
       // Save (resp. restore) point must dominate (resp. post dominate)
       // MI. Look for the proper basic block for those.
-      updateSaveRestorePoints(MBB, RS.get());
+      updateSaveRestorePoints(MBB, RS);
       // If we are at a point where we cannot improve the placement of
       // save/restore instructions, just give up.
       if (!ArePointsInteresting()) {
@@ -588,13 +884,49 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
         break;
       NewBB = Restore;
     }
-    updateSaveRestorePoints(*NewBB, RS.get());
+    updateSaveRestorePoints(*NewBB, RS);
   } while (Save && Restore);
 
   if (!ArePointsInteresting()) {
     ++NumCandidatesDropped;
     return false;
   }
+  return true;
+}
+
+bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF))
+    return false;
+
+  LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n');
+
+  init(MF);
+
+  ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
+  if (containsIrreducibleCFG<MachineBasicBlock *>(RPOT, *MLI)) {
+    // If MF is irreducible, a block may be in a loop without
+    // MachineLoopInfo reporting it. I.e., we may use the
+    // post-dominance property in loops, which lead to incorrect
+    // results. Moreover, we may miss that the prologue and
+    // epilogue are not in the same loop, leading to unbalanced
+    // construction/deconstruction of the stack frame.
+    return giveUpWithRemarks(ORE, "UnsupportedIrreducibleCFG",
+                             "Irreducible CFGs are not supported yet.",
+                             MF.getFunction().getSubprogram(), &MF.front());
+  }
+
+  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+  std::unique_ptr<RegScavenger> RS(
+      TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr);
+
+  bool Changed = false;
+
+  bool HasCandidate = performShrinkWrapping(MF, RS.get());
+  Changed = postShrinkWrapping(HasCandidate, MF, RS.get());
+  if (!HasCandidate && !Changed)
+    return false;
+  if (!ArePointsInteresting())
+    return Changed;
 
   LLVM_DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: "
                     << Save->getNumber() << ' ' << Save->getName()
@@ -605,7 +937,7 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
   MFI.setSavePoint(Save);
   MFI.setRestorePoint(Restore);
   ++NumCandidates;
-  return false;
+  return Changed;
 }
 
 bool ShrinkWrap::isShrinkWrapEnabled(const MachineFunction &MF) {
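
For readers skimming the patch, the splittability test boils down to a
reachability walk from the dirty blocks followed by a partition of the restore
block's predecessors. Below is a minimal standalone sketch of that core idea,
using a toy Block type and plain STL containers rather than LLVM's
MachineBasicBlock/DenseSet, and eliding the branch-analyzability and
useOrDefCSROrFI checks:

  #include <cassert>
  #include <set>
  #include <vector>

  // Toy CFG node; stands in for MachineBasicBlock in this sketch only.
  struct Block {
    std::vector<Block *> Succs, Preds;
  };

  // Mirrors markAllReachable: mark B and everything reachable from it.
  static void markAllReachable(std::set<const Block *> &Visited,
                               const Block &B) {
    std::vector<Block *> Worklist(B.Succs.begin(), B.Succs.end());
    Visited.insert(&B);
    while (!Worklist.empty()) {
      Block *Succ = Worklist.back();
      Worklist.pop_back();
      if (!Visited.insert(Succ).second)
        continue;
      Worklist.insert(Worklist.end(), Succ->Succs.begin(), Succ->Succs.end());
    }
  }

  // Mirrors the predecessor partition in checkIfRestoreSplittable: the split
  // is only worthwhile when the restore block has both dirty and clean preds.
  static bool
  isRestoreSplittable(const Block &Restore,
                      const std::set<const Block *> &ReachableByDirty) {
    bool HasDirty = false, HasClean = false;
    for (const Block *Pred : Restore.Preds) {
      if (ReachableByDirty.count(Pred))
        HasDirty = true;
      else
        HasClean = true;
    }
    return HasDirty && HasClean;
  }

  int main() {
    // Diamond: Entry -> {Dirty, Clean} -> Restore. Only the Dirty path
    // touches a CSR/FI, so Restore ends up with one dirty and one clean pred.
    Block Entry, Dirty, Clean, Restore;
    auto Link = [](Block &From, Block &To) {
      From.Succs.push_back(&To);
      To.Preds.push_back(&From);
    };
    Link(Entry, Dirty);
    Link(Entry, Clean);
    Link(Dirty, Restore);
    Link(Clean, Restore);

    std::set<const Block *> ReachableByDirty;
    markAllReachable(ReachableByDirty, Dirty);
    assert(isRestoreSplittable(Restore, ReachableByDirty));
    return 0;
  }

Once every predecessor of the new restore block is dirty, the pass can walk up
the dominator tree (FindIDom with Strict=false) to sink the save point below
the clean paths, which is exactly what postShrinkWrapping does above.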


        

