[llvm] 75882ed - [Codegen] (NFC) Faster algorithm for MachineBlockPlacement (#91843)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 13 19:13:42 PDT 2024


Author: William Junda Huang
Date: 2024-06-13T22:13:38-04:00
New Revision: 75882ed4c7126b33b1dabb08775af5ee0b2c6e12

URL: https://github.com/llvm/llvm-project/commit/75882ed4c7126b33b1dabb08775af5ee0b2c6e12
DIFF: https://github.com/llvm/llvm-project/commit/75882ed4c7126b33b1dabb08775af5ee0b2c6e12.diff

LOG: [Codegen] (NFC) Faster algorithm for MachineBlockPlacement (#91843)

In MachineBlockPlacement, the function getFirstUnplacedBlock is
inefficient because in most cases (for usual loop CFG), this function
fails to find a candidate, and its complexity becomes O(#(loops in
function) * #(blocks in function)). This makes the compilation of very
long functions slow. This update reduces it to O(k * #(blocks in
function)) where k is the maximum loop nesting depth, by iterating
through the BlockFilter instead.

Added: 
    

Modified: 
    llvm/lib/CodeGen/MachineBlockPlacement.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index d250981117c8f..1cb71f39efbe1 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -480,14 +480,16 @@ class MachineBlockPlacement : public MachineFunctionPass {
                                BlockFilterSet *BlockFilter);
   bool repeatedlyTailDuplicateBlock(
       MachineBasicBlock *BB, MachineBasicBlock *&LPred,
-      const MachineBasicBlock *LoopHeaderBB,
-      BlockChain &Chain, BlockFilterSet *BlockFilter,
-      MachineFunction::iterator &PrevUnplacedBlockIt);
-  bool maybeTailDuplicateBlock(
-      MachineBasicBlock *BB, MachineBasicBlock *LPred,
-      BlockChain &Chain, BlockFilterSet *BlockFilter,
+      const MachineBasicBlock *LoopHeaderBB, BlockChain &Chain,
+      BlockFilterSet *BlockFilter,
       MachineFunction::iterator &PrevUnplacedBlockIt,
-      bool &DuplicatedToLPred);
+      BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt);
+  bool
+  maybeTailDuplicateBlock(MachineBasicBlock *BB, MachineBasicBlock *LPred,
+                          BlockChain &Chain, BlockFilterSet *BlockFilter,
+                          MachineFunction::iterator &PrevUnplacedBlockIt,
+                          BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
+                          bool &DuplicatedToLPred);
   bool hasBetterLayoutPredecessor(
       const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
       const BlockChain &SuccChain, BranchProbability SuccProb,
@@ -498,10 +500,13 @@ class MachineBlockPlacement : public MachineFunctionPass {
       const BlockFilterSet *BlockFilter);
   MachineBasicBlock *selectBestCandidateBlock(
       const BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList);
-  MachineBasicBlock *getFirstUnplacedBlock(
-      const BlockChain &PlacedChain,
-      MachineFunction::iterator &PrevUnplacedBlockIt,
-      const BlockFilterSet *BlockFilter);
+  MachineBasicBlock *
+  getFirstUnplacedBlock(const BlockChain &PlacedChain,
+                        MachineFunction::iterator &PrevUnplacedBlockIt);
+  MachineBasicBlock *
+  getFirstUnplacedBlock(const BlockChain &PlacedChain,
+                        BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
+                        const BlockFilterSet *BlockFilter);
 
   /// Add a basic block to the work list if it is appropriate.
   ///
@@ -1761,7 +1766,7 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
   return BestBlock;
 }
 
-/// Retrieve the first unplaced basic block.
+/// Retrieve the first unplaced basic block in the entire function.
 ///
 /// This routine is called when we are unable to use the CFG to walk through
 /// all of the basic blocks and form a chain due to unnatural loops in the CFG.
@@ -1770,12 +1775,10 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
 /// re-scanning the entire sequence on repeated calls to this routine.
 MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
     const BlockChain &PlacedChain,
-    MachineFunction::iterator &PrevUnplacedBlockIt,
-    const BlockFilterSet *BlockFilter) {
+    MachineFunction::iterator &PrevUnplacedBlockIt) {
+
   for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F->end(); I != E;
        ++I) {
-    if (BlockFilter && !BlockFilter->count(&*I))
-      continue;
     if (BlockToChain[&*I] != &PlacedChain) {
       PrevUnplacedBlockIt = I;
       // Now select the head of the chain to which the unplaced block belongs
@@ -1787,6 +1790,31 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
   return nullptr;
 }
 
+/// Retrieve the first unplaced basic block among the blocks in BlockFilter.
+///
+/// This is similar to getFirstUnplacedBlock for the entire function, but since
+/// the size of BlockFilter is typically far less than the number of blocks in
+/// the entire function, iterating through the BlockFilter is more efficient.
+/// When processing the entire funciton, using the version without BlockFilter
+/// has a complexity of #(loops in function) * #(blocks in function), while this
+/// version has a complexity of sum(#(loops in block) foreach block in function)
+/// which is always smaller. For long function mostly sequential in structure,
+/// the complexity is amortized to 1 * #(blocks in function).
+MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
+    const BlockChain &PlacedChain,
+    BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
+    const BlockFilterSet *BlockFilter) {
+  assert(BlockFilter);
+  for (; PrevUnplacedBlockInFilterIt != BlockFilter->end();
+       ++PrevUnplacedBlockInFilterIt) {
+    BlockChain *C = BlockToChain[*PrevUnplacedBlockInFilterIt];
+    if (C != &PlacedChain) {
+      return *C->begin();
+    }
+  }
+  return nullptr;
+}
+
 void MachineBlockPlacement::fillWorkLists(
     const MachineBasicBlock *MBB,
     SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
@@ -1826,6 +1854,9 @@ void MachineBlockPlacement::buildChain(
   assert(HeadBB && "BB must not be null.\n");
   assert(BlockToChain[HeadBB] == &Chain && "BlockToChainMap mis-match.\n");
   MachineFunction::iterator PrevUnplacedBlockIt = F->begin();
+  BlockFilterSet::iterator PrevUnplacedBlockInFilterIt;
+  if (BlockFilter)
+    PrevUnplacedBlockInFilterIt = BlockFilter->begin();
 
   const MachineBasicBlock *LoopHeaderBB = HeadBB;
   markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);
@@ -1855,7 +1886,11 @@ void MachineBlockPlacement::buildChain(
       BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList);
 
     if (!BestSucc) {
-      BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt, BlockFilter);
+      if (BlockFilter)
+        BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockInFilterIt,
+                                         BlockFilter);
+      else
+        BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt);
       if (!BestSucc)
         break;
 
@@ -1867,7 +1902,8 @@ void MachineBlockPlacement::buildChain(
     // Check for that now.
     if (allowTailDupPlacement() && BestSucc && ShouldTailDup) {
       repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
-                                       BlockFilter, PrevUnplacedBlockIt);
+                                   BlockFilter, PrevUnplacedBlockIt,
+                                   PrevUnplacedBlockInFilterIt);
       // If the chosen successor was duplicated into BB, don't bother laying
       // it out, just go round the loop again with BB as the chain end.
       if (!BB->isSuccessor(BestSucc))
@@ -3017,14 +3053,14 @@ void MachineBlockPlacement::alignBlocks() {
 /// @return true if \p BB was removed.
 bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
     MachineBasicBlock *BB, MachineBasicBlock *&LPred,
-    const MachineBasicBlock *LoopHeaderBB,
-    BlockChain &Chain, BlockFilterSet *BlockFilter,
-    MachineFunction::iterator &PrevUnplacedBlockIt) {
+    const MachineBasicBlock *LoopHeaderBB, BlockChain &Chain,
+    BlockFilterSet *BlockFilter, MachineFunction::iterator &PrevUnplacedBlockIt,
+    BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt) {
   bool Removed, DuplicatedToLPred;
   bool DuplicatedToOriginalLPred;
-  Removed = maybeTailDuplicateBlock(BB, LPred, Chain, BlockFilter,
-                                    PrevUnplacedBlockIt,
-                                    DuplicatedToLPred);
+  Removed = maybeTailDuplicateBlock(
+      BB, LPred, Chain, BlockFilter, PrevUnplacedBlockIt,
+      PrevUnplacedBlockInFilterIt, DuplicatedToLPred);
   if (!Removed)
     return false;
   DuplicatedToOriginalLPred = DuplicatedToLPred;
@@ -3045,9 +3081,9 @@ bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
     if (ChainEnd == Chain.begin())
       break;
     DupPred = *std::prev(ChainEnd);
-    Removed = maybeTailDuplicateBlock(DupBB, DupPred, Chain, BlockFilter,
-                                      PrevUnplacedBlockIt,
-                                      DuplicatedToLPred);
+    Removed = maybeTailDuplicateBlock(
+        DupBB, DupPred, Chain, BlockFilter, PrevUnplacedBlockIt,
+        PrevUnplacedBlockInFilterIt, DuplicatedToLPred);
   }
   // If BB was duplicated into LPred, it is now scheduled. But because it was
   // removed, markChainSuccessors won't be called for its chain. Instead we
@@ -3074,9 +3110,9 @@ bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
 /// \p DuplicatedToLPred - True if the block was duplicated into LPred.
 /// \return  - True if the block was duplicated into all preds and removed.
 bool MachineBlockPlacement::maybeTailDuplicateBlock(
-    MachineBasicBlock *BB, MachineBasicBlock *LPred,
-    BlockChain &Chain, BlockFilterSet *BlockFilter,
-    MachineFunction::iterator &PrevUnplacedBlockIt,
+    MachineBasicBlock *BB, MachineBasicBlock *LPred, BlockChain &Chain,
+    BlockFilterSet *BlockFilter, MachineFunction::iterator &PrevUnplacedBlockIt,
+    BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
     bool &DuplicatedToLPred) {
   DuplicatedToLPred = false;
   if (!shouldTailDuplicate(BB))
@@ -3118,7 +3154,25 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
 
         // Handle the filter set
         if (BlockFilter) {
-          BlockFilter->remove(RemBB);
+          auto It = llvm::find(*BlockFilter, RemBB);
+          // Erase RemBB from BlockFilter, and keep PrevUnplacedBlockInFilterIt
+          // pointing to the same element as before.
+          if (It != BlockFilter->end()) {
+            if (It < PrevUnplacedBlockInFilterIt) {
+              const MachineBasicBlock *PrevBB = *PrevUnplacedBlockInFilterIt;
+              // BlockFilter is a SmallVector so all elements after RemBB are
+              // shifted to the front by 1 after its deletion.
+              auto Distance = PrevUnplacedBlockInFilterIt - It - 1;
+              PrevUnplacedBlockInFilterIt = BlockFilter->erase(It) + Distance;
+              assert(*PrevUnplacedBlockInFilterIt == PrevBB);
+              (void)PrevBB;
+            } else if (It == PrevUnplacedBlockInFilterIt)
+              // The block pointed by PrevUnplacedBlockInFilterIt is erased, we
+              // have to set it to the next element.
+              PrevUnplacedBlockInFilterIt = BlockFilter->erase(It);
+            else
+              BlockFilter->erase(It);
+          }
         }
 
         // Remove the block from loop info.


        


More information about the llvm-commits mailing list