[llvm] [Codegen] (NFC) Faster algorithm for MachineBlockPlacement (PR #91843)

William Junda Huang via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 4 19:53:42 PDT 2024


https://github.com/huangjd updated https://github.com/llvm/llvm-project/pull/91843

>From 445726607f7a14e9c3fe31261b9ca925888d52b9 Mon Sep 17 00:00:00 2001
From: William Huang <williamjhuang at google.com>
Date: Sat, 11 May 2024 02:12:53 -0400
Subject: [PATCH 1/4] [Codegen] Faster algorithm for MachineBlockPlacement

In MachineBlockPlacement, the function getFirstUnplacedBlock is
inefficient because in most cases (for usual loop CFG), this function
fails to find a candidate, and its complexity becomes O(#(loops in
function) * #(blocks in function)). This makes the compilation of very
long functions slow. This update reduces it to O(k * #(blocks in
function)) where k is the maximum loop nesting depth, by iterating
through the BlockFilter instead.
---
 llvm/lib/CodeGen/MachineBlockPlacement.cpp | 62 ++++++++++++++++++----
 1 file changed, 52 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index c0cdeab25f1ca..df32add0ba849 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -482,11 +482,13 @@ class MachineBlockPlacement : public MachineFunctionPass {
       MachineBasicBlock *BB, MachineBasicBlock *&LPred,
       const MachineBasicBlock *LoopHeaderBB,
       BlockChain &Chain, BlockFilterSet *BlockFilter,
-      MachineFunction::iterator &PrevUnplacedBlockIt);
+      MachineFunction::iterator &PrevUnplacedBlockIt,
+      BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt);
   bool maybeTailDuplicateBlock(
       MachineBasicBlock *BB, MachineBasicBlock *LPred,
       BlockChain &Chain, BlockFilterSet *BlockFilter,
       MachineFunction::iterator &PrevUnplacedBlockIt,
+      BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
       bool &DuplicatedToLPred);
   bool hasBetterLayoutPredecessor(
       const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
@@ -500,7 +502,10 @@ class MachineBlockPlacement : public MachineFunctionPass {
       const BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList);
   MachineBasicBlock *getFirstUnplacedBlock(
       const BlockChain &PlacedChain,
-      MachineFunction::iterator &PrevUnplacedBlockIt,
+      MachineFunction::iterator &PrevUnplacedBlockIt);
+  MachineBasicBlock *getFirstUnplacedBlock(
+      const BlockChain &PlacedChain,
+      BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
       const BlockFilterSet *BlockFilter);
 
   /// Add a basic block to the work list if it is appropriate.
@@ -1761,7 +1766,7 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
   return BestBlock;
 }
 
-/// Retrieve the first unplaced basic block.
+/// Retrieve the first unplaced basic block in the entire function.
 ///
 /// This routine is called when we are unable to use the CFG to walk through
 /// all of the basic blocks and form a chain due to unnatural loops in the CFG.
@@ -1770,12 +1775,10 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
 /// re-scanning the entire sequence on repeated calls to this routine.
 MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
     const BlockChain &PlacedChain,
-    MachineFunction::iterator &PrevUnplacedBlockIt,
-    const BlockFilterSet *BlockFilter) {
+    MachineFunction::iterator &PrevUnplacedBlockIt) {
+
   for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F->end(); I != E;
        ++I) {
-    if (BlockFilter && !BlockFilter->count(&*I))
-      continue;
     if (BlockToChain[&*I] != &PlacedChain) {
       PrevUnplacedBlockIt = I;
       // Now select the head of the chain to which the unplaced block belongs
@@ -1787,6 +1790,31 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
   return nullptr;
 }
 
+/// Retrieve the first unplaced basic block among the blocks in BlockFilter.
+///
+/// This is similar to getFirstUnplacedBlock for the entire function, but since
+/// the size of BlockFilter is typically far less than the number of blocks in
+/// the entire function, iterating through the BlockFilter is more efficient.
+/// When processing the entire funciton, using the version without BlockFilter
+/// has a complexity of #(loops in function) * #(blocks in function), while this
+/// version has a complexity of sum(#(loops in block) foreach block in function)
+/// which is always smaller. For long function mostly sequential in structure,
+/// the complexity is amortized to 1 * #(blocks in function).
+MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
+    const BlockChain &PlacedChain,
+    BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
+    const BlockFilterSet *BlockFilter) {
+  assert(BlockFilter);
+  for (;PrevUnplacedBlockInFilterIt != BlockFilter->end();
+       ++PrevUnplacedBlockInFilterIt) {
+    BlockChain *C = BlockToChain[*PrevUnplacedBlockInFilterIt];
+    if (C != &PlacedChain) {
+      return *C->begin();
+    }
+  }
+  return nullptr;
+}
+
 void MachineBlockPlacement::fillWorkLists(
     const MachineBasicBlock *MBB,
     SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
@@ -1826,6 +1854,9 @@ void MachineBlockPlacement::buildChain(
   assert(HeadBB && "BB must not be null.\n");
   assert(BlockToChain[HeadBB] == &Chain && "BlockToChainMap mis-match.\n");
   MachineFunction::iterator PrevUnplacedBlockIt = F->begin();
+  BlockFilterSet::iterator PrevUnplacedBlockInFilterIt;
+  if (BlockFilter)
+    PrevUnplacedBlockInFilterIt = BlockFilter->begin();
 
   const MachineBasicBlock *LoopHeaderBB = HeadBB;
   markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);
@@ -1855,7 +1886,11 @@ void MachineBlockPlacement::buildChain(
       BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList);
 
     if (!BestSucc) {
-      BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt, BlockFilter);
+      if (BlockFilter)
+        BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockInFilterIt,
+                                         BlockFilter);
+      else
+        BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt);
       if (!BestSucc)
         break;
 
@@ -1867,7 +1902,8 @@ void MachineBlockPlacement::buildChain(
     // Check for that now.
     if (allowTailDupPlacement() && BestSucc && ShouldTailDup) {
       repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
-                                       BlockFilter, PrevUnplacedBlockIt);
+                                   BlockFilter, PrevUnplacedBlockIt,
+                                   PrevUnplacedBlockInFilterIt);
       // If the chosen successor was duplicated into BB, don't bother laying
       // it out, just go round the loop again with BB as the chain end.
       if (!BB->isSuccessor(BestSucc))
@@ -3019,11 +3055,13 @@ bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
     MachineBasicBlock *BB, MachineBasicBlock *&LPred,
     const MachineBasicBlock *LoopHeaderBB,
     BlockChain &Chain, BlockFilterSet *BlockFilter,
-    MachineFunction::iterator &PrevUnplacedBlockIt) {
+    MachineFunction::iterator &PrevUnplacedBlockIt,
+    BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt) {
   bool Removed, DuplicatedToLPred;
   bool DuplicatedToOriginalLPred;
   Removed = maybeTailDuplicateBlock(BB, LPred, Chain, BlockFilter,
                                     PrevUnplacedBlockIt,
+                                    PrevUnplacedBlockInFilterIt,
                                     DuplicatedToLPred);
   if (!Removed)
     return false;
@@ -3047,6 +3085,7 @@ bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
     DupPred = *std::prev(ChainEnd);
     Removed = maybeTailDuplicateBlock(DupBB, DupPred, Chain, BlockFilter,
                                       PrevUnplacedBlockIt,
+                                      PrevUnplacedBlockInFilterIt,
                                       DuplicatedToLPred);
   }
   // If BB was duplicated into LPred, it is now scheduled. But because it was
@@ -3077,6 +3116,7 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
     MachineBasicBlock *BB, MachineBasicBlock *LPred,
     BlockChain &Chain, BlockFilterSet *BlockFilter,
     MachineFunction::iterator &PrevUnplacedBlockIt,
+    BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
     bool &DuplicatedToLPred) {
   DuplicatedToLPred = false;
   if (!shouldTailDuplicate(BB))
@@ -3119,6 +3159,8 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
         // Handle the filter set
         if (BlockFilter) {
           BlockFilter->remove(RemBB);
+          if (*PrevUnplacedBlockInFilterIt == RemBB)
+            PrevUnplacedBlockInFilterIt++;
         }
 
         // Remove the block from loop info.

>From 46940e6e019648b5797976a2ff76a81add136f43 Mon Sep 17 00:00:00 2001
From: William Huang <williamjhuang at google.com>
Date: Mon, 3 Jun 2024 22:29:32 -0400
Subject: [PATCH 2/4] Fixed iterator use after remove

---
 llvm/lib/CodeGen/MachineBlockPlacement.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index df32add0ba849..364d2c6a8c6bf 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -3158,9 +3158,9 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
 
         // Handle the filter set
         if (BlockFilter) {
-          BlockFilter->remove(RemBB);
           if (*PrevUnplacedBlockInFilterIt == RemBB)
             PrevUnplacedBlockInFilterIt++;
+          BlockFilter->remove(RemBB);
         }
 
         // Remove the block from loop info.

>From 4e50833579d1d969980d21a93385d7a608c8b12a Mon Sep 17 00:00:00 2001
From: William Huang <williamjhuang at google.com>
Date: Tue, 4 Jun 2024 22:32:14 -0400
Subject: [PATCH 3/4] Fixed iterator usage in BlockFilter->remove()

---
 llvm/lib/CodeGen/MachineBlockPlacement.cpp | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 364d2c6a8c6bf..fabf1386eb62b 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -3157,11 +3157,8 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
         }
 
         // Handle the filter set
-        if (BlockFilter) {
-          if (*PrevUnplacedBlockInFilterIt == RemBB)
-            PrevUnplacedBlockInFilterIt++;
+        if (BlockFilter)
           BlockFilter->remove(RemBB);
-        }
 
         // Remove the block from loop info.
         MLI->removeBlock(RemBB);

>From ba0e78cd11a079d7d8707ed59feec4029c80274c Mon Sep 17 00:00:00 2001
From: William Huang <williamjhuang at google.com>
Date: Tue, 4 Jun 2024 22:53:11 -0400
Subject: [PATCH 4/4] Remove now unused argument in
 repeatedlyTailDuplicateBlock

---
 llvm/lib/CodeGen/MachineBlockPlacement.cpp | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index fabf1386eb62b..518e5194285e9 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -482,13 +482,11 @@ class MachineBlockPlacement : public MachineFunctionPass {
       MachineBasicBlock *BB, MachineBasicBlock *&LPred,
       const MachineBasicBlock *LoopHeaderBB,
       BlockChain &Chain, BlockFilterSet *BlockFilter,
-      MachineFunction::iterator &PrevUnplacedBlockIt,
-      BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt);
+      MachineFunction::iterator &PrevUnplacedBlockIt);
   bool maybeTailDuplicateBlock(
       MachineBasicBlock *BB, MachineBasicBlock *LPred,
       BlockChain &Chain, BlockFilterSet *BlockFilter,
       MachineFunction::iterator &PrevUnplacedBlockIt,
-      BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
       bool &DuplicatedToLPred);
   bool hasBetterLayoutPredecessor(
       const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
@@ -1902,8 +1900,7 @@ void MachineBlockPlacement::buildChain(
     // Check for that now.
     if (allowTailDupPlacement() && BestSucc && ShouldTailDup) {
       repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
-                                   BlockFilter, PrevUnplacedBlockIt,
-                                   PrevUnplacedBlockInFilterIt);
+                                       BlockFilter, PrevUnplacedBlockIt);
       // If the chosen successor was duplicated into BB, don't bother laying
       // it out, just go round the loop again with BB as the chain end.
       if (!BB->isSuccessor(BestSucc))
@@ -3055,13 +3052,11 @@ bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
     MachineBasicBlock *BB, MachineBasicBlock *&LPred,
     const MachineBasicBlock *LoopHeaderBB,
     BlockChain &Chain, BlockFilterSet *BlockFilter,
-    MachineFunction::iterator &PrevUnplacedBlockIt,
-    BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt) {
+    MachineFunction::iterator &PrevUnplacedBlockIt) {
   bool Removed, DuplicatedToLPred;
   bool DuplicatedToOriginalLPred;
   Removed = maybeTailDuplicateBlock(BB, LPred, Chain, BlockFilter,
                                     PrevUnplacedBlockIt,
-                                    PrevUnplacedBlockInFilterIt,
                                     DuplicatedToLPred);
   if (!Removed)
     return false;
@@ -3085,7 +3080,6 @@ bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
     DupPred = *std::prev(ChainEnd);
     Removed = maybeTailDuplicateBlock(DupBB, DupPred, Chain, BlockFilter,
                                       PrevUnplacedBlockIt,
-                                      PrevUnplacedBlockInFilterIt,
                                       DuplicatedToLPred);
   }
   // If BB was duplicated into LPred, it is now scheduled. But because it was
@@ -3116,7 +3110,6 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
     MachineBasicBlock *BB, MachineBasicBlock *LPred,
     BlockChain &Chain, BlockFilterSet *BlockFilter,
     MachineFunction::iterator &PrevUnplacedBlockIt,
-    BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
     bool &DuplicatedToLPred) {
   DuplicatedToLPred = false;
   if (!shouldTailDuplicate(BB))
@@ -3157,8 +3150,9 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
         }
 
         // Handle the filter set
-        if (BlockFilter)
+        if (BlockFilter) {
           BlockFilter->remove(RemBB);
+        }
 
         // Remove the block from loop info.
         MLI->removeBlock(RemBB);



More information about the llvm-commits mailing list