[llvm] [Codegen] (NFC) Faster algorithm for MachineBlockPlacement (PR #91843)
William Junda Huang via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 4 19:33:29 PDT 2024
https://github.com/huangjd updated https://github.com/llvm/llvm-project/pull/91843
>From 445726607f7a14e9c3fe31261b9ca925888d52b9 Mon Sep 17 00:00:00 2001
From: William Huang <williamjhuang at google.com>
Date: Sat, 11 May 2024 02:12:53 -0400
Subject: [PATCH 1/3] [Codegen] Faster algorithm for MachineBlockPlacement
In MachineBlockPlacement, the function getFirstUnplacedBlock is
inefficient because in most cases (for a typical loop CFG) it fails to
find a candidate, so its total cost becomes O(#(loops in function) *
#(blocks in function)). This makes the compilation of very long
functions slow. This update reduces the cost to O(k * #(blocks in
function)), where k is the maximum loop nesting depth, by iterating
through the BlockFilter instead of through every block in the function.
---
llvm/lib/CodeGen/MachineBlockPlacement.cpp | 62 ++++++++++++++++++----
1 file changed, 52 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index c0cdeab25f1ca..df32add0ba849 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -482,11 +482,13 @@ class MachineBlockPlacement : public MachineFunctionPass {
MachineBasicBlock *BB, MachineBasicBlock *&LPred,
const MachineBasicBlock *LoopHeaderBB,
BlockChain &Chain, BlockFilterSet *BlockFilter,
- MachineFunction::iterator &PrevUnplacedBlockIt);
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt);
bool maybeTailDuplicateBlock(
MachineBasicBlock *BB, MachineBasicBlock *LPred,
BlockChain &Chain, BlockFilterSet *BlockFilter,
MachineFunction::iterator &PrevUnplacedBlockIt,
+ BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
bool &DuplicatedToLPred);
bool hasBetterLayoutPredecessor(
const MachineBasicBlock *BB, const MachineBasicBlock *Succ,
@@ -500,7 +502,10 @@ class MachineBlockPlacement : public MachineFunctionPass {
const BlockChain &Chain, SmallVectorImpl<MachineBasicBlock *> &WorkList);
MachineBasicBlock *getFirstUnplacedBlock(
const BlockChain &PlacedChain,
- MachineFunction::iterator &PrevUnplacedBlockIt,
+ MachineFunction::iterator &PrevUnplacedBlockIt);
+ MachineBasicBlock *getFirstUnplacedBlock(
+ const BlockChain &PlacedChain,
+ BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
const BlockFilterSet *BlockFilter);
/// Add a basic block to the work list if it is appropriate.
@@ -1761,7 +1766,7 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
return BestBlock;
}
-/// Retrieve the first unplaced basic block.
+/// Retrieve the first unplaced basic block in the entire function.
///
/// This routine is called when we are unable to use the CFG to walk through
/// all of the basic blocks and form a chain due to unnatural loops in the CFG.
@@ -1770,12 +1775,10 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
/// re-scanning the entire sequence on repeated calls to this routine.
MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
const BlockChain &PlacedChain,
- MachineFunction::iterator &PrevUnplacedBlockIt,
- const BlockFilterSet *BlockFilter) {
+ MachineFunction::iterator &PrevUnplacedBlockIt) {
+
for (MachineFunction::iterator I = PrevUnplacedBlockIt, E = F->end(); I != E;
++I) {
- if (BlockFilter && !BlockFilter->count(&*I))
- continue;
if (BlockToChain[&*I] != &PlacedChain) {
PrevUnplacedBlockIt = I;
// Now select the head of the chain to which the unplaced block belongs
@@ -1787,6 +1790,31 @@ MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
return nullptr;
}
+/// Retrieve the first unplaced basic block among the blocks in BlockFilter.
+///
+/// This is similar to getFirstUnplacedBlock for the entire function, but since
+/// the size of BlockFilter is typically far less than the number of blocks in
+/// the entire function, iterating through the BlockFilter is more efficient.
+/// When processing the entire function, using the version without BlockFilter
+/// has a complexity of #(loops in function) * #(blocks in function), while this
+/// version has a complexity of sum(#(loops in block) foreach block in function)
+/// which is always smaller. For long functions that are mostly sequential in
+/// structure, the complexity is amortized to 1 * #(blocks in function).
+MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock(
+ const BlockChain &PlacedChain,
+ BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
+ const BlockFilterSet *BlockFilter) {
+ assert(BlockFilter); // This overload requires a non-null filter.
+ for (;PrevUnplacedBlockInFilterIt != BlockFilter->end(); // Resume from the caller's saved position.
+ ++PrevUnplacedBlockInFilterIt) {
+ BlockChain *C = BlockToChain[*PrevUnplacedBlockInFilterIt];
+ if (C != &PlacedChain) { // Block is not part of the chain being built, i.e. unplaced.
+ return *C->begin(); // Head of the chain to which the unplaced block belongs.
+ }
+ }
+ return nullptr; // Every remaining block in the filter already belongs to PlacedChain.
+}
+
void MachineBlockPlacement::fillWorkLists(
const MachineBasicBlock *MBB,
SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
@@ -1826,6 +1854,9 @@ void MachineBlockPlacement::buildChain(
assert(HeadBB && "BB must not be null.\n");
assert(BlockToChain[HeadBB] == &Chain && "BlockToChainMap mis-match.\n");
MachineFunction::iterator PrevUnplacedBlockIt = F->begin();
+ BlockFilterSet::iterator PrevUnplacedBlockInFilterIt;
+ if (BlockFilter)
+ PrevUnplacedBlockInFilterIt = BlockFilter->begin();
const MachineBasicBlock *LoopHeaderBB = HeadBB;
markChainSuccessors(Chain, LoopHeaderBB, BlockFilter);
@@ -1855,7 +1886,11 @@ void MachineBlockPlacement::buildChain(
BestSucc = selectBestCandidateBlock(Chain, EHPadWorkList);
if (!BestSucc) {
- BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt, BlockFilter);
+ if (BlockFilter)
+ BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockInFilterIt,
+ BlockFilter);
+ else
+ BestSucc = getFirstUnplacedBlock(Chain, PrevUnplacedBlockIt);
if (!BestSucc)
break;
@@ -1867,7 +1902,8 @@ void MachineBlockPlacement::buildChain(
// Check for that now.
if (allowTailDupPlacement() && BestSucc && ShouldTailDup) {
repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
- BlockFilter, PrevUnplacedBlockIt);
+ BlockFilter, PrevUnplacedBlockIt,
+ PrevUnplacedBlockInFilterIt);
// If the chosen successor was duplicated into BB, don't bother laying
// it out, just go round the loop again with BB as the chain end.
if (!BB->isSuccessor(BestSucc))
@@ -3019,11 +3055,13 @@ bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
MachineBasicBlock *BB, MachineBasicBlock *&LPred,
const MachineBasicBlock *LoopHeaderBB,
BlockChain &Chain, BlockFilterSet *BlockFilter,
- MachineFunction::iterator &PrevUnplacedBlockIt) {
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt) {
bool Removed, DuplicatedToLPred;
bool DuplicatedToOriginalLPred;
Removed = maybeTailDuplicateBlock(BB, LPred, Chain, BlockFilter,
PrevUnplacedBlockIt,
+ PrevUnplacedBlockInFilterIt,
DuplicatedToLPred);
if (!Removed)
return false;
@@ -3047,6 +3085,7 @@ bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
DupPred = *std::prev(ChainEnd);
Removed = maybeTailDuplicateBlock(DupBB, DupPred, Chain, BlockFilter,
PrevUnplacedBlockIt,
+ PrevUnplacedBlockInFilterIt,
DuplicatedToLPred);
}
// If BB was duplicated into LPred, it is now scheduled. But because it was
@@ -3077,6 +3116,7 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
MachineBasicBlock *BB, MachineBasicBlock *LPred,
BlockChain &Chain, BlockFilterSet *BlockFilter,
MachineFunction::iterator &PrevUnplacedBlockIt,
+ BlockFilterSet::iterator &PrevUnplacedBlockInFilterIt,
bool &DuplicatedToLPred) {
DuplicatedToLPred = false;
if (!shouldTailDuplicate(BB))
@@ -3119,6 +3159,8 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
// Handle the filter set
if (BlockFilter) {
BlockFilter->remove(RemBB);
+ if (*PrevUnplacedBlockInFilterIt == RemBB)
+ PrevUnplacedBlockInFilterIt++;
}
// Remove the block from loop info.
>From 46940e6e019648b5797976a2ff76a81add136f43 Mon Sep 17 00:00:00 2001
From: William Huang <williamjhuang at google.com>
Date: Mon, 3 Jun 2024 22:29:32 -0400
Subject: [PATCH 2/3] Fixed iterator use after remove
---
llvm/lib/CodeGen/MachineBlockPlacement.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index df32add0ba849..364d2c6a8c6bf 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -3158,9 +3158,9 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
// Handle the filter set
if (BlockFilter) {
- BlockFilter->remove(RemBB);
if (*PrevUnplacedBlockInFilterIt == RemBB)
PrevUnplacedBlockInFilterIt++;
+ BlockFilter->remove(RemBB);
}
// Remove the block from loop info.
>From 4e50833579d1d969980d21a93385d7a608c8b12a Mon Sep 17 00:00:00 2001
From: William Huang <williamjhuang at google.com>
Date: Tue, 4 Jun 2024 22:32:14 -0400
Subject: [PATCH 3/3] Fixed iterator usage in BlockFilter->remove()
---
llvm/lib/CodeGen/MachineBlockPlacement.cpp | 5 +----
1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 364d2c6a8c6bf..fabf1386eb62b 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -3157,11 +3157,8 @@ bool MachineBlockPlacement::maybeTailDuplicateBlock(
}
// Handle the filter set
- if (BlockFilter) {
- if (*PrevUnplacedBlockInFilterIt == RemBB)
- PrevUnplacedBlockInFilterIt++;
+ if (BlockFilter)
BlockFilter->remove(RemBB);
- }
// Remove the block from loop info.
MLI->removeBlock(RemBB);
More information about the llvm-commits
mailing list