[llvm-branch-commits] [llvm] [FixIrreducible] Use CycleInfo instead of a custom SCC traversal (PR #103014)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Aug 13 00:06:44 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-adt
Author: Sameer Sahasrabuddhe (ssahasra)
<details>
<summary>Changes</summary>
1. CycleInfo efficiently locates all cycles in a single pass, whereas the original implementation repeated its SCC traversal inside every natural loop.
2. CycleInfo provides a hierarchy of irreducible cycles, and the new implementation transforms each cycle in this hierarchy separately instead of reducing an entire irreducible SCC in a single step. This reduces the number of control-flow paths that pass through the header of each newly created loop. This is evidenced by the reduced number of predecessors on the "guard" blocks in the lit tests, and fewer operands on the corresponding PHI nodes.
3. When an entry of an irreducible cycle is the header of a child natural loop, the original implementation destroyed that loop. Such a loop is now preserved, since the incoming edges on non-header entries are not touched.
---
Patch is 74.95 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/103014.diff
10 Files Affected:
- (modified) llvm/include/llvm/ADT/GenericCycleInfo.h (+20-8)
- (modified) llvm/lib/Transforms/Utils/FixIrreducible.cpp (+179-185)
- (modified) llvm/test/CodeGen/AMDGPU/llc-pipeline.ll (+10-5)
- (modified) llvm/test/Transforms/FixIrreducible/basic.ll (+54-44)
- (modified) llvm/test/Transforms/FixIrreducible/bug45623.ll (+5-4)
- (modified) llvm/test/Transforms/FixIrreducible/nested.ll (+97-46)
- (modified) llvm/test/Transforms/FixIrreducible/switch.ll (+5-3)
- (modified) llvm/test/Transforms/FixIrreducible/unreachable.ll (+1)
- (modified) llvm/test/Transforms/StructurizeCFG/workarounds/needs-fix-reducible.ll (+34-22)
- (modified) llvm/test/Transforms/StructurizeCFG/workarounds/needs-fr-ule.ll (+95-78)
``````````diff
diff --git a/llvm/include/llvm/ADT/GenericCycleInfo.h b/llvm/include/llvm/ADT/GenericCycleInfo.h
index b5d719c6313c43..cf13f8e95a35e3 100644
--- a/llvm/include/llvm/ADT/GenericCycleInfo.h
+++ b/llvm/include/llvm/ADT/GenericCycleInfo.h
@@ -107,6 +107,13 @@ template <typename ContextT> class GenericCycle {
return is_contained(Entries, Block);
}
+ /// \brief Replace all entries with \p Block as single entry.
+ void setSingleEntry(BlockT *Block) {
+ assert(contains(Block));
+ Entries.clear();
+ Entries.push_back(Block);
+ }
+
/// \brief Return whether \p Block is contained in the cycle.
bool contains(const BlockT *Block) const { return Blocks.contains(Block); }
@@ -192,11 +199,16 @@ template <typename ContextT> class GenericCycle {
//@{
using const_entry_iterator =
typename SmallVectorImpl<BlockT *>::const_iterator;
-
+ const_entry_iterator entry_begin() const { return Entries.begin(); }
+ const_entry_iterator entry_end() const { return Entries.end(); }
size_t getNumEntries() const { return Entries.size(); }
iterator_range<const_entry_iterator> entries() const {
- return llvm::make_range(Entries.begin(), Entries.end());
+ return llvm::make_range(entry_begin(), entry_end());
}
+ using const_reverse_entry_iterator =
+ typename SmallVectorImpl<BlockT *>::const_reverse_iterator;
+ const_reverse_entry_iterator entry_rbegin() const { return Entries.rbegin(); }
+ const_reverse_entry_iterator entry_rend() const { return Entries.rend(); }
//@}
Printable printEntries(const ContextT &Ctx) const {
@@ -257,12 +269,6 @@ template <typename ContextT> class GenericCycleInfo {
/// the subtree.
void moveTopLevelCycleToNewParent(CycleT *NewParent, CycleT *Child);
- /// Assumes that \p Cycle is the innermost cycle containing \p Block.
- /// \p Block will be appended to \p Cycle and all of its parent cycles.
- /// \p Block will be added to BlockMap with \p Cycle and
- /// BlockMapTopLevel with \p Cycle's top level parent cycle.
- void addBlockToCycle(BlockT *Block, CycleT *Cycle);
-
public:
GenericCycleInfo() = default;
GenericCycleInfo(GenericCycleInfo &&) = default;
@@ -280,6 +286,12 @@ template <typename ContextT> class GenericCycleInfo {
unsigned getCycleDepth(const BlockT *Block) const;
CycleT *getTopLevelParentCycle(BlockT *Block);
+ /// Assumes that \p Cycle is the innermost cycle containing \p Block.
+ /// \p Block will be appended to \p Cycle and all of its parent cycles.
+ /// \p Block will be added to BlockMap with \p Cycle and
+ /// BlockMapTopLevel with \p Cycle's top level parent cycle.
+ void addBlockToCycle(BlockT *Block, CycleT *Cycle);
+
/// Methods for debug and self-test.
//@{
void verifyCycleNest(bool VerifyFull = false, LoopInfoT *LI = nullptr) const;
diff --git a/llvm/lib/Transforms/Utils/FixIrreducible.cpp b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
index 30075af2ffc654..11a26e63c8d375 100644
--- a/llvm/lib/Transforms/Utils/FixIrreducible.cpp
+++ b/llvm/lib/Transforms/Utils/FixIrreducible.cpp
@@ -6,50 +6,66 @@
//
//===----------------------------------------------------------------------===//
//
-// An irreducible SCC is one which has multiple "header" blocks, i.e., blocks
-// with control-flow edges incident from outside the SCC. This pass converts a
-// irreducible SCC into a natural loop by applying the following transformation:
-//
-// 1. Collect the set of headers H of the SCC.
-// 2. Collect the set of predecessors P of these headers. These may be inside as
-// well as outside the SCC.
-// 3. Create block N and redirect every edge from set P to set H through N.
-//
-// This converts the SCC into a natural loop with N as the header: N is the only
-// block with edges incident from outside the SCC, and all backedges in the SCC
-// are incident on N, i.e., for every backedge, the head now dominates the tail.
-//
-// INPUT CFG: The blocks A and B form an irreducible loop with two headers.
+// INPUT CFG: The blocks H and B form an irreducible cycle with two headers.
//
// Entry
// / \
// v v
-// A ----> B
+// H ----> B
// ^ /|
// `----' |
// v
// Exit
//
-// OUTPUT CFG: Edges incident on A and B are now redirected through a
-// new block N, forming a natural loop consisting of N, A and B.
+// OUTPUT CFG: Converted to a natural loop with a new header N.
//
// Entry
// |
// v
-// .---> N <---.
-// / / \ \
-// | / \ |
-// \ v v /
-// `-- A B --'
+// N <---.
+// / \ \
+// / \ |
+// v v /
+// H --> B --'
// |
// v
// Exit
//
-// The transformation is applied to every maximal SCC that is not already
-// recognized as a loop. The pass operates on all maximal SCCs found in the
-// function body outside of any loop, as well as those found inside each loop,
-// including inside any newly created loops. This ensures that any SCC hidden
-// inside a maximal SCC is also transformed.
+// To convert an irreducible cycle C to a natural loop L:
+//
+// 1. Add a new node N to C.
+// 2. Redirect all external incoming edges through N.
+// 3. Redirect all edges incident on header H through N.
+//
+// This is sufficient to ensure that:
+//
+// a. Every closed path in C also exists in L, with the modification that any
+// path passing through H now passes through N before reaching H.
+// b. Every external path incident on any entry of C is now incident on N and
+// then redirected to the entry.
+//
+// Thus, L is a strongly connected component dominated by N, and hence L is a
+// natural loop with header N.
+//
+// When an irreducible cycle C with header H is transformed into a loop, the
+// following invariants hold:
+//
+// 1. No new subcycles are "discovered" in the set (C-H). The only internal
+// edges that are redirected by the transform are incident on H. Any subcycle
+// S in (C-H), already existed prior to this transform, and is already in the
+// list of children for this cycle C.
+//
+// 2. Subcycles of C are not modified by the transform. For some subcycle S of
+// C, edges incident on the entries of S are either internal to C, or they
+// are now redirected through N, which is outside of S. So the list of
+// entries to S does not change. Since the transform only adds a block
+// outside S, and redirects edges that are not internal to S, the list of
+// blocks in S does not change.
+//
+// 3. Similarly, any natural loop L included in C is not affected, with one
+// exception: L is "destroyed" by the transform iff its header is H. The
+// backedges of such a loop are now redirected to N instead, and hence the
+// body of this loop gets merged into the new loop with header N.
//
// The actual transformation is handled by the ControlFlowHub, which redirects
// specified control flow edges through a set of guard blocks. This also moves
@@ -67,8 +83,9 @@
#include "llvm/Transforms/Utils/FixIrreducible.h"
#include "llvm/ADT/SCCIterator.h"
+#include "llvm/Analysis/CycleAnalysis.h"
#include "llvm/Analysis/DomTreeUpdater.h"
-#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils.h"
@@ -88,8 +105,9 @@ struct FixIrreducible : public FunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequired<CycleInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<CycleInfoWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
}
@@ -113,16 +131,14 @@ INITIALIZE_PASS_END(FixIrreducible, "fix-irreducible",
// When a new loop is created, existing children of the parent loop may now be
// fully inside the new loop. Reconnect these as children of the new loop.
static void reconnectChildLoops(LoopInfo &LI, Loop *ParentLoop, Loop *NewLoop,
- SetVector<BasicBlock *> &Blocks,
- SetVector<BasicBlock *> &Headers) {
+ BasicBlock *OldHeader) {
auto &CandidateLoops = ParentLoop ? ParentLoop->getSubLoopsVector()
: LI.getTopLevelLoopsVector();
- // The new loop cannot be its own child, and any candidate is a
- // child iff its header is owned by the new loop. Move all the
- // children to a new vector.
+ // Any candidate is a child iff its header is owned by the new loop. Move all
+ // the children to a new vector.
auto FirstChild = std::partition(
CandidateLoops.begin(), CandidateLoops.end(), [&](Loop *L) {
- return L == NewLoop || !Blocks.contains(L->getHeader());
+ return NewLoop == L || !NewLoop->contains(L->getHeader());
});
SmallVector<Loop *, 8> ChildLoops(FirstChild, CandidateLoops.end());
CandidateLoops.erase(FirstChild, CandidateLoops.end());
@@ -130,10 +146,9 @@ static void reconnectChildLoops(LoopInfo &LI, Loop *ParentLoop, Loop *NewLoop,
for (Loop *Child : ChildLoops) {
LLVM_DEBUG(dbgs() << "child loop: " << Child->getHeader()->getName()
<< "\n");
- // TODO: A child loop whose header is also a header in the current
- // SCC gets destroyed since its backedges are removed. That may
- // not be necessary if we can retain such backedges.
- if (Headers.count(Child->getHeader())) {
+ // A child loop whose header was the old cycle header gets destroyed since
+ // its backedges are removed.
+ if (Child->getHeader() == OldHeader) {
for (auto *BB : Child->blocks()) {
if (LI.getLoopFor(BB) != Child)
continue;
@@ -158,63 +173,11 @@ static void reconnectChildLoops(LoopInfo &LI, Loop *ParentLoop, Loop *NewLoop,
}
}
-// Given a set of blocks and headers in an irreducible SCC, convert it into a
-// natural loop. Also insert this new loop at its appropriate place in the
-// hierarchy of loops.
-static void createNaturalLoopInternal(LoopInfo &LI, DominatorTree &DT,
- Loop *ParentLoop,
- SetVector<BasicBlock *> &Blocks,
- SetVector<BasicBlock *> &Headers) {
-#ifndef NDEBUG
- // All headers are part of the SCC
- for (auto *H : Headers) {
- assert(Blocks.count(H));
- }
-#endif
-
- SetVector<BasicBlock *> Predecessors;
- for (auto *H : Headers) {
- for (auto *P : predecessors(H)) {
- Predecessors.insert(P);
- }
- }
-
- LLVM_DEBUG(
- dbgs() << "Found predecessors:";
- for (auto P : Predecessors) {
- dbgs() << " " << P->getName();
- }
- dbgs() << "\n");
-
- // Redirect all the backedges through a "hub" consisting of a series
- // of guard blocks that manage the flow of control from the
- // predecessors to the headers.
- ControlFlowHub CHub;
- for (BasicBlock *P : Predecessors) {
- auto *Branch = cast<BranchInst>(P->getTerminator());
- BasicBlock *Succ0 = Branch->getSuccessor(0);
- Succ0 = Headers.count(Succ0) ? Succ0 : nullptr;
- BasicBlock *Succ1 =
- Branch->isUnconditional() ? nullptr : Branch->getSuccessor(1);
- Succ1 = Succ1 && Headers.count(Succ1) ? Succ1 : nullptr;
- CHub.addBranch(P, Succ0, Succ1);
-
- LLVM_DEBUG(dbgs() << "Added internal branch: " << P->getName() << " -> "
- << (Succ0 ? Succ0->getName() : "") << " "
- << (Succ1 ? Succ1->getName() : "") << "\n");
- }
-
- SmallVector<BasicBlock *, 8> GuardBlocks;
- DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
- CHub.finalize(&DTU, GuardBlocks, "irr");
-#if defined(EXPENSIVE_CHECKS)
- assert(DT.verify(DominatorTree::VerificationLevel::Full));
-#else
- assert(DT.verify(DominatorTree::VerificationLevel::Fast));
-#endif
-
+static void updateLoopInfo(LoopInfo &LI, Cycle &C,
+ ArrayRef<BasicBlock *> GuardBlocks) {
+ Loop *ParentLoop = LI.getLoopFor(C.getHeader());
// Create a new loop from the now-transformed cycle
- auto NewLoop = LI.AllocateLoop();
+ auto *NewLoop = LI.AllocateLoop();
if (ParentLoop) {
ParentLoop->addChildLoop(NewLoop);
} else {
@@ -227,12 +190,11 @@ static void createNaturalLoopInternal(LoopInfo &LI, DominatorTree &DT,
// header. Since the new loop is already in LoopInfo, the new blocks
// are also propagated up the chain of parent loops.
for (auto *G : GuardBlocks) {
- LLVM_DEBUG(dbgs() << "added guard block: " << G->getName() << "\n");
+ LLVM_DEBUG(dbgs() << "added guard block to loop: " << G->getName() << "\n");
NewLoop->addBasicBlockToLoop(G, LI);
}
- // Add the SCC blocks to the new loop.
- for (auto *BB : Blocks) {
+ for (auto *BB : C.blocks()) {
NewLoop->addBlockEntry(BB);
if (LI.getLoopFor(BB) == ParentLoop) {
LLVM_DEBUG(dbgs() << "moved block from parent: " << BB->getName()
@@ -245,129 +207,161 @@ static void createNaturalLoopInternal(LoopInfo &LI, DominatorTree &DT,
LLVM_DEBUG(dbgs() << "header for new loop: "
<< NewLoop->getHeader()->getName() << "\n");
- reconnectChildLoops(LI, ParentLoop, NewLoop, Blocks, Headers);
+ reconnectChildLoops(LI, ParentLoop, NewLoop, C.getHeader());
+ LLVM_DEBUG(dbgs() << "Verify new loop.\n"; NewLoop->print(dbgs()));
NewLoop->verifyLoop();
if (ParentLoop) {
+ LLVM_DEBUG(dbgs() << "Verify parent loop.\n"; ParentLoop->print(dbgs()));
ParentLoop->verifyLoop();
}
-#if defined(EXPENSIVE_CHECKS)
- LI.verify(DT);
-#endif // EXPENSIVE_CHECKS
}
-namespace llvm {
-// Enable the graph traits required for traversing a Loop body.
-template <> struct GraphTraits<Loop> : LoopBodyTraits {};
-} // namespace llvm
+// Given a set of blocks and headers in an irreducible SCC, convert it into a
+// natural loop. Also insert this new loop at its appropriate place in the
+// hierarchy of loops.
+static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT,
+ LoopInfo *LI) {
+ if (C.isReducible())
+ return false;
+ LLVM_DEBUG(dbgs() << "Processing cycle:\n" << CI.print(&C) << "\n";);
-// Overloaded wrappers to go with the function template below.
-static BasicBlock *unwrapBlock(BasicBlock *B) { return B; }
-static BasicBlock *unwrapBlock(LoopBodyTraits::NodeRef &N) { return N.second; }
+ ControlFlowHub CHub;
+ SetVector<BasicBlock *> Predecessors;
-static void createNaturalLoop(LoopInfo &LI, DominatorTree &DT, Function *F,
- SetVector<BasicBlock *> &Blocks,
- SetVector<BasicBlock *> &Headers) {
- createNaturalLoopInternal(LI, DT, nullptr, Blocks, Headers);
-}
+ // Redirect internal edges incident on the header.
+ BasicBlock *Header = C.getHeader();
+ for (BasicBlock *P : predecessors(Header)) {
+ if (C.contains(P))
+ Predecessors.insert(P);
+ }
-static void createNaturalLoop(LoopInfo &LI, DominatorTree &DT, Loop &L,
- SetVector<BasicBlock *> &Blocks,
- SetVector<BasicBlock *> &Headers) {
- createNaturalLoopInternal(LI, DT, &L, Blocks, Headers);
-}
+ for (BasicBlock *P : Predecessors) {
+ auto *Branch = cast<BranchInst>(P->getTerminator());
+ // Exactly one of the two successors is the header.
+ BasicBlock *Succ0 = Branch->getSuccessor(0) == Header ? Header : nullptr;
+ BasicBlock *Succ1 = Succ0 ? nullptr : Header;
+ if (!Succ0)
+ assert(Branch->getSuccessor(1) == Header);
+ assert(Succ0 || Succ1);
+ CHub.addBranch(P, Succ0, Succ1);
-// Convert irreducible SCCs; Graph G may be a Function* or a Loop&.
-template <class Graph>
-static bool makeReducible(LoopInfo &LI, DominatorTree &DT, Graph &&G) {
- bool Changed = false;
- for (auto Scc = scc_begin(G); !Scc.isAtEnd(); ++Scc) {
- if (Scc->size() < 2)
- continue;
- SetVector<BasicBlock *> Blocks;
- LLVM_DEBUG(dbgs() << "Found SCC:");
- for (auto N : *Scc) {
- auto BB = unwrapBlock(N);
- LLVM_DEBUG(dbgs() << " " << BB->getName());
- Blocks.insert(BB);
- }
- LLVM_DEBUG(dbgs() << "\n");
-
- // Minor optimization: The SCC blocks are usually discovered in an order
- // that is the opposite of the order in which these blocks appear as branch
- // targets. This results in a lot of condition inversions in the control
- // flow out of the new ControlFlowHub, which can be mitigated if the orders
- // match. So we discover the headers using the reverse of the block order.
- SetVector<BasicBlock *> Headers;
- LLVM_DEBUG(dbgs() << "Found headers:");
- for (auto *BB : reverse(Blocks)) {
- for (const auto P : predecessors(BB)) {
- // Skip unreachable predecessors.
- if (!DT.isReachableFromEntry(P))
- continue;
- if (!Blocks.count(P)) {
- LLVM_DEBUG(dbgs() << " " << BB->getName());
- Headers.insert(BB);
- break;
- }
- }
- }
- LLVM_DEBUG(dbgs() << "\n");
+ LLVM_DEBUG(dbgs() << "Added internal branch: " << P->getName() << " -> "
+ << (Succ0 ? Succ0->getName() : "") << " "
+ << (Succ1 ? Succ1->getName() : "") << "\n");
+ }
- if (Headers.size() == 1) {
- assert(LI.isLoopHeader(Headers.front()));
- LLVM_DEBUG(dbgs() << "Natural loop with a single header: skipped\n");
- continue;
+ // Redirect external incoming edges. This includes the edges on the header.
+ Predecessors.clear();
+ for (BasicBlock *E : C.entries()) {
+ for (BasicBlock *P : predecessors(E)) {
+ if (!C.contains(P))
+ Predecessors.insert(P);
}
- createNaturalLoop(LI, DT, G, Blocks, Headers);
- Changed = true;
}
- return Changed;
+
+ for (BasicBlock *P : Predecessors) {
+ auto *Branch = cast<BranchInst>(P->getTerminator());
+ BasicBlock *Succ0 = Branch->getSuccessor(0);
+ Succ0 = C.contains(Succ0) ? Succ0 : nullptr;
+ BasicBlock *Succ1 =
+ Branch->isUnconditional() ? nullptr : Branch->getSuccessor(1);
+ Succ1 = Succ1 && C.contains(Succ1) ? Succ1 : nullptr;
+ CHub.addBranch(P, Succ0, Succ1);
+
+ LLVM_DEBUG(dbgs() << "Added external branch: " << P->getName() << " -> "
+ << (Succ0 ? Succ0->getName() : "") << " "
+ << (Succ1 ? Succ1->getName() : "") << "\n");
+ }
+
+ // Redirect all the backedges through a "hub" consisting of a series
+ // of guard blocks that manage the flow of control from the
+ // predecessors to the headers.
+ SmallVector<BasicBlock *> GuardBlocks;
+
+ // Minor optimization: The cycle entries are discovered in an order that is
+ // the opposite of the order in which these blocks appear as branch targets.
+ // This results in a lot of condition inversions in the control flow out of
+ // the new ControlFlowHub, which can be mitigated if the orders match. So we
+ // reverse the entries when adding them to the hub.
+ SetVector<BasicBlock *> Entries;
+ Entries.insert(C.entry_rbegin(), C.entry_rend());
+
+ DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
+ CHub.finalize(&DTU, GuardBlocks, "irr");
+#if defined(EXPENSIVE_CHECKS)
+ assert(DT.verify(DominatorTree::VerificationLevel::Full));
+#else
+ assert(DT.verify(DominatorTree::VerificationLevel::Fast));
+#endif
+
+ // If we are updating LoopInfo, do that now before modifying the cycle. This
+ // ensures that the first guard block is the header of a new natural loop.
+ if (LI)
+ updateLoopInfo(*LI, C, GuardBlocks);
+
+ for (auto *G : GuardBlocks) {
+ LLVM_DEBUG(dbgs() << "added guard block to cycle: " << G->getName()
+ << "\n");
+ CI.addBlockToCycle(G, &C);
+ }
+ C.setSingleEntry(GuardBlocks[0]);
+
+ C.verifyCycle();
+ if (Cycle *Parent = C.getParentCycle())
+ Parent->verifyCycle();
+
+ ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/103014
More information about the llvm-branch-commits
mailing list