[llvm] [LoopFusion] Simplifying the legality checks (PR #171889)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 11 11:57:43 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Alireza Torabian (1997alireza)
<details>
<summary>Changes</summary>
Considering that the current loop fusion only supports adjacent loops, we are able to simplify the checks in this pass. By removing `isControlFlowEquivalent` check, this patch fixes multiple issues including #<!-- -->166560, #<!-- -->166535, #<!-- -->165031, #<!-- -->80301 and #<!-- -->168263.
Now only the sequential candidates are collected in the same list. This patch is the implementation of approach 2 discussed in post #<!-- -->171207.
---
Patch is 57.08 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/171889.diff
6 Files Affected:
- (modified) llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h (-16)
- (modified) llvm/lib/Transforms/Scalar/LoopFuse.cpp (+215-305)
- (modified) llvm/lib/Transforms/Utils/CodeMoverUtils.cpp (-44)
- (modified) llvm/test/Transforms/LoopFusion/cannot_fuse.ll (+16-21)
- (modified) llvm/test/Transforms/LoopFusion/diagnostics_missed.ll (+2-55)
- (modified) llvm/unittests/Transforms/Utils/CodeMoverUtilsTest.cpp (-367)
``````````diff
diff --git a/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h b/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h
index 877872485ab58..d473f7092f62e 100644
--- a/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h
@@ -27,22 +27,6 @@ class DominatorTree;
class Instruction;
class PostDominatorTree;
-/// Return true if \p I0 and \p I1 are control flow equivalent.
-/// Two instructions are control flow equivalent if their basic blocks are
-/// control flow equivalent.
-LLVM_ABI bool isControlFlowEquivalent(const Instruction &I0,
- const Instruction &I1,
- const DominatorTree &DT,
- const PostDominatorTree &PDT);
-
-/// Return true if \p BB0 and \p BB1 are control flow equivalent.
-/// Two basic blocks are control flow equivalent if when one executes, the other
-/// is guaranteed to execute.
-LLVM_ABI bool isControlFlowEquivalent(const BasicBlock &BB0,
- const BasicBlock &BB1,
- const DominatorTree &DT,
- const PostDominatorTree &PDT);
-
/// Return true if \p I can be safely moved before \p InsertPoint.
LLVM_ABI bool isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
DominatorTree &DT,
diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index 9ffa602416b05..a71b01a9aaf24 100644
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -64,6 +64,7 @@
#include "llvm/Transforms/Utils/CodeMoverUtils.h"
#include "llvm/Transforms/Utils/LoopPeel.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
+#include <list>
using namespace llvm;
@@ -174,10 +175,6 @@ struct FusionCandidate {
/// Has this loop been Peeled
bool Peeled;
- /// Dominator and PostDominator trees are needed for the
- /// FusionCandidateCompare function, required by FusionCandidateSet to
- /// determine where the FusionCandidate should be inserted into the set. These
- /// are used to establish ordering of the FusionCandidates based on dominance.
DominatorTree &DT;
const PostDominatorTree *PDT;
@@ -358,10 +355,10 @@ struct FusionCandidate {
private:
// This is only used internally for now, to clear the MemWrites and MemReads
// list and setting Valid to false. I can't envision other uses of this right
- // now, since once FusionCandidates are put into the FusionCandidateSet they
+ // now, since once FusionCandidates are put into the FusionCandidateList they
// are immutable. Thus, any time we need to change/update a FusionCandidate,
- // we must create a new one and insert it into the FusionCandidateSet to
- // ensure the FusionCandidateSet remains ordered correctly.
+ // we must create a new one and insert it into the FusionCandidateList to
+ // ensure the FusionCandidateList remains ordered correctly.
void invalidate() {
MemWrites.clear();
MemReads.clear();
@@ -381,86 +378,15 @@ struct FusionCandidate {
return false;
}
};
-
-struct FusionCandidateCompare {
- /// Comparison functor to sort two Control Flow Equivalent fusion candidates
- /// into dominance order.
- /// If LHS dominates RHS and RHS post-dominates LHS, return true;
- /// If RHS dominates LHS and LHS post-dominates RHS, return false;
- /// If both LHS and RHS are not dominating each other then, non-strictly
- /// post dominate check will decide the order of candidates. If RHS
- /// non-strictly post dominates LHS then, return true. If LHS non-strictly
- /// post dominates RHS then, return false. If both are non-strictly post
- /// dominate each other then, level in the post dominator tree will decide
- /// the order of candidates.
- bool operator()(const FusionCandidate &LHS,
- const FusionCandidate &RHS) const {
- const DominatorTree *DT = &(LHS.DT);
-
- BasicBlock *LHSEntryBlock = LHS.getEntryBlock();
- BasicBlock *RHSEntryBlock = RHS.getEntryBlock();
-
- // Do not save PDT to local variable as it is only used in asserts and thus
- // will trigger an unused variable warning if building without asserts.
- assert(DT && LHS.PDT && "Expecting valid dominator tree");
-
- // Do this compare first so if LHS == RHS, function returns false.
- if (DT->dominates(RHSEntryBlock, LHSEntryBlock)) {
- // RHS dominates LHS
- // Verify LHS post-dominates RHS
- assert(LHS.PDT->dominates(LHSEntryBlock, RHSEntryBlock));
- return false;
- }
-
- if (DT->dominates(LHSEntryBlock, RHSEntryBlock)) {
- // Verify RHS Postdominates LHS
- assert(LHS.PDT->dominates(RHSEntryBlock, LHSEntryBlock));
- return true;
- }
-
- // If two FusionCandidates are in the same level of dominator tree,
- // they will not dominate each other, but may still be control flow
- // equivalent. To sort those FusionCandidates, nonStrictlyPostDominate()
- // function is needed.
- bool WrongOrder =
- nonStrictlyPostDominate(LHSEntryBlock, RHSEntryBlock, DT, LHS.PDT);
- bool RightOrder =
- nonStrictlyPostDominate(RHSEntryBlock, LHSEntryBlock, DT, LHS.PDT);
- if (WrongOrder && RightOrder) {
- // If common predecessor of LHS and RHS post dominates both
- // FusionCandidates then, Order of FusionCandidate can be
- // identified by its level in post dominator tree.
- DomTreeNode *LNode = LHS.PDT->getNode(LHSEntryBlock);
- DomTreeNode *RNode = LHS.PDT->getNode(RHSEntryBlock);
- return LNode->getLevel() > RNode->getLevel();
- } else if (WrongOrder)
- return false;
- else if (RightOrder)
- return true;
-
- // If LHS does not non-strict Postdominate RHS and RHS does not non-strict
- // Postdominate LHS then, there is no dominance relationship between the
- // two FusionCandidates. Thus, they should not be in the same set together.
- llvm_unreachable(
- "No dominance relationship between these fusion candidates!");
- }
-};
} // namespace
using LoopVector = SmallVector<Loop *, 4>;
-// Set of Control Flow Equivalent (CFE) Fusion Candidates, sorted in dominance
-// order. Thus, if FC0 comes *before* FC1 in a FusionCandidateSet, then FC0
-// dominates FC1 and FC1 post-dominates FC0.
-// std::set was chosen because we want a sorted data structure with stable
-// iterators. A subsequent patch to loop fusion will enable fusing non-adjacent
-// loops by moving intervening code around. When this intervening code contains
-// loops, those loops will be moved also. The corresponding FusionCandidates
-// will also need to be moved accordingly. As this is done, having stable
-// iterators will simplify the logic. Similarly, having an efficient insert that
-// keeps the FusionCandidateSet sorted will also simplify the implementation.
-using FusionCandidateSet = std::set<FusionCandidate, FusionCandidateCompare>;
-using FusionCandidateCollection = SmallVector<FusionCandidateSet, 4>;
+// List of adjacent Fusion Candidates in order. Thus, if FC0 comes *before* FC1
+// in a FusionCandidateList, then FC0 dominates FC1, FC1 post-dominates FC0,
+// and they are sequential.
+using FusionCandidateList = std::list<FusionCandidate>;
+using FusionCandidateCollection = SmallVector<FusionCandidateList, 4>;
#ifndef NDEBUG
static void printLoopVector(const LoopVector &LV) {
@@ -480,8 +406,8 @@ static raw_ostream &operator<<(raw_ostream &OS, const FusionCandidate &FC) {
}
static raw_ostream &operator<<(raw_ostream &OS,
- const FusionCandidateSet &CandSet) {
- for (const FusionCandidate &FC : CandSet)
+ const FusionCandidateList &CandList) {
+ for (const FusionCandidate &FC : CandList)
OS << FC << '\n';
return OS;
@@ -490,9 +416,9 @@ static raw_ostream &operator<<(raw_ostream &OS,
static void
printFusionCandidates(const FusionCandidateCollection &FusionCandidates) {
dbgs() << "Fusion Candidates: \n";
- for (const auto &CandidateSet : FusionCandidates) {
- dbgs() << "*** Fusion Candidate Set ***\n";
- dbgs() << CandidateSet;
+ for (const auto &CandidateList : FusionCandidates) {
+ dbgs() << "*** Fusion Candidate List ***\n";
+ dbgs() << CandidateList;
dbgs() << "****************************\n";
}
}
@@ -648,20 +574,6 @@ struct LoopFuser {
}
private:
- /// Determine if two fusion candidates are control flow equivalent.
- ///
- /// Two fusion candidates are control flow equivalent if when one executes,
- /// the other is guaranteed to execute. This is determined using dominators
- /// and post-dominators: if A dominates B and B post-dominates A then A and B
- /// are control-flow equivalent.
- bool isControlFlowEquivalent(const FusionCandidate &FC0,
- const FusionCandidate &FC1) const {
- assert(FC0.Preheader && FC1.Preheader && "Expecting valid preheaders");
-
- return ::isControlFlowEquivalent(*FC0.getEntryBlock(), *FC1.getEntryBlock(),
- DT, PDT);
- }
-
/// Iterate over all loops in the given loop set and identify the loops that
/// are eligible for fusion. Place all eligible fusion candidates into Control
/// Flow Equivalent sets, sorted by dominance.
@@ -673,34 +585,42 @@ struct LoopFuser {
if (!CurrCand.isEligibleForFusion(SE))
continue;
- // Go through each list in FusionCandidates and determine if L is control
- // flow equivalent with the first loop in that list. If it is, append LV.
+ // Go through each list in FusionCandidates and determine if the first or
+ // last loop in the list is strictly adjacent to L. If it is, append L.
// If not, go to the next list.
// If no suitable list is found, start another list and add it to
// FusionCandidates.
- bool FoundSet = false;
-
- for (auto &CurrCandSet : FusionCandidates) {
- if (isControlFlowEquivalent(*CurrCandSet.begin(), CurrCand)) {
- CurrCandSet.insert(CurrCand);
- FoundSet = true;
+ bool FoundAdjacent = false;
+ for (auto &CurrCandList : FusionCandidates) {
+ if (isStrictlyAdjacent(CurrCand, CurrCandList.front())) {
+ CurrCandList.push_front(CurrCand);
+ FoundAdjacent = true;
#ifndef NDEBUG
if (VerboseFusionDebugging)
LLVM_DEBUG(dbgs() << "Adding " << CurrCand
- << " to existing candidate set\n");
+ << " to existing candidate list\n");
+#endif
+ break;
+ } else if (isStrictlyAdjacent(CurrCandList.back(), CurrCand)) {
+ CurrCandList.push_back(CurrCand);
+ FoundAdjacent = true;
+#ifndef NDEBUG
+ if (VerboseFusionDebugging)
+ LLVM_DEBUG(dbgs() << "Adding " << CurrCand
+ << " to existing candidate list\n");
#endif
break;
}
}
- if (!FoundSet) {
- // No set was found. Create a new set and add to FusionCandidates
+ if (!FoundAdjacent) {
+ // No list was found. Create a new list and add to FusionCandidates
#ifndef NDEBUG
if (VerboseFusionDebugging)
- LLVM_DEBUG(dbgs() << "Adding " << CurrCand << " to new set\n");
+ LLVM_DEBUG(dbgs() << "Adding " << CurrCand << " to new list\n");
#endif
- FusionCandidateSet NewCandSet;
- NewCandSet.insert(CurrCand);
- FusionCandidates.push_back(NewCandSet);
+ FusionCandidateList NewCandList;
+ NewCandList.push_back(CurrCand);
+ FusionCandidates.push_back(NewCandList);
}
NumFusionCandidates++;
}
@@ -849,218 +769,205 @@ struct LoopFuser {
}
}
- /// Walk each set of control flow equivalent fusion candidates and attempt to
- /// fuse them. This does a single linear traversal of all candidates in the
- /// set. The conditions for legal fusion are checked at this point. If a pair
- /// of fusion candidates passes all legality checks, they are fused together
- /// and a new fusion candidate is created and added to the FusionCandidateSet.
+ /// Walk each set of strictly adjacent fusion candidates and attempt to fuse
+ /// them. This does a single linear traversal of all candidates in the list.
+ /// The conditions for legal fusion are checked at this point. If a pair of
+ /// fusion candidates passes all legality checks, they are fused together and
+ /// a new fusion candidate is created and added to the FusionCandidateList.
/// The original fusion candidates are then removed, as they are no longer
/// valid.
bool fuseCandidates() {
bool Fused = false;
LLVM_DEBUG(printFusionCandidates(FusionCandidates));
- for (auto &CandidateSet : FusionCandidates) {
- if (CandidateSet.size() < 2)
+ for (auto &CandidateList : FusionCandidates) {
+ if (CandidateList.size() < 2)
continue;
- LLVM_DEBUG(dbgs() << "Attempting fusion on Candidate Set:\n"
- << CandidateSet << "\n");
-
- for (auto FC0 = CandidateSet.begin(); FC0 != CandidateSet.end(); ++FC0) {
- assert(!LDT.isRemovedLoop(FC0->L) &&
- "Should not have removed loops in CandidateSet!");
- auto FC1 = FC0;
- for (++FC1; FC1 != CandidateSet.end(); ++FC1) {
- assert(!LDT.isRemovedLoop(FC1->L) &&
- "Should not have removed loops in CandidateSet!");
-
- LLVM_DEBUG(dbgs() << "Attempting to fuse candidate \n"; FC0->dump();
- dbgs() << " with\n"; FC1->dump(); dbgs() << "\n");
-
- FC0->verify();
- FC1->verify();
-
- // Check if the candidates have identical tripcounts (first value of
- // pair), and if not check the difference in the tripcounts between
- // the loops (second value of pair). The difference is not equal to
- // std::nullopt iff the loops iterate a constant number of times, and
- // have a single exit.
- std::pair<bool, std::optional<unsigned>> IdenticalTripCountRes =
- haveIdenticalTripCounts(*FC0, *FC1);
- bool SameTripCount = IdenticalTripCountRes.first;
- std::optional<unsigned> TCDifference = IdenticalTripCountRes.second;
-
- // Here we are checking that FC0 (the first loop) can be peeled, and
- // both loops have different tripcounts.
- if (FC0->AbleToPeel && !SameTripCount && TCDifference) {
- if (*TCDifference > FusionPeelMaxCount) {
- LLVM_DEBUG(dbgs()
- << "Difference in loop trip counts: " << *TCDifference
- << " is greater than maximum peel count specificed: "
- << FusionPeelMaxCount << "\n");
- } else {
- // Dependent on peeling being performed on the first loop, and
- // assuming all other conditions for fusion return true.
- SameTripCount = true;
- }
- }
+ LLVM_DEBUG(dbgs() << "Attempting fusion on Candidate List:\n"
+ << CandidateList << "\n");
- if (!SameTripCount) {
- LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical trip "
- "counts. Not fusing.\n");
- reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
- NonEqualTripCount);
- continue;
- }
+ for (auto It = CandidateList.begin(), NextIt = std::next(It);
+ NextIt != CandidateList.end(); It = NextIt, NextIt = std::next(It)) {
- if (!isAdjacent(*FC0, *FC1)) {
- LLVM_DEBUG(dbgs()
- << "Fusion candidates are not adjacent. Not fusing.\n");
- reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1, NonAdjacent);
- continue;
- }
+ auto FC0 = *It;
+ auto FC1 = *NextIt;
- if ((!FC0->GuardBranch && FC1->GuardBranch) ||
- (FC0->GuardBranch && !FC1->GuardBranch)) {
- LLVM_DEBUG(dbgs() << "The one of candidate is guarded while the "
- "another one is not. Not fusing.\n");
- reportLoopFusion<OptimizationRemarkMissed>(
- *FC0, *FC1, OnlySecondCandidateIsGuarded);
- continue;
- }
+ assert(!LDT.isRemovedLoop(FC0.L) &&
+ "Should not have removed loops in CandidateList!");
+ assert(!LDT.isRemovedLoop(FC1.L) &&
+ "Should not have removed loops in CandidateList!");
- // Ensure that FC0 and FC1 have identical guards.
- // If one (or both) are not guarded, this check is not necessary.
- if (FC0->GuardBranch && FC1->GuardBranch &&
- !haveIdenticalGuards(*FC0, *FC1) && !TCDifference) {
- LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical "
- "guards. Not Fusing.\n");
- reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
- NonIdenticalGuards);
- continue;
- }
+ LLVM_DEBUG(dbgs() << "Attempting to fuse candidate \n"; FC0.dump();
+ dbgs() << " with\n"; FC1.dump(); dbgs() << "\n");
- if (FC0->GuardBranch) {
- assert(FC1->GuardBranch && "Expecting valid FC1 guard branch");
-
- if (!isSafeToMoveBefore(*FC0->ExitBlock,
- *FC1->ExitBlock->getFirstNonPHIOrDbg(), DT,
- &PDT, &DI)) {
- LLVM_DEBUG(dbgs() << "Fusion candidate contains unsafe "
- "instructions in exit block. Not fusing.\n");
- reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
- NonEmptyExitBlock);
- continue;
- }
+ FC0.verify();
+ FC1.verify();
- if (!isSafeToMoveBefore(
- *FC1->GuardBranch->getParent(),
- *FC0->GuardBranch->getParent()->getTerminator(), DT, &PDT,
- &DI)) {
- LLVM_DEBUG(dbgs()
- << "Fusion candidate contains unsafe "
- "instructions in guard block. Not fusing.\n");
- reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
- NonEmptyGuardBlock);
- continue;
- }
- }
-
- // Check the dependencies across the loops and do not fuse if it would
- // violate them.
- if (!dependencesAllowFusion(*FC0, *FC1)) {
- LLVM_DEBUG(dbgs() << "Memory dependencies do not allow fusion!\n");
- reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
- InvalidDependencies);
- continue;
- }
+ // Check if the candidates have identical tripcounts (first value of
+ // pair), and if not check the difference in the tripcounts between
+ // the loops (second value of pair). The difference is not equal to
+ // std::nullopt iff the loops iterate a constant number of times, and
+ // have a single exit.
+ std::pair<bool, std::optional<unsigned>> IdenticalTripCountRes =
+ haveIdenticalTripCounts(FC0, FC1);
+ bool SameTripCount = IdenticalTripCountRes.first;
+ std::optional<unsigned> TCDifference = IdenticalTripCountRes.second;
- // If the second loop has instructions in the pre-header, attempt to
- // hoist them up to the first loop's pre-header or sink them into the
- // body of the second loop.
- SmallVector<Instruction *, 4> SafeToHoist;
- SmallVector<Instruction *, 4> SafeToSink;
- // At this point, this is the last remaining legality check.
- // Which means if we can make this pre-header empty, we can fuse
- // these loops
- if (!isEmptyPreheader(*FC1)) {
- LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty "
- "preheader.\n");
-
- // If it is n...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/171889
More information about the llvm-commits
mailing list