[llvm] 9bc38df - [LoopFusion] Simplifying the legality checks (#171889)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 12 12:09:38 PST 2025
Author: Alireza Torabian
Date: 2025-12-12T15:09:34-05:00
New Revision: 9bc38df587def5e88cfb18ec7f912c9a588c5d4b
URL: https://github.com/llvm/llvm-project/commit/9bc38df587def5e88cfb18ec7f912c9a588c5d4b
DIFF: https://github.com/llvm/llvm-project/commit/9bc38df587def5e88cfb18ec7f912c9a588c5d4b.diff
LOG: [LoopFusion] Simplifying the legality checks (#171889)
Considering that the current loop fusion only supports adjacent loops,
we are able to simplify the checks in this pass. By removing
`isControlFlowEquivalent` check, this patch fixes multiple issues
including #166560, #166535, #165031, #80301 and #168263.
Now only the sequential/adjacent candidates are collected in the same
list. This patch is the implementation of approach 2 discussed in post
#171207.
Added:
Modified:
llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h
llvm/lib/Transforms/Scalar/LoopFuse.cpp
llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
llvm/test/Transforms/LoopFusion/cannot_fuse.ll
llvm/test/Transforms/LoopFusion/diagnostics_missed.ll
llvm/unittests/Transforms/Utils/CodeMoverUtilsTest.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h b/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h
index 877872485ab58..d473f7092f62e 100644
--- a/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h
@@ -27,22 +27,6 @@ class DominatorTree;
class Instruction;
class PostDominatorTree;
-/// Return true if \p I0 and \p I1 are control flow equivalent.
-/// Two instructions are control flow equivalent if their basic blocks are
-/// control flow equivalent.
-LLVM_ABI bool isControlFlowEquivalent(const Instruction &I0,
- const Instruction &I1,
- const DominatorTree &DT,
- const PostDominatorTree &PDT);
-
-/// Return true if \p BB0 and \p BB1 are control flow equivalent.
-/// Two basic blocks are control flow equivalent if when one executes, the other
-/// is guaranteed to execute.
-LLVM_ABI bool isControlFlowEquivalent(const BasicBlock &BB0,
- const BasicBlock &BB1,
- const DominatorTree &DT,
- const PostDominatorTree &PDT);
-
/// Return true if \p I can be safely moved before \p InsertPoint.
LLVM_ABI bool isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
DominatorTree &DT,
diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index 9ffa602416b05..3a06c3f00fa02 100644
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -64,6 +64,7 @@
#include "llvm/Transforms/Utils/CodeMoverUtils.h"
#include "llvm/Transforms/Utils/LoopPeel.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
+#include <list>
using namespace llvm;
@@ -85,7 +86,6 @@ STATISTIC(InvalidDependencies, "Dependencies prevent fusion");
STATISTIC(UnknownTripCount, "Loop has unknown trip count");
STATISTIC(UncomputableTripCount, "SCEV cannot compute trip count of loop");
STATISTIC(NonEqualTripCount, "Loop trip counts are not the same");
-STATISTIC(NonAdjacent, "Loops are not adjacent");
STATISTIC(
NonEmptyPreheader,
"Loop has a non-empty preheader with instructions that cannot be moved");
@@ -174,10 +174,6 @@ struct FusionCandidate {
/// Has this loop been Peeled
bool Peeled;
- /// Dominator and PostDominator trees are needed for the
- /// FusionCandidateCompare function, required by FusionCandidateSet to
- /// determine where the FusionCandidate should be inserted into the set. These
- /// are used to establish ordering of the FusionCandidates based on dominance.
DominatorTree &DT;
const PostDominatorTree *PDT;
@@ -358,10 +354,10 @@ struct FusionCandidate {
private:
// This is only used internally for now, to clear the MemWrites and MemReads
// list and setting Valid to false. I can't envision other uses of this right
- // now, since once FusionCandidates are put into the FusionCandidateSet they
+ // now, since once FusionCandidates are put into the FusionCandidateList they
// are immutable. Thus, any time we need to change/update a FusionCandidate,
- // we must create a new one and insert it into the FusionCandidateSet to
- // ensure the FusionCandidateSet remains ordered correctly.
+ // we must create a new one and insert it into the FusionCandidateList to
+ // ensure the FusionCandidateList remains ordered correctly.
void invalidate() {
MemWrites.clear();
MemReads.clear();
@@ -381,86 +377,15 @@ struct FusionCandidate {
return false;
}
};
-
-struct FusionCandidateCompare {
- /// Comparison functor to sort two Control Flow Equivalent fusion candidates
- /// into dominance order.
- /// If LHS dominates RHS and RHS post-dominates LHS, return true;
- /// If RHS dominates LHS and LHS post-dominates RHS, return false;
- /// If both LHS and RHS are not dominating each other then, non-strictly
- /// post dominate check will decide the order of candidates. If RHS
- /// non-strictly post dominates LHS then, return true. If LHS non-strictly
- /// post dominates RHS then, return false. If both are non-strictly post
- /// dominate each other then, level in the post dominator tree will decide
- /// the order of candidates.
- bool operator()(const FusionCandidate &LHS,
- const FusionCandidate &RHS) const {
- const DominatorTree *DT = &(LHS.DT);
-
- BasicBlock *LHSEntryBlock = LHS.getEntryBlock();
- BasicBlock *RHSEntryBlock = RHS.getEntryBlock();
-
- // Do not save PDT to local variable as it is only used in asserts and thus
- // will trigger an unused variable warning if building without asserts.
- assert(DT && LHS.PDT && "Expecting valid dominator tree");
-
- // Do this compare first so if LHS == RHS, function returns false.
- if (DT->dominates(RHSEntryBlock, LHSEntryBlock)) {
- // RHS dominates LHS
- // Verify LHS post-dominates RHS
- assert(LHS.PDT->dominates(LHSEntryBlock, RHSEntryBlock));
- return false;
- }
-
- if (DT->dominates(LHSEntryBlock, RHSEntryBlock)) {
- // Verify RHS Postdominates LHS
- assert(LHS.PDT->dominates(RHSEntryBlock, LHSEntryBlock));
- return true;
- }
-
- // If two FusionCandidates are in the same level of dominator tree,
- // they will not dominate each other, but may still be control flow
- // equivalent. To sort those FusionCandidates, nonStrictlyPostDominate()
- // function is needed.
- bool WrongOrder =
- nonStrictlyPostDominate(LHSEntryBlock, RHSEntryBlock, DT, LHS.PDT);
- bool RightOrder =
- nonStrictlyPostDominate(RHSEntryBlock, LHSEntryBlock, DT, LHS.PDT);
- if (WrongOrder && RightOrder) {
- // If common predecessor of LHS and RHS post dominates both
- // FusionCandidates then, Order of FusionCandidate can be
- // identified by its level in post dominator tree.
- DomTreeNode *LNode = LHS.PDT->getNode(LHSEntryBlock);
- DomTreeNode *RNode = LHS.PDT->getNode(RHSEntryBlock);
- return LNode->getLevel() > RNode->getLevel();
- } else if (WrongOrder)
- return false;
- else if (RightOrder)
- return true;
-
- // If LHS does not non-strict Postdominate RHS and RHS does not non-strict
- // Postdominate LHS then, there is no dominance relationship between the
- // two FusionCandidates. Thus, they should not be in the same set together.
- llvm_unreachable(
- "No dominance relationship between these fusion candidates!");
- }
-};
} // namespace
using LoopVector = SmallVector<Loop *, 4>;
-// Set of Control Flow Equivalent (CFE) Fusion Candidates, sorted in dominance
-// order. Thus, if FC0 comes *before* FC1 in a FusionCandidateSet, then FC0
-// dominates FC1 and FC1 post-dominates FC0.
-// std::set was chosen because we want a sorted data structure with stable
-// iterators. A subsequent patch to loop fusion will enable fusing non-adjacent
-// loops by moving intervening code around. When this intervening code contains
-// loops, those loops will be moved also. The corresponding FusionCandidates
-// will also need to be moved accordingly. As this is done, having stable
-// iterators will simplify the logic. Similarly, having an efficient insert that
-// keeps the FusionCandidateSet sorted will also simplify the implementation.
-using FusionCandidateSet = std::set<FusionCandidate, FusionCandidateCompare>;
-using FusionCandidateCollection = SmallVector<FusionCandidateSet, 4>;
+// List of adjacent fusion candidates in order. Thus, if FC0 comes *before* FC1
+// in a FusionCandidateList, then FC0 dominates FC1, FC1 post-dominates FC0,
+// and they are adjacent.
+using FusionCandidateList = std::list<FusionCandidate>;
+using FusionCandidateCollection = SmallVector<FusionCandidateList, 4>;
#ifndef NDEBUG
static void printLoopVector(const LoopVector &LV) {
@@ -480,8 +405,8 @@ static raw_ostream &operator<<(raw_ostream &OS, const FusionCandidate &FC) {
}
static raw_ostream &operator<<(raw_ostream &OS,
- const FusionCandidateSet &CandSet) {
- for (const FusionCandidate &FC : CandSet)
+ const FusionCandidateList &CandList) {
+ for (const FusionCandidate &FC : CandList)
OS << FC << '\n';
return OS;
@@ -490,9 +415,9 @@ static raw_ostream &operator<<(raw_ostream &OS,
static void
printFusionCandidates(const FusionCandidateCollection &FusionCandidates) {
dbgs() << "Fusion Candidates: \n";
- for (const auto &CandidateSet : FusionCandidates) {
- dbgs() << "*** Fusion Candidate Set ***\n";
- dbgs() << CandidateSet;
+ for (const auto &CandidateList : FusionCandidates) {
+ dbgs() << "*** Fusion Candidate List ***\n";
+ dbgs() << CandidateList;
dbgs() << "****************************\n";
}
}
@@ -648,20 +573,6 @@ struct LoopFuser {
}
private:
- /// Determine if two fusion candidates are control flow equivalent.
- ///
- /// Two fusion candidates are control flow equivalent if when one executes,
- /// the other is guaranteed to execute. This is determined using dominators
- /// and post-dominators: if A dominates B and B post-dominates A then A and B
- /// are control-flow equivalent.
- bool isControlFlowEquivalent(const FusionCandidate &FC0,
- const FusionCandidate &FC1) const {
- assert(FC0.Preheader && FC1.Preheader && "Expecting valid preheaders");
-
- return ::isControlFlowEquivalent(*FC0.getEntryBlock(), *FC1.getEntryBlock(),
- DT, PDT);
- }
-
/// Iterate over all loops in the given loop set and identify the loops that
/// are eligible for fusion. Place all eligible fusion candidates into Control
/// Flow Equivalent sets, sorted by dominance.
@@ -673,34 +584,42 @@ struct LoopFuser {
if (!CurrCand.isEligibleForFusion(SE))
continue;
- // Go through each list in FusionCandidates and determine if L is control
- // flow equivalent with the first loop in that list. If it is, append LV.
+ // Go through each list in FusionCandidates and determine if the first or
+ // last loop in the list is strictly adjacent to L. If it is, append L.
// If not, go to the next list.
// If no suitable list is found, start another list and add it to
// FusionCandidates.
- bool FoundSet = false;
-
- for (auto &CurrCandSet : FusionCandidates) {
- if (isControlFlowEquivalent(*CurrCandSet.begin(), CurrCand)) {
- CurrCandSet.insert(CurrCand);
- FoundSet = true;
+ bool FoundAdjacent = false;
+ for (auto &CurrCandList : FusionCandidates) {
+ if (isStrictlyAdjacent(CurrCand, CurrCandList.front())) {
+ CurrCandList.push_front(CurrCand);
+ FoundAdjacent = true;
#ifndef NDEBUG
if (VerboseFusionDebugging)
LLVM_DEBUG(dbgs() << "Adding " << CurrCand
- << " to existing candidate set\n");
+ << " to existing candidate list\n");
+#endif
+ break;
+ } else if (isStrictlyAdjacent(CurrCandList.back(), CurrCand)) {
+ CurrCandList.push_back(CurrCand);
+ FoundAdjacent = true;
+#ifndef NDEBUG
+ if (VerboseFusionDebugging)
+ LLVM_DEBUG(dbgs() << "Adding " << CurrCand
+ << " to existing candidate list\n");
#endif
break;
}
}
- if (!FoundSet) {
- // No set was found. Create a new set and add to FusionCandidates
+ if (!FoundAdjacent) {
+ // No list was found. Create a new list and add to FusionCandidates
#ifndef NDEBUG
if (VerboseFusionDebugging)
- LLVM_DEBUG(dbgs() << "Adding " << CurrCand << " to new set\n");
+ LLVM_DEBUG(dbgs() << "Adding " << CurrCand << " to new list\n");
#endif
- FusionCandidateSet NewCandSet;
- NewCandSet.insert(CurrCand);
- FusionCandidates.push_back(NewCandSet);
+ FusionCandidateList NewCandList;
+ NewCandList.push_back(CurrCand);
+ FusionCandidates.push_back(NewCandList);
}
NumFusionCandidates++;
}
@@ -849,218 +768,205 @@ struct LoopFuser {
}
}
- /// Walk each set of control flow equivalent fusion candidates and attempt to
- /// fuse them. This does a single linear traversal of all candidates in the
- /// set. The conditions for legal fusion are checked at this point. If a pair
- /// of fusion candidates passes all legality checks, they are fused together
- /// and a new fusion candidate is created and added to the FusionCandidateSet.
+ /// Walk each set of strictly adjacent fusion candidates and attempt to fuse
+ /// them. This does a single linear traversal of all candidates in the list.
+ /// The conditions for legal fusion are checked at this point. If a pair of
+ /// fusion candidates passes all legality checks, they are fused together and
+ /// a new fusion candidate is created and added to the FusionCandidateList.
/// The original fusion candidates are then removed, as they are no longer
/// valid.
bool fuseCandidates() {
bool Fused = false;
LLVM_DEBUG(printFusionCandidates(FusionCandidates));
- for (auto &CandidateSet : FusionCandidates) {
- if (CandidateSet.size() < 2)
+ for (auto &CandidateList : FusionCandidates) {
+ if (CandidateList.size() < 2)
continue;
- LLVM_DEBUG(dbgs() << "Attempting fusion on Candidate Set:\n"
- << CandidateSet << "\n");
-
- for (auto FC0 = CandidateSet.begin(); FC0 != CandidateSet.end(); ++FC0) {
- assert(!LDT.isRemovedLoop(FC0->L) &&
- "Should not have removed loops in CandidateSet!");
- auto FC1 = FC0;
- for (++FC1; FC1 != CandidateSet.end(); ++FC1) {
- assert(!LDT.isRemovedLoop(FC1->L) &&
- "Should not have removed loops in CandidateSet!");
-
- LLVM_DEBUG(dbgs() << "Attempting to fuse candidate \n"; FC0->dump();
- dbgs() << " with\n"; FC1->dump(); dbgs() << "\n");
-
- FC0->verify();
- FC1->verify();
-
- // Check if the candidates have identical tripcounts (first value of
- // pair), and if not check the difference in the tripcounts between
- // the loops (second value of pair). The difference is not equal to
- // std::nullopt iff the loops iterate a constant number of times, and
- // have a single exit.
- std::pair<bool, std::optional<unsigned>> IdenticalTripCountRes =
- haveIdenticalTripCounts(*FC0, *FC1);
- bool SameTripCount = IdenticalTripCountRes.first;
- std::optional<unsigned> TCDifference = IdenticalTripCountRes.second;
-
- // Here we are checking that FC0 (the first loop) can be peeled, and
- // both loops have different tripcounts.
- if (FC0->AbleToPeel && !SameTripCount && TCDifference) {
- if (*TCDifference > FusionPeelMaxCount) {
- LLVM_DEBUG(dbgs()
- << "Difference in loop trip counts: " << *TCDifference
- << " is greater than maximum peel count specificed: "
- << FusionPeelMaxCount << "\n");
- } else {
- // Dependent on peeling being performed on the first loop, and
- // assuming all other conditions for fusion return true.
- SameTripCount = true;
- }
- }
+ LLVM_DEBUG(dbgs() << "Attempting fusion on Candidate List:\n"
+ << CandidateList << "\n");
- if (!SameTripCount) {
- LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical trip "
- "counts. Not fusing.\n");
- reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
- NonEqualTripCount);
- continue;
- }
+ for (auto It = CandidateList.begin(), NextIt = std::next(It);
+ NextIt != CandidateList.end(); It = NextIt, NextIt = std::next(It)) {
- if (!isAdjacent(*FC0, *FC1)) {
- LLVM_DEBUG(dbgs()
- << "Fusion candidates are not adjacent. Not fusing.\n");
- reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1, NonAdjacent);
- continue;
- }
+ auto FC0 = *It;
+ auto FC1 = *NextIt;
- if ((!FC0->GuardBranch && FC1->GuardBranch) ||
- (FC0->GuardBranch && !FC1->GuardBranch)) {
- LLVM_DEBUG(dbgs() << "The one of candidate is guarded while the "
- "another one is not. Not fusing.\n");
- reportLoopFusion<OptimizationRemarkMissed>(
- *FC0, *FC1, OnlySecondCandidateIsGuarded);
- continue;
- }
+ assert(!LDT.isRemovedLoop(FC0.L) &&
+ "Should not have removed loops in CandidateList!");
+ assert(!LDT.isRemovedLoop(FC1.L) &&
+ "Should not have removed loops in CandidateList!");
- // Ensure that FC0 and FC1 have identical guards.
- // If one (or both) are not guarded, this check is not necessary.
- if (FC0->GuardBranch && FC1->GuardBranch &&
- !haveIdenticalGuards(*FC0, *FC1) && !TCDifference) {
- LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical "
- "guards. Not Fusing.\n");
- reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
- NonIdenticalGuards);
- continue;
- }
+ LLVM_DEBUG(dbgs() << "Attempting to fuse candidate \n"; FC0.dump();
+ dbgs() << " with\n"; FC1.dump(); dbgs() << "\n");
- if (FC0->GuardBranch) {
- assert(FC1->GuardBranch && "Expecting valid FC1 guard branch");
-
- if (!isSafeToMoveBefore(*FC0->ExitBlock,
- *FC1->ExitBlock->getFirstNonPHIOrDbg(), DT,
- &PDT, &DI)) {
- LLVM_DEBUG(dbgs() << "Fusion candidate contains unsafe "
- "instructions in exit block. Not fusing.\n");
- reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
- NonEmptyExitBlock);
- continue;
- }
+ FC0.verify();
+ FC1.verify();
- if (!isSafeToMoveBefore(
- *FC1->GuardBranch->getParent(),
- *FC0->GuardBranch->getParent()->getTerminator(), DT, &PDT,
- &DI)) {
- LLVM_DEBUG(dbgs()
- << "Fusion candidate contains unsafe "
- "instructions in guard block. Not fusing.\n");
- reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
- NonEmptyGuardBlock);
- continue;
- }
- }
-
- // Check the dependencies across the loops and do not fuse if it would
- // violate them.
- if (!dependencesAllowFusion(*FC0, *FC1)) {
- LLVM_DEBUG(dbgs() << "Memory dependencies do not allow fusion!\n");
- reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
- InvalidDependencies);
- continue;
- }
+ // Check if the candidates have identical tripcounts (first value of
+ // pair), and if not check the difference in the tripcounts between
+ // the loops (second value of pair). The difference is not equal to
+ // std::nullopt iff the loops iterate a constant number of times, and
+ // have a single exit.
+ std::pair<bool, std::optional<unsigned>> IdenticalTripCountRes =
+ haveIdenticalTripCounts(FC0, FC1);
+ bool SameTripCount = IdenticalTripCountRes.first;
+ std::optional<unsigned> TCDifference = IdenticalTripCountRes.second;
- // If the second loop has instructions in the pre-header, attempt to
- // hoist them up to the first loop's pre-header or sink them into the
- // body of the second loop.
- SmallVector<Instruction *, 4> SafeToHoist;
- SmallVector<Instruction *, 4> SafeToSink;
- // At this point, this is the last remaining legality check.
- // Which means if we can make this pre-header empty, we can fuse
- // these loops
- if (!isEmptyPreheader(*FC1)) {
- LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty "
- "preheader.\n");
-
- // If it is not safe to hoist/sink all instructions in the
- // pre-header, we cannot fuse these loops.
- if (!collectMovablePreheaderInsts(*FC0, *FC1, SafeToHoist,
- SafeToSink)) {
- LLVM_DEBUG(dbgs() << "Could not hoist/sink all instructions in "
- "Fusion Candidate Pre-header.\n"
- << "Not Fusing.\n");
- reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
- NonEmptyPreheader);
- continue;
- }
+ // Here we are checking that FC0 (the first loop) can be peeled, and
+ // both loops have different tripcounts.
+ if (FC0.AbleToPeel && !SameTripCount && TCDifference) {
+ if (*TCDifference > FusionPeelMaxCount) {
+ LLVM_DEBUG(dbgs()
+ << "Difference in loop trip counts: " << *TCDifference
+ << " is greater than maximum peel count specificed: "
+ << FusionPeelMaxCount << "\n");
+ } else {
+ // Dependent on peeling being performed on the first loop, and
+ // assuming all other conditions for fusion return true.
+ SameTripCount = true;
}
+ }
- bool BeneficialToFuse = isBeneficialFusion(*FC0, *FC1);
- LLVM_DEBUG(dbgs()
- << "\tFusion appears to be "
- << (BeneficialToFuse ? "" : "un") << "profitable!\n");
- if (!BeneficialToFuse) {
- reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
- FusionNotBeneficial);
- continue;
- }
- // All analysis has completed and has determined that fusion is legal
- // and profitable. At this point, start transforming the code and
- // perform fusion.
+ if (!SameTripCount) {
+ LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical trip "
+ "counts. Not fusing.\n");
+ reportLoopFusion<OptimizationRemarkMissed>(FC0, FC1,
+ NonEqualTripCount);
+ continue;
+ }
- // Execute the hoist/sink operations on preheader instructions
- movePreheaderInsts(*FC0, *FC1, SafeToHoist, SafeToSink);
+ if ((!FC0.GuardBranch && FC1.GuardBranch) ||
+ (FC0.GuardBranch && !FC1.GuardBranch)) {
+ LLVM_DEBUG(dbgs() << "The one of candidate is guarded while the "
+ "another one is not. Not fusing.\n");
+ reportLoopFusion<OptimizationRemarkMissed>(
+ FC0, FC1, OnlySecondCandidateIsGuarded);
+ continue;
+ }
- LLVM_DEBUG(dbgs() << "\tFusion is performed: " << *FC0 << " and "
- << *FC1 << "\n");
+ // Ensure that FC0 and FC1 have identical guards.
+ // If one (or both) are not guarded, this check is not necessary.
+ if (FC0.GuardBranch && FC1.GuardBranch &&
+ !haveIdenticalGuards(FC0, FC1) && !TCDifference) {
+ LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical "
+ "guards. Not Fusing.\n");
+ reportLoopFusion<OptimizationRemarkMissed>(FC0, FC1,
+ NonIdenticalGuards);
+ continue;
+ }
- FusionCandidate FC0Copy = *FC0;
- // Peel the loop after determining that fusion is legal. The Loops
- // will still be safe to fuse after the peeling is performed.
- bool Peel = TCDifference && *TCDifference > 0;
- if (Peel)
- peelFusionCandidate(FC0Copy, *FC1, *TCDifference);
+ if (FC0.GuardBranch) {
+ assert(FC1.GuardBranch && "Expecting valid FC1 guard branch");
- // Report fusion to the Optimization Remarks.
- // Note this needs to be done *before* performFusion because
- // performFusion will change the original loops, making it not
- // possible to identify them after fusion is complete.
- reportLoopFusion<OptimizationRemark>((Peel ? FC0Copy : *FC0), *FC1,
- FuseCounter);
+ if (!isSafeToMoveBefore(*FC0.ExitBlock,
+ *FC1.ExitBlock->getFirstNonPHIOrDbg(), DT,
+ &PDT, &DI)) {
+ LLVM_DEBUG(dbgs() << "Fusion candidate contains unsafe "
+ "instructions in exit block. Not fusing.\n");
+ reportLoopFusion<OptimizationRemarkMissed>(FC0, FC1,
+ NonEmptyExitBlock);
+ continue;
+ }
- FusionCandidate FusedCand(
- performFusion((Peel ? FC0Copy : *FC0), *FC1), DT, &PDT, ORE,
- FC0Copy.PP);
- FusedCand.verify();
- assert(FusedCand.isEligibleForFusion(SE) &&
- "Fused candidate should be eligible for fusion!");
+ if (!isSafeToMoveBefore(
+ *FC1.GuardBranch->getParent(),
+ *FC0.GuardBranch->getParent()->getTerminator(), DT, &PDT,
+ &DI)) {
+ LLVM_DEBUG(dbgs() << "Fusion candidate contains unsafe "
+ "instructions in guard block. Not fusing.\n");
+ reportLoopFusion<OptimizationRemarkMissed>(FC0, FC1,
+ NonEmptyGuardBlock);
+ continue;
+ }
+ }
- // Notify the loop-depth-tree that these loops are not valid objects
- LDT.removeLoop(FC1->L);
+ // Check the dependencies across the loops and do not fuse if it would
+ // violate them.
+ if (!dependencesAllowFusion(FC0, FC1)) {
+ LLVM_DEBUG(dbgs() << "Memory dependencies do not allow fusion!\n");
+ reportLoopFusion<OptimizationRemarkMissed>(FC0, FC1,
+ InvalidDependencies);
+ continue;
+ }
- CandidateSet.erase(FC0);
- CandidateSet.erase(FC1);
+ // If the second loop has instructions in the pre-header, attempt to
+ // hoist them up to the first loop's pre-header or sink them into the
+ // body of the second loop.
+ SmallVector<Instruction *, 4> SafeToHoist;
+ SmallVector<Instruction *, 4> SafeToSink;
+ // At this point, this is the last remaining legality check.
+ // Which means if we can make this pre-header empty, we can fuse
+ // these loops
+ if (!isEmptyPreheader(FC1)) {
+ LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty "
+ "preheader.\n");
+
+ // If it is not safe to hoist/sink all instructions in the
+ // pre-header, we cannot fuse these loops.
+ if (!collectMovablePreheaderInsts(FC0, FC1, SafeToHoist,
+ SafeToSink)) {
+ LLVM_DEBUG(dbgs() << "Could not hoist/sink all instructions in "
+ "Fusion Candidate Pre-header.\n"
+ << "Not Fusing.\n");
+ reportLoopFusion<OptimizationRemarkMissed>(FC0, FC1,
+ NonEmptyPreheader);
+ continue;
+ }
+ }
- auto InsertPos = CandidateSet.insert(FusedCand);
+ bool BeneficialToFuse = isBeneficialFusion(FC0, FC1);
+ LLVM_DEBUG(dbgs() << "\tFusion appears to be "
+ << (BeneficialToFuse ? "" : "un") << "profitable!\n");
+ if (!BeneficialToFuse) {
+ reportLoopFusion<OptimizationRemarkMissed>(FC0, FC1,
+ FusionNotBeneficial);
+ continue;
+ }
+ // All analysis has completed and has determined that fusion is legal
+ // and profitable. At this point, start transforming the code and
+ // perform fusion.
- assert(InsertPos.second &&
- "Unable to insert TargetCandidate in CandidateSet!");
+ // Execute the hoist/sink operations on preheader instructions
+ movePreheaderInsts(FC0, FC1, SafeToHoist, SafeToSink);
- // Reset FC0 and FC1 the new (fused) candidate. Subsequent iterations
- // of the FC1 loop will attempt to fuse the new (fused) loop with the
- // remaining candidates in the current candidate set.
- FC0 = FC1 = InsertPos.first;
+ LLVM_DEBUG(dbgs() << "\tFusion is performed: " << FC0 << " and " << FC1
+ << "\n");
- LLVM_DEBUG(dbgs() << "Candidate Set (after fusion): " << CandidateSet
- << "\n");
+ FusionCandidate FC0Copy = FC0;
+ // Peel the loop after determining that fusion is legal. The Loops
+ // will still be safe to fuse after the peeling is performed.
+ bool Peel = TCDifference && *TCDifference > 0;
+ if (Peel)
+ peelFusionCandidate(FC0Copy, FC1, *TCDifference);
+
+ // Report fusion to the Optimization Remarks.
+ // Note this needs to be done *before* performFusion because
+ // performFusion will change the original loops, making it not
+ // possible to identify them after fusion is complete.
+ reportLoopFusion<OptimizationRemark>((Peel ? FC0Copy : FC0), FC1,
+ FuseCounter);
+
+ FusionCandidate FusedCand(performFusion((Peel ? FC0Copy : FC0), FC1),
+ DT, &PDT, ORE, FC0Copy.PP);
+ FusedCand.verify();
+ assert(FusedCand.isEligibleForFusion(SE) &&
+ "Fused candidate should be eligible for fusion!");
+
+ // Notify the loop-depth-tree that these loops are not valid objects
+ LDT.removeLoop(FC1.L);
+
+ // Replace FC0 and FC1 with their fused loop
+ It = CandidateList.erase(It);
+ It = CandidateList.erase(It);
+ It = CandidateList.insert(It, FusedCand);
+
+ // Start from FusedCand in the next iteration
+ NextIt = It;
+
+ LLVM_DEBUG(dbgs() << "Candidate List (after fusion): " << CandidateList
+ << "\n");
- Fused = true;
- }
+ Fused = true;
}
}
return Fused;
@@ -1488,7 +1394,7 @@ struct LoopFuser {
return true;
}
- /// Determine if two fusion candidates are adjacent in the CFG.
+ /// Determine if two fusion candidates are strictly adjacent in the CFG.
///
/// This method will determine if there are additional basic blocks in the CFG
/// between the exit of \p FC0 and the entry of \p FC1.
@@ -1497,11 +1403,14 @@ struct LoopFuser {
/// FC1. If not, then the loops are not adjacent. If the two candidates are
/// not guarded loops, then it checks whether the exit block of \p FC0 is the
/// preheader of \p FC1.
- bool isAdjacent(const FusionCandidate &FC0,
- const FusionCandidate &FC1) const {
+ /// Strictly means there is no predecessor for FC1 unless it is from FC0,
+ /// i.e., FC0 dominates FC1.
+ bool isStrictlyAdjacent(const FusionCandidate &FC0,
+ const FusionCandidate &FC1) const {
// If the successor of the guard branch is FC1, then the loops are adjacent
if (FC0.GuardBranch)
- return FC0.getNonLoopBlock() == FC1.getEntryBlock();
+ return DT.dominates(FC0.getEntryBlock(), FC1.getEntryBlock()) &&
+ FC0.getNonLoopBlock() == FC1.getEntryBlock();
else
return FC0.ExitBlock == FC1.getEntryBlock();
}
diff --git a/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp b/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
index 0e076c60d6085..8384d46837a7e 100644
--- a/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
+++ b/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
@@ -25,8 +25,6 @@ using namespace llvm;
STATISTIC(HasDependences,
"Cannot move across instructions that has memory dependences");
STATISTIC(MayThrowException, "Cannot move across instructions that may throw");
-STATISTIC(NotControlFlowEquivalent,
- "Instructions are not control flow equivalent");
STATISTIC(NotMovedPHINode, "Movement of PHINodes are not supported");
STATISTIC(NotMovedTerminator, "Movement of Terminator are not supported");
@@ -228,44 +226,6 @@ bool ControlConditions::isInverse(const Value &V1, const Value &V2) {
return false;
}
-bool llvm::isControlFlowEquivalent(const Instruction &I0, const Instruction &I1,
- const DominatorTree &DT,
- const PostDominatorTree &PDT) {
- return isControlFlowEquivalent(*I0.getParent(), *I1.getParent(), DT, PDT);
-}
-
-bool llvm::isControlFlowEquivalent(const BasicBlock &BB0, const BasicBlock &BB1,
- const DominatorTree &DT,
- const PostDominatorTree &PDT) {
- if (&BB0 == &BB1)
- return true;
-
- if ((DT.dominates(&BB0, &BB1) && PDT.dominates(&BB1, &BB0)) ||
- (PDT.dominates(&BB0, &BB1) && DT.dominates(&BB1, &BB0)))
- return true;
-
- // If the set of conditions required to execute BB0 and BB1 from their common
- // dominator are the same, then BB0 and BB1 are control flow equivalent.
- const BasicBlock *CommonDominator = DT.findNearestCommonDominator(&BB0, &BB1);
- LLVM_DEBUG(dbgs() << "The nearest common dominator of " << BB0.getName()
- << " and " << BB1.getName() << " is "
- << CommonDominator->getName() << "\n");
-
- const std::optional<ControlConditions> BB0Conditions =
- ControlConditions::collectControlConditions(BB0, *CommonDominator, DT,
- PDT);
- if (BB0Conditions == std::nullopt)
- return false;
-
- const std::optional<ControlConditions> BB1Conditions =
- ControlConditions::collectControlConditions(BB1, *CommonDominator, DT,
- PDT);
- if (BB1Conditions == std::nullopt)
- return false;
-
- return BB0Conditions->isEquivalent(*BB1Conditions);
-}
-
static bool reportInvalidCandidate(const Instruction &I,
llvm::Statistic &Stat) {
++Stat;
@@ -330,10 +290,6 @@ bool llvm::isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
if (I.isTerminator())
return reportInvalidCandidate(I, NotMovedTerminator);
- // TODO remove this limitation.
- if (!isControlFlowEquivalent(I, InsertPoint, DT, *PDT))
- return reportInvalidCandidate(I, NotControlFlowEquivalent);
-
if (isReachedBefore(&I, &InsertPoint, &DT, PDT))
for (const Use &U : I.uses())
if (auto *UserInst = dyn_cast<Instruction>(U.getUser())) {
@@ -450,8 +406,6 @@ bool llvm::nonStrictlyPostDominate(const BasicBlock *ThisBlock,
const BasicBlock *OtherBlock,
const DominatorTree *DT,
const PostDominatorTree *PDT) {
- assert(isControlFlowEquivalent(*ThisBlock, *OtherBlock, *DT, *PDT) &&
- "ThisBlock and OtherBlock must be CFG equivalent!");
const BasicBlock *CommonDominator =
DT->findNearestCommonDominator(ThisBlock, OtherBlock);
if (CommonDominator == nullptr)
diff --git a/llvm/test/Transforms/LoopFusion/cannot_fuse.ll b/llvm/test/Transforms/LoopFusion/cannot_fuse.ll
index 6819ec377ab17..0eea73940904d 100644
--- a/llvm/test/Transforms/LoopFusion/cannot_fuse.ll
+++ b/llvm/test/Transforms/LoopFusion/cannot_fuse.ll
@@ -8,10 +8,10 @@
; CHECK: Performing Loop Fusion on function non_cfe
; CHECK: Fusion Candidates:
-; CHECK: *** Fusion Candidate Set ***
+; CHECK: *** Fusion Candidate List ***
; CHECK: bb
; CHECK: ****************************
-; CHECK: *** Fusion Candidate Set ***
+; CHECK: *** Fusion Candidate List ***
; CHECK: bb20.preheader
; CHECK: ****************************
; CHECK: Loop Fusion complete
@@ -81,14 +81,12 @@ bb33: ; preds = %bb33.loopexit, %bb1
; CHECK: Performing Loop Fusion on function non_adjacent
; CHECK: Fusion Candidates:
-; CHECK: *** Fusion Candidate Set ***
+; CHECK: *** Fusion Candidate List ***
; CHECK-NEXT: [[LOOP1PREHEADER:bb[0-9]*]]
+; CHECK-NEXT: ****************************
+; CHECK: *** Fusion Candidate List ***
; CHECK-NEXT: [[LOOP2PREHEADER:bb[0-9]*]]
; CHECK-NEXT: ****************************
-; CHECK: Attempting fusion on Candidate Set:
-; CHECK-NEXT: [[LOOP1PREHEADER]]
-; CHECK-NEXT: [[LOOP2PREHEADER]]
-; CHECK: Fusion candidates are not adjacent. Not fusing.
; CHECK: Loop Fusion complete
define void @non_adjacent(ptr noalias %arg) {
bb:
@@ -143,11 +141,11 @@ bb25: ; preds = %bb15
; CHECK: Performing Loop Fusion on function different_bounds
; CHECK: Fusion Candidates:
-; CHECK: *** Fusion Candidate Set ***
+; CHECK: *** Fusion Candidate List ***
; CHECK-NEXT: [[LOOP1PREHEADER:bb[0-9]*]]
; CHECK-NEXT: [[LOOP2PREHEADER:bb[0-9]*]]
; CHECK-NEXT: ****************************
-; CHECK: Attempting fusion on Candidate Set:
+; CHECK: Attempting fusion on Candidate List:
; CHECK-NEXT: [[LOOP1PREHEADER]]
; CHECK-NEXT: [[LOOP2PREHEADER]]
; CHECK: Fusion candidates do not have identical trip counts. Not fusing.
@@ -157,7 +155,7 @@ bb:
br label %bb5
bb4: ; preds = %bb11
- br label %bb13
+ br label %bb16
bb5: ; preds = %bb, %bb11
%.013 = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ]
@@ -175,14 +173,11 @@ bb11: ; preds = %bb5
%exitcond2 = icmp ne i64 %tmp12, 100
br i1 %exitcond2, label %bb5, label %bb4
-bb13: ; preds = %bb4
- br label %bb16
-
bb15: ; preds = %bb23
br label %bb25
-bb16: ; preds = %bb13, %bb23
- %.02 = phi i64 [ 0, %bb13 ], [ %tmp24, %bb23 ]
+bb16: ; preds = %bb4, %bb23
+ %.02 = phi i64 [ 0, %bb4 ], [ %tmp24, %bb23 ]
%tmp17 = add nsw i64 %.02, -3
%tmp18 = add nuw nsw i64 %.02, 3
%tmp19 = mul nsw i64 %tmp17, %tmp18
@@ -206,11 +201,11 @@ bb25: ; preds = %bb15
; CHECK: Performing Loop Fusion on function negative_dependence
; CHECK: Fusion Candidates:
-; CHECK: *** Fusion Candidate Set ***
+; CHECK: *** Fusion Candidate List ***
; CHECK-NEXT: [[LOOP1PREHEADER:bb[0-9]*]]
; CHECK-NEXT: [[LOOP2PREHEADER:bb[0-9]*]]
; CHECK-NEXT: ****************************
-; CHECK: Attempting fusion on Candidate Set:
+; CHECK: Attempting fusion on Candidate List:
; CHECK-NEXT: [[LOOP1PREHEADER]]
; CHECK-NEXT: [[LOOP2PREHEADER]]
; CHECK: Memory dependencies do not allow fusion!
@@ -260,11 +255,11 @@ bb19: ; preds = %bb18
; CHECK: Performing Loop Fusion on function sumTest
; CHECK: Fusion Candidates:
-; CHECK: *** Fusion Candidate Set ***
+; CHECK: *** Fusion Candidate List ***
; CHECK-NEXT: [[LOOP1PREHEADER:bb[0-9]*]]
; CHECK-NEXT: [[LOOP2PREHEADER:bb[0-9]*]]
; CHECK-NEXT: ****************************
-; CHECK: Attempting fusion on Candidate Set:
+; CHECK: Attempting fusion on Candidate List:
; CHECK-NEXT: [[LOOP1PREHEADER]]
; CHECK-NEXT: [[LOOP2PREHEADER]]
; CHECK: Memory dependencies do not allow fusion!
@@ -314,11 +309,11 @@ bb21: ; preds = %bb14
; CHECK: Performing Loop Fusion on function test
; CHECK: Fusion Candidates:
-; CHECK: *** Fusion Candidate Set ***
+; CHECK: *** Fusion Candidate List ***
; CHECK-NEXT: [[LOOP1PREHEADER:for.body[0-9]*.preheader]]
; CHECK-NEXT: [[LOOP2PREHEADER:for.body[0-9]*.preheader]]
; CHECK-NEXT: ****************************
-; CHECK: Attempting fusion on Candidate Set:
+; CHECK: Attempting fusion on Candidate List:
; CHECK-NEXT: [[LOOP1PREHEADER]]
; CHECK-NEXT: [[LOOP2PREHEADER]]
; CHECK: Memory dependencies do not allow fusion!
diff --git a/llvm/test/Transforms/LoopFusion/diagnostics_missed.ll b/llvm/test/Transforms/LoopFusion/diagnostics_missed.ll
index f30a070153742..711d462da37d7 100644
--- a/llvm/test/Transforms/LoopFusion/diagnostics_missed.ll
+++ b/llvm/test/Transforms/LoopFusion/diagnostics_missed.ll
@@ -5,64 +5,11 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
@B = common global [1024 x i32] zeroinitializer, align 16, !dbg !0
-; CHECK: remark: diagnostics_missed.c:18:3: [non_adjacent]: entry and for.end: Loops are not adjacent
-define void @non_adjacent(ptr noalias %A) !dbg !14 {
-entry:
- br label %for.body
-
-for.cond.cleanup: ; preds = %for.inc
- br label %for.end
-
-for.body: ; preds = %entry, %for.inc
- %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.inc ]
- %sub = add nsw i64 %i.02, -3
- %add = add nuw nsw i64 %i.02, 3
- %mul = mul nsw i64 %sub, %add
- %rem = srem i64 %mul, %i.02
- %conv = trunc i64 %rem to i32
- %arrayidx = getelementptr inbounds i32, ptr %A, i64 %i.02
- store i32 %conv, ptr %arrayidx, align 4
- br label %for.inc
-
-for.inc: ; preds = %for.body
- %inc = add nuw nsw i64 %i.02, 1, !dbg !26
- %exitcond1 = icmp ne i64 %inc, 100
- br i1 %exitcond1, label %for.body, label %for.cond.cleanup, !llvm.loop !28
-
-for.end: ; preds = %for.cond.cleanup
- br label %for.body6
-
-for.cond.cleanup5: ; preds = %for.inc13
- br label %for.end15
-
-for.body6: ; preds = %for.end, %for.inc13
- %i1.01 = phi i64 [ 0, %for.end ], [ %inc14, %for.inc13 ]
- %sub7 = add nsw i64 %i1.01, -3
- %add8 = add nuw nsw i64 %i1.01, 3
- %mul9 = mul nsw i64 %sub7, %add8
- %rem10 = srem i64 %mul9, %i1.01
- %conv11 = trunc i64 %rem10 to i32
- %arrayidx12 = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 %i1.01
- store i32 %conv11, ptr %arrayidx12, align 4
- br label %for.inc13
-
-for.inc13: ; preds = %for.body6
- %inc14 = add nuw nsw i64 %i1.01, 1, !dbg !31
- %exitcond = icmp ne i64 %inc14, 100
- br i1 %exitcond, label %for.body6, label %for.cond.cleanup5, !llvm.loop !33
-
-for.end15: ; preds = %for.cond.cleanup5
- ret void
-}
-
; CHECK: remark: diagnostics_missed.c:28:3: [different_bounds]: entry and for.end: Loop trip counts are not the same
define void @different_bounds(ptr noalias %A) !dbg !36 {
entry:
br label %for.body
-for.cond.cleanup: ; preds = %for.inc
- br label %for.end
-
for.body: ; preds = %entry, %for.inc
%i.02 = phi i64 [ 0, %entry ], [ %inc, %for.inc ]
%sub = add nsw i64 %i.02, -3
@@ -77,9 +24,9 @@ for.body: ; preds = %entry, %for.inc
for.inc: ; preds = %for.body
%inc = add nuw nsw i64 %i.02, 1, !dbg !43
%exitcond1 = icmp ne i64 %inc, 100
- br i1 %exitcond1, label %for.body, label %for.cond.cleanup, !llvm.loop !45
+ br i1 %exitcond1, label %for.body, label %for.end, !llvm.loop !45
-for.end: ; preds = %for.cond.cleanup
+for.end: ; preds = %for.inc
br label %for.body6
for.cond.cleanup5: ; preds = %for.inc13
diff --git a/llvm/unittests/Transforms/Utils/CodeMoverUtilsTest.cpp b/llvm/unittests/Transforms/Utils/CodeMoverUtilsTest.cpp
index 191ccc3a9dbd9..c2a8045e86dab 100644
--- a/llvm/unittests/Transforms/Utils/CodeMoverUtilsTest.cpp
+++ b/llvm/unittests/Transforms/Utils/CodeMoverUtilsTest.cpp
@@ -63,373 +63,6 @@ static Instruction *getInstructionByName(Function &F, StringRef Name) {
llvm_unreachable("Expected to find instruction!");
}
-TEST(CodeMoverUtils, IsControlFlowEquivalentSimpleTest) {
- LLVMContext C;
-
- // void foo(int &i, bool cond1, bool cond2) {
- // if (cond1)
- // i = 1;
- // if (cond1)
- // i = 2;
- // if (cond2)
- // i = 3;
- // }
- std::unique_ptr<Module> M =
- parseIR(C, R"(define void @foo(ptr %i, i1 %cond1, i1 %cond2) {
- entry:
- br i1 %cond1, label %if.first, label %if.first.end
- if.first:
- store i32 1, ptr %i, align 4
- br label %if.first.end
- if.first.end:
- br i1 %cond1, label %if.second, label %if.second.end
- if.second:
- store i32 2, ptr %i, align 4
- br label %if.second.end
- if.second.end:
- br i1 %cond2, label %if.third, label %if.third.end
- if.third:
- store i32 3, ptr %i, align 4
- br label %if.third.end
- if.third.end:
- ret void
- })");
- run(*M, "foo",
- [&](Function &F, DominatorTree &DT, PostDominatorTree &PDT,
- DependenceInfo &DI) {
- BasicBlock *FirstIfBody = getBasicBlockByName(F, "if.first");
- EXPECT_TRUE(
- isControlFlowEquivalent(*FirstIfBody, *FirstIfBody, DT, PDT));
- BasicBlock *SecondIfBody = getBasicBlockByName(F, "if.second");
- EXPECT_TRUE(
- isControlFlowEquivalent(*FirstIfBody, *SecondIfBody, DT, PDT));
-
- BasicBlock *ThirdIfBody = getBasicBlockByName(F, "if.third");
- EXPECT_FALSE(
- isControlFlowEquivalent(*FirstIfBody, *ThirdIfBody, DT, PDT));
- EXPECT_FALSE(
- isControlFlowEquivalent(*SecondIfBody, *ThirdIfBody, DT, PDT));
- });
-}
-
-TEST(CodeMoverUtils, IsControlFlowEquivalentOppositeCondTest) {
- LLVMContext C;
-
- // void foo(int &i, unsigned X, unsigned Y) {
- // if (X < Y)
- // i = 1;
- // if (Y > X)
- // i = 2;
- // if (X >= Y)
- // i = 3;
- // else
- // i = 4;
- // if (X == Y)
- // i = 5;
- // if (Y == X)
- // i = 6;
- // else
- // i = 7;
- // if (X != Y)
- // i = 8;
- // else
- // i = 9;
- // }
- std::unique_ptr<Module> M =
- parseIR(C, R"(define void @foo(ptr %i, i32 %X, i32 %Y) {
- entry:
- %cmp1 = icmp ult i32 %X, %Y
- br i1 %cmp1, label %if.first, label %if.first.end
- if.first:
- store i32 1, ptr %i, align 4
- br label %if.first.end
- if.first.end:
- %cmp2 = icmp ugt i32 %Y, %X
- br i1 %cmp2, label %if.second, label %if.second.end
- if.second:
- store i32 2, ptr %i, align 4
- br label %if.second.end
- if.second.end:
- %cmp3 = icmp uge i32 %X, %Y
- br i1 %cmp3, label %if.third, label %if.third.else
- if.third:
- store i32 3, ptr %i, align 4
- br label %if.third.end
- if.third.else:
- store i32 4, ptr %i, align 4
- br label %if.third.end
- if.third.end:
- %cmp4 = icmp eq i32 %X, %Y
- br i1 %cmp4, label %if.fourth, label %if.fourth.end
- if.fourth:
- store i32 5, ptr %i, align 4
- br label %if.fourth.end
- if.fourth.end:
- %cmp5 = icmp eq i32 %Y, %X
- br i1 %cmp5, label %if.fifth, label %if.fifth.else
- if.fifth:
- store i32 6, ptr %i, align 4
- br label %if.fifth.end
- if.fifth.else:
- store i32 7, ptr %i, align 4
- br label %if.fifth.end
- if.fifth.end:
- %cmp6 = icmp ne i32 %X, %Y
- br i1 %cmp6, label %if.sixth, label %if.sixth.else
- if.sixth:
- store i32 8, ptr %i, align 4
- br label %if.sixth.end
- if.sixth.else:
- store i32 9, ptr %i, align 4
- br label %if.sixth.end
- if.sixth.end:
- ret void
- })");
- run(*M, "foo",
- [&](Function &F, DominatorTree &DT, PostDominatorTree &PDT,
- DependenceInfo &DI) {
- BasicBlock *FirstIfBody = getBasicBlockByName(F, "if.first");
- BasicBlock *SecondIfBody = getBasicBlockByName(F, "if.second");
- BasicBlock *ThirdIfBody = getBasicBlockByName(F, "if.third");
- BasicBlock *ThirdElseBody = getBasicBlockByName(F, "if.third.else");
- EXPECT_TRUE(
- isControlFlowEquivalent(*FirstIfBody, *ThirdElseBody, DT, PDT));
- EXPECT_TRUE(
- isControlFlowEquivalent(*SecondIfBody, *ThirdElseBody, DT, PDT));
- EXPECT_FALSE(
- isControlFlowEquivalent(*ThirdIfBody, *ThirdElseBody, DT, PDT));
-
- BasicBlock *FourthIfBody = getBasicBlockByName(F, "if.fourth");
- BasicBlock *FifthIfBody = getBasicBlockByName(F, "if.fifth");
- BasicBlock *FifthElseBody = getBasicBlockByName(F, "if.fifth.else");
- EXPECT_FALSE(
- isControlFlowEquivalent(*FifthIfBody, *FifthElseBody, DT, PDT));
- BasicBlock *SixthIfBody = getBasicBlockByName(F, "if.sixth");
- EXPECT_TRUE(
- isControlFlowEquivalent(*FifthElseBody, *SixthIfBody, DT, PDT));
- BasicBlock *SixthElseBody = getBasicBlockByName(F, "if.sixth.else");
- EXPECT_TRUE(
- isControlFlowEquivalent(*FourthIfBody, *SixthElseBody, DT, PDT));
- EXPECT_TRUE(
- isControlFlowEquivalent(*FifthIfBody, *SixthElseBody, DT, PDT));
- });
-}
-
-TEST(CodeMoverUtils, IsControlFlowEquivalentCondNestTest) {
- LLVMContext C;
-
- // void foo(int &i, bool cond1, bool cond2) {
- // if (cond1)
- // if (cond2)
- // i = 1;
- // if (cond2)
- // if (cond1)
- // i = 2;
- // }
- std::unique_ptr<Module> M =
- parseIR(C, R"(define void @foo(ptr %i, i1 %cond1, i1 %cond2) {
- entry:
- br i1 %cond1, label %if.outer.first, label %if.first.end
- if.outer.first:
- br i1 %cond2, label %if.inner.first, label %if.first.end
- if.inner.first:
- store i32 1, ptr %i, align 4
- br label %if.first.end
- if.first.end:
- br i1 %cond2, label %if.outer.second, label %if.second.end
- if.outer.second:
- br i1 %cond1, label %if.inner.second, label %if.second.end
- if.inner.second:
- store i32 2, ptr %i, align 4
- br label %if.second.end
- if.second.end:
- ret void
- })");
- run(*M, "foo",
- [&](Function &F, DominatorTree &DT, PostDominatorTree &PDT,
- DependenceInfo &DI) {
- BasicBlock *FirstOuterIfBody = getBasicBlockByName(F, "if.outer.first");
- BasicBlock *FirstInnerIfBody = getBasicBlockByName(F, "if.inner.first");
- BasicBlock *SecondOuterIfBody =
- getBasicBlockByName(F, "if.outer.second");
- BasicBlock *SecondInnerIfBody =
- getBasicBlockByName(F, "if.inner.second");
- EXPECT_TRUE(isControlFlowEquivalent(*FirstInnerIfBody,
- *SecondInnerIfBody, DT, PDT));
- EXPECT_FALSE(isControlFlowEquivalent(*FirstOuterIfBody,
- *SecondOuterIfBody, DT, PDT));
- EXPECT_FALSE(isControlFlowEquivalent(*FirstOuterIfBody,
- *SecondInnerIfBody, DT, PDT));
- EXPECT_FALSE(isControlFlowEquivalent(*FirstInnerIfBody,
- *SecondOuterIfBody, DT, PDT));
- });
-}
-
-TEST(CodeMoverUtils, IsControlFlowEquivalentImbalanceTest) {
- LLVMContext C;
-
- // void foo(int &i, bool cond1, bool cond2) {
- // if (cond1)
- // if (cond2)
- // if (cond3)
- // i = 1;
- // if (cond2)
- // if (cond3)
- // i = 2;
- // if (cond1)
- // if (cond1)
- // i = 3;
- // if (cond1)
- // i = 4;
- // }
- std::unique_ptr<Module> M =
- parseIR(C, R"(define void @foo(ptr %i, i1 %cond1, i1 %cond2, i1 %cond3) {
- entry:
- br i1 %cond1, label %if.outer.first, label %if.first.end
- if.outer.first:
- br i1 %cond2, label %if.middle.first, label %if.first.end
- if.middle.first:
- br i1 %cond3, label %if.inner.first, label %if.first.end
- if.inner.first:
- store i32 1, ptr %i, align 4
- br label %if.first.end
- if.first.end:
- br i1 %cond2, label %if.outer.second, label %if.second.end
- if.outer.second:
- br i1 %cond3, label %if.inner.second, label %if.second.end
- if.inner.second:
- store i32 2, ptr %i, align 4
- br label %if.second.end
- if.second.end:
- br i1 %cond1, label %if.outer.third, label %if.third.end
- if.outer.third:
- br i1 %cond1, label %if.inner.third, label %if.third.end
- if.inner.third:
- store i32 3, ptr %i, align 4
- br label %if.third.end
- if.third.end:
- br i1 %cond1, label %if.fourth, label %if.fourth.end
- if.fourth:
- store i32 4, ptr %i, align 4
- br label %if.fourth.end
- if.fourth.end:
- ret void
- })");
- run(*M, "foo",
- [&](Function &F, DominatorTree &DT, PostDominatorTree &PDT,
- DependenceInfo &DI) {
- BasicBlock *FirstIfBody = getBasicBlockByName(F, "if.inner.first");
- BasicBlock *SecondIfBody = getBasicBlockByName(F, "if.inner.second");
- EXPECT_FALSE(
- isControlFlowEquivalent(*FirstIfBody, *SecondIfBody, DT, PDT));
-
- BasicBlock *ThirdIfBody = getBasicBlockByName(F, "if.inner.third");
- BasicBlock *FourthIfBody = getBasicBlockByName(F, "if.fourth");
- EXPECT_TRUE(
- isControlFlowEquivalent(*ThirdIfBody, *FourthIfBody, DT, PDT));
- });
-}
-
-TEST(CodeMoverUtils, IsControlFlowEquivalentPointerTest) {
- LLVMContext C;
-
- // void foo(int &i, int *cond) {
- // if (*cond)
- // i = 1;
- // if (*cond)
- // i = 2;
- // *cond = 1;
- // if (*cond)
- // i = 3;
- // }
- std::unique_ptr<Module> M =
- parseIR(C, R"(define void @foo(ptr %i, ptr %cond) {
- entry:
- %0 = load i32, ptr %cond, align 4
- %tobool1 = icmp ne i32 %0, 0
- br i1 %tobool1, label %if.first, label %if.first.end
- if.first:
- store i32 1, ptr %i, align 4
- br label %if.first.end
- if.first.end:
- %1 = load i32, ptr %cond, align 4
- %tobool2 = icmp ne i32 %1, 0
- br i1 %tobool2, label %if.second, label %if.second.end
- if.second:
- store i32 2, ptr %i, align 4
- br label %if.second.end
- if.second.end:
- store i32 1, ptr %cond, align 4
- %2 = load i32, ptr %cond, align 4
- %tobool3 = icmp ne i32 %2, 0
- br i1 %tobool3, label %if.third, label %if.third.end
- if.third:
- store i32 3, ptr %i, align 4
- br label %if.third.end
- if.third.end:
- ret void
- })");
- run(*M, "foo",
- [&](Function &F, DominatorTree &DT, PostDominatorTree &PDT,
- DependenceInfo &DI) {
- BasicBlock *FirstIfBody = getBasicBlockByName(F, "if.first");
- BasicBlock *SecondIfBody = getBasicBlockByName(F, "if.second");
- // Limitation: if we can prove cond haven't been modify between %0 and
- // %1, then we can prove FirstIfBody and SecondIfBody are control flow
- // equivalent.
- EXPECT_FALSE(
- isControlFlowEquivalent(*FirstIfBody, *SecondIfBody, DT, PDT));
-
- BasicBlock *ThirdIfBody = getBasicBlockByName(F, "if.third");
- EXPECT_FALSE(
- isControlFlowEquivalent(*FirstIfBody, *ThirdIfBody, DT, PDT));
- EXPECT_FALSE(
- isControlFlowEquivalent(*SecondIfBody, *ThirdIfBody, DT, PDT));
- });
-}
-
-TEST(CodeMoverUtils, IsControlFlowEquivalentNotPostdomTest) {
- LLVMContext C;
-
- // void foo(bool cond1, bool cond2) {
- // if (cond1) {
- // if (cond2)
- // return;
- // } else
- // if (cond2)
- // return;
- // return;
- // }
- std::unique_ptr<Module> M =
- parseIR(C, R"(define void @foo(i1 %cond1, i1 %cond2) {
- idom:
- br i1 %cond1, label %succ0, label %succ1
- succ0:
- br i1 %cond2, label %succ0ret, label %succ0succ1
- succ0ret:
- ret void
- succ0succ1:
- br label %bb
- succ1:
- br i1 %cond2, label %succ1ret, label %succ1succ1
- succ1ret:
- ret void
- succ1succ1:
- br label %bb
- bb:
- ret void
- })");
- run(*M, "foo",
- [&](Function &F, DominatorTree &DT, PostDominatorTree &PDT,
- DependenceInfo &DI) {
- BasicBlock &Idom = F.front();
- assert(Idom.getName() == "idom" && "Expecting BasicBlock idom");
- BasicBlock &BB = F.back();
- assert(BB.getName() == "bb" && "Expecting BasicBlock bb");
- EXPECT_FALSE(isControlFlowEquivalent(Idom, BB, DT, PDT));
- });
-}
-
TEST(CodeMoverUtils, IsSafeToMoveTest1) {
LLVMContext C;
@@ -514,11 +147,6 @@ TEST(CodeMoverUtils, IsSafeToMoveTest1) {
EXPECT_FALSE(isSafeToMoveBefore(*CI_unsafecall->getNextNode(),
*CI_unsafecall, DT, &PDT, &DI));
- // Moving instruction to non control flow equivalent places are not
- // supported.
- EXPECT_FALSE(
- isSafeToMoveBefore(*SI_A5, *Entry->getTerminator(), DT, &PDT, &DI));
-
// Moving PHINode is not supported.
EXPECT_FALSE(isSafeToMoveBefore(PN, *PN.getNextNode()->getNextNode(),
DT, &PDT, &DI));
More information about the llvm-commits
mailing list