[llvm] [LoopFusion] Simplifying the legality checks (PR #171889)

Alireza Torabian via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 12 11:18:46 PST 2025


https://github.com/1997alireza updated https://github.com/llvm/llvm-project/pull/171889

>From 853cadcc201a7392d55609fad0fffdc373557146 Mon Sep 17 00:00:00 2001
From: Alireza Torabian <alireza.torabian at huawei.com>
Date: Thu, 11 Dec 2025 13:30:46 -0500
Subject: [PATCH] [LoopFusion] Simplifying the legality checks

Since loop fusion currently only supports adjacent loops, the legality
checks in this pass can be simplified. By removing the
isControlFlowEquivalent check, this patch fixes multiple issues,
including #166560, #166535, #165031, #80301 and #168263.
---
 .../llvm/Transforms/Utils/CodeMoverUtils.h    |  16 -
 llvm/lib/Transforms/Scalar/LoopFuse.cpp       | 521 ++++++++----------
 llvm/lib/Transforms/Utils/CodeMoverUtils.cpp  |  46 --
 .../test/Transforms/LoopFusion/cannot_fuse.ll |  37 +-
 .../LoopFusion/diagnostics_missed.ll          |  57 +-
 .../Transforms/Utils/CodeMoverUtilsTest.cpp   | 372 -------------
 6 files changed, 233 insertions(+), 816 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h b/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h
index 877872485ab58..d473f7092f62e 100644
--- a/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h
@@ -27,22 +27,6 @@ class DominatorTree;
 class Instruction;
 class PostDominatorTree;
 
-/// Return true if \p I0 and \p I1 are control flow equivalent.
-/// Two instructions are control flow equivalent if their basic blocks are
-/// control flow equivalent.
-LLVM_ABI bool isControlFlowEquivalent(const Instruction &I0,
-                                      const Instruction &I1,
-                                      const DominatorTree &DT,
-                                      const PostDominatorTree &PDT);
-
-/// Return true if \p BB0 and \p BB1 are control flow equivalent.
-/// Two basic blocks are control flow equivalent if when one executes, the other
-/// is guaranteed to execute.
-LLVM_ABI bool isControlFlowEquivalent(const BasicBlock &BB0,
-                                      const BasicBlock &BB1,
-                                      const DominatorTree &DT,
-                                      const PostDominatorTree &PDT);
-
 /// Return true if \p I can be safely moved before \p InsertPoint.
 LLVM_ABI bool isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
                                  DominatorTree &DT,
diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index 9ffa602416b05..3a06c3f00fa02 100644
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -64,6 +64,7 @@
 #include "llvm/Transforms/Utils/CodeMoverUtils.h"
 #include "llvm/Transforms/Utils/LoopPeel.h"
 #include "llvm/Transforms/Utils/LoopSimplify.h"
+#include <list>
 
 using namespace llvm;
 
@@ -85,7 +86,6 @@ STATISTIC(InvalidDependencies, "Dependencies prevent fusion");
 STATISTIC(UnknownTripCount, "Loop has unknown trip count");
 STATISTIC(UncomputableTripCount, "SCEV cannot compute trip count of loop");
 STATISTIC(NonEqualTripCount, "Loop trip counts are not the same");
-STATISTIC(NonAdjacent, "Loops are not adjacent");
 STATISTIC(
     NonEmptyPreheader,
     "Loop has a non-empty preheader with instructions that cannot be moved");
@@ -174,10 +174,6 @@ struct FusionCandidate {
   /// Has this loop been Peeled
   bool Peeled;
 
-  /// Dominator and PostDominator trees are needed for the
-  /// FusionCandidateCompare function, required by FusionCandidateSet to
-  /// determine where the FusionCandidate should be inserted into the set. These
-  /// are used to establish ordering of the FusionCandidates based on dominance.
   DominatorTree &DT;
   const PostDominatorTree *PDT;
 
@@ -358,10 +354,10 @@ struct FusionCandidate {
 private:
   // This is only used internally for now, to clear the MemWrites and MemReads
   // list and setting Valid to false. I can't envision other uses of this right
-  // now, since once FusionCandidates are put into the FusionCandidateSet they
+  // now, since once FusionCandidates are put into the FusionCandidateList they
   // are immutable. Thus, any time we need to change/update a FusionCandidate,
-  // we must create a new one and insert it into the FusionCandidateSet to
-  // ensure the FusionCandidateSet remains ordered correctly.
+  // we must create a new one and insert it into the FusionCandidateList to
+  // ensure the FusionCandidateList remains ordered correctly.
   void invalidate() {
     MemWrites.clear();
     MemReads.clear();
@@ -381,86 +377,15 @@ struct FusionCandidate {
     return false;
   }
 };
-
-struct FusionCandidateCompare {
-  /// Comparison functor to sort two Control Flow Equivalent fusion candidates
-  /// into dominance order.
-  /// If LHS dominates RHS and RHS post-dominates LHS, return true;
-  /// If RHS dominates LHS and LHS post-dominates RHS, return false;
-  /// If both LHS and RHS are not dominating each other then, non-strictly
-  /// post dominate check will decide the order of candidates. If RHS
-  /// non-strictly post dominates LHS then, return true. If LHS non-strictly
-  /// post dominates RHS then, return false. If both are non-strictly post
-  /// dominate each other then, level in the post dominator tree will decide
-  /// the order of candidates.
-  bool operator()(const FusionCandidate &LHS,
-                  const FusionCandidate &RHS) const {
-    const DominatorTree *DT = &(LHS.DT);
-
-    BasicBlock *LHSEntryBlock = LHS.getEntryBlock();
-    BasicBlock *RHSEntryBlock = RHS.getEntryBlock();
-
-    // Do not save PDT to local variable as it is only used in asserts and thus
-    // will trigger an unused variable warning if building without asserts.
-    assert(DT && LHS.PDT && "Expecting valid dominator tree");
-
-    // Do this compare first so if LHS == RHS, function returns false.
-    if (DT->dominates(RHSEntryBlock, LHSEntryBlock)) {
-      // RHS dominates LHS
-      // Verify LHS post-dominates RHS
-      assert(LHS.PDT->dominates(LHSEntryBlock, RHSEntryBlock));
-      return false;
-    }
-
-    if (DT->dominates(LHSEntryBlock, RHSEntryBlock)) {
-      // Verify RHS Postdominates LHS
-      assert(LHS.PDT->dominates(RHSEntryBlock, LHSEntryBlock));
-      return true;
-    }
-
-    // If two FusionCandidates are in the same level of dominator tree,
-    // they will not dominate each other, but may still be control flow
-    // equivalent. To sort those FusionCandidates, nonStrictlyPostDominate()
-    // function is needed.
-    bool WrongOrder =
-        nonStrictlyPostDominate(LHSEntryBlock, RHSEntryBlock, DT, LHS.PDT);
-    bool RightOrder =
-        nonStrictlyPostDominate(RHSEntryBlock, LHSEntryBlock, DT, LHS.PDT);
-    if (WrongOrder && RightOrder) {
-      // If common predecessor of LHS and RHS post dominates both
-      // FusionCandidates then, Order of FusionCandidate can be
-      // identified by its level in post dominator tree.
-      DomTreeNode *LNode = LHS.PDT->getNode(LHSEntryBlock);
-      DomTreeNode *RNode = LHS.PDT->getNode(RHSEntryBlock);
-      return LNode->getLevel() > RNode->getLevel();
-    } else if (WrongOrder)
-      return false;
-    else if (RightOrder)
-      return true;
-
-    // If LHS does not non-strict Postdominate RHS and RHS does not non-strict
-    // Postdominate LHS then, there is no dominance relationship between the
-    // two FusionCandidates. Thus, they should not be in the same set together.
-    llvm_unreachable(
-        "No dominance relationship between these fusion candidates!");
-  }
-};
 } // namespace
 
 using LoopVector = SmallVector<Loop *, 4>;
 
-// Set of Control Flow Equivalent (CFE) Fusion Candidates, sorted in dominance
-// order. Thus, if FC0 comes *before* FC1 in a FusionCandidateSet, then FC0
-// dominates FC1 and FC1 post-dominates FC0.
-// std::set was chosen because we want a sorted data structure with stable
-// iterators. A subsequent patch to loop fusion will enable fusing non-adjacent
-// loops by moving intervening code around. When this intervening code contains
-// loops, those loops will be moved also. The corresponding FusionCandidates
-// will also need to be moved accordingly. As this is done, having stable
-// iterators will simplify the logic. Similarly, having an efficient insert that
-// keeps the FusionCandidateSet sorted will also simplify the implementation.
-using FusionCandidateSet = std::set<FusionCandidate, FusionCandidateCompare>;
-using FusionCandidateCollection = SmallVector<FusionCandidateSet, 4>;
+// List of adjacent fusion candidates in order. Thus, if FC0 comes *before* FC1
+// in a FusionCandidateList, then FC0 dominates FC1, FC1 post-dominates FC0,
+// and they are adjacent.
+using FusionCandidateList = std::list<FusionCandidate>;
+using FusionCandidateCollection = SmallVector<FusionCandidateList, 4>;
 
 #ifndef NDEBUG
 static void printLoopVector(const LoopVector &LV) {
@@ -480,8 +405,8 @@ static raw_ostream &operator<<(raw_ostream &OS, const FusionCandidate &FC) {
 }
 
 static raw_ostream &operator<<(raw_ostream &OS,
-                               const FusionCandidateSet &CandSet) {
-  for (const FusionCandidate &FC : CandSet)
+                               const FusionCandidateList &CandList) {
+  for (const FusionCandidate &FC : CandList)
     OS << FC << '\n';
 
   return OS;
@@ -490,9 +415,9 @@ static raw_ostream &operator<<(raw_ostream &OS,
 static void
 printFusionCandidates(const FusionCandidateCollection &FusionCandidates) {
   dbgs() << "Fusion Candidates: \n";
-  for (const auto &CandidateSet : FusionCandidates) {
-    dbgs() << "*** Fusion Candidate Set ***\n";
-    dbgs() << CandidateSet;
+  for (const auto &CandidateList : FusionCandidates) {
+    dbgs() << "*** Fusion Candidate List ***\n";
+    dbgs() << CandidateList;
     dbgs() << "****************************\n";
   }
 }
@@ -648,20 +573,6 @@ struct LoopFuser {
   }
 
 private:
-  /// Determine if two fusion candidates are control flow equivalent.
-  ///
-  /// Two fusion candidates are control flow equivalent if when one executes,
-  /// the other is guaranteed to execute. This is determined using dominators
-  /// and post-dominators: if A dominates B and B post-dominates A then A and B
-  /// are control-flow equivalent.
-  bool isControlFlowEquivalent(const FusionCandidate &FC0,
-                               const FusionCandidate &FC1) const {
-    assert(FC0.Preheader && FC1.Preheader && "Expecting valid preheaders");
-
-    return ::isControlFlowEquivalent(*FC0.getEntryBlock(), *FC1.getEntryBlock(),
-                                     DT, PDT);
-  }
-
   /// Iterate over all loops in the given loop set and identify the loops that
   /// are eligible for fusion. Place all eligible fusion candidates into Control
   /// Flow Equivalent sets, sorted by dominance.
@@ -673,34 +584,42 @@ struct LoopFuser {
       if (!CurrCand.isEligibleForFusion(SE))
         continue;
 
-      // Go through each list in FusionCandidates and determine if L is control
-      // flow equivalent with the first loop in that list. If it is, append LV.
+      // Go through each list in FusionCandidates and determine if the first or
+      // last loop in the list is strictly adjacent to L. If it is, append L.
       // If not, go to the next list.
       // If no suitable list is found, start another list and add it to
       // FusionCandidates.
-      bool FoundSet = false;
-
-      for (auto &CurrCandSet : FusionCandidates) {
-        if (isControlFlowEquivalent(*CurrCandSet.begin(), CurrCand)) {
-          CurrCandSet.insert(CurrCand);
-          FoundSet = true;
+      bool FoundAdjacent = false;
+      for (auto &CurrCandList : FusionCandidates) {
+        if (isStrictlyAdjacent(CurrCand, CurrCandList.front())) {
+          CurrCandList.push_front(CurrCand);
+          FoundAdjacent = true;
 #ifndef NDEBUG
           if (VerboseFusionDebugging)
             LLVM_DEBUG(dbgs() << "Adding " << CurrCand
-                              << " to existing candidate set\n");
+                              << " to existing candidate list\n");
+#endif
+          break;
+        } else if (isStrictlyAdjacent(CurrCandList.back(), CurrCand)) {
+          CurrCandList.push_back(CurrCand);
+          FoundAdjacent = true;
+#ifndef NDEBUG
+          if (VerboseFusionDebugging)
+            LLVM_DEBUG(dbgs() << "Adding " << CurrCand
+                              << " to existing candidate list\n");
 #endif
           break;
         }
       }
-      if (!FoundSet) {
-        // No set was found. Create a new set and add to FusionCandidates
+      if (!FoundAdjacent) {
+        // No list was found. Create a new list and add to FusionCandidates
 #ifndef NDEBUG
         if (VerboseFusionDebugging)
-          LLVM_DEBUG(dbgs() << "Adding " << CurrCand << " to new set\n");
+          LLVM_DEBUG(dbgs() << "Adding " << CurrCand << " to new list\n");
 #endif
-        FusionCandidateSet NewCandSet;
-        NewCandSet.insert(CurrCand);
-        FusionCandidates.push_back(NewCandSet);
+        FusionCandidateList NewCandList;
+        NewCandList.push_back(CurrCand);
+        FusionCandidates.push_back(NewCandList);
       }
       NumFusionCandidates++;
     }
@@ -849,218 +768,205 @@ struct LoopFuser {
     }
   }
 
-  /// Walk each set of control flow equivalent fusion candidates and attempt to
-  /// fuse them. This does a single linear traversal of all candidates in the
-  /// set. The conditions for legal fusion are checked at this point. If a pair
-  /// of fusion candidates passes all legality checks, they are fused together
-  /// and a new fusion candidate is created and added to the FusionCandidateSet.
+  /// Walk each list of strictly adjacent fusion candidates and attempt to fuse
+  /// them. This does a single linear traversal of all candidates in the list.
+  /// The conditions for legal fusion are checked at this point. If a pair of
+  /// fusion candidates passes all legality checks, they are fused together and
+  /// a new fusion candidate is created and added to the FusionCandidateList.
   /// The original fusion candidates are then removed, as they are no longer
   /// valid.
   bool fuseCandidates() {
     bool Fused = false;
     LLVM_DEBUG(printFusionCandidates(FusionCandidates));
-    for (auto &CandidateSet : FusionCandidates) {
-      if (CandidateSet.size() < 2)
+    for (auto &CandidateList : FusionCandidates) {
+      if (CandidateList.size() < 2)
         continue;
 
-      LLVM_DEBUG(dbgs() << "Attempting fusion on Candidate Set:\n"
-                        << CandidateSet << "\n");
-
-      for (auto FC0 = CandidateSet.begin(); FC0 != CandidateSet.end(); ++FC0) {
-        assert(!LDT.isRemovedLoop(FC0->L) &&
-               "Should not have removed loops in CandidateSet!");
-        auto FC1 = FC0;
-        for (++FC1; FC1 != CandidateSet.end(); ++FC1) {
-          assert(!LDT.isRemovedLoop(FC1->L) &&
-                 "Should not have removed loops in CandidateSet!");
-
-          LLVM_DEBUG(dbgs() << "Attempting to fuse candidate \n"; FC0->dump();
-                     dbgs() << " with\n"; FC1->dump(); dbgs() << "\n");
-
-          FC0->verify();
-          FC1->verify();
-
-          // Check if the candidates have identical tripcounts (first value of
-          // pair), and if not check the difference in the tripcounts between
-          // the loops (second value of pair). The difference is not equal to
-          // std::nullopt iff the loops iterate a constant number of times, and
-          // have a single exit.
-          std::pair<bool, std::optional<unsigned>> IdenticalTripCountRes =
-              haveIdenticalTripCounts(*FC0, *FC1);
-          bool SameTripCount = IdenticalTripCountRes.first;
-          std::optional<unsigned> TCDifference = IdenticalTripCountRes.second;
-
-          // Here we are checking that FC0 (the first loop) can be peeled, and
-          // both loops have different tripcounts.
-          if (FC0->AbleToPeel && !SameTripCount && TCDifference) {
-            if (*TCDifference > FusionPeelMaxCount) {
-              LLVM_DEBUG(dbgs()
-                         << "Difference in loop trip counts: " << *TCDifference
-                         << " is greater than maximum peel count specificed: "
-                         << FusionPeelMaxCount << "\n");
-            } else {
-              // Dependent on peeling being performed on the first loop, and
-              // assuming all other conditions for fusion return true.
-              SameTripCount = true;
-            }
-          }
+      LLVM_DEBUG(dbgs() << "Attempting fusion on Candidate List:\n"
+                        << CandidateList << "\n");
 
-          if (!SameTripCount) {
-            LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical trip "
-                                 "counts. Not fusing.\n");
-            reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
-                                                       NonEqualTripCount);
-            continue;
-          }
+      for (auto It = CandidateList.begin(), NextIt = std::next(It);
+           NextIt != CandidateList.end(); It = NextIt, NextIt = std::next(It)) {
 
-          if (!isAdjacent(*FC0, *FC1)) {
-            LLVM_DEBUG(dbgs()
-                       << "Fusion candidates are not adjacent. Not fusing.\n");
-            reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1, NonAdjacent);
-            continue;
-          }
+        auto FC0 = *It;
+        auto FC1 = *NextIt;
 
-          if ((!FC0->GuardBranch && FC1->GuardBranch) ||
-              (FC0->GuardBranch && !FC1->GuardBranch)) {
-            LLVM_DEBUG(dbgs() << "The one of candidate is guarded while the "
-                                 "another one is not. Not fusing.\n");
-            reportLoopFusion<OptimizationRemarkMissed>(
-                *FC0, *FC1, OnlySecondCandidateIsGuarded);
-            continue;
-          }
+        assert(!LDT.isRemovedLoop(FC0.L) &&
+               "Should not have removed loops in CandidateList!");
+        assert(!LDT.isRemovedLoop(FC1.L) &&
+               "Should not have removed loops in CandidateList!");
 
-          // Ensure that FC0 and FC1 have identical guards.
-          // If one (or both) are not guarded, this check is not necessary.
-          if (FC0->GuardBranch && FC1->GuardBranch &&
-              !haveIdenticalGuards(*FC0, *FC1) && !TCDifference) {
-            LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical "
-                                 "guards. Not Fusing.\n");
-            reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
-                                                       NonIdenticalGuards);
-            continue;
-          }
+        LLVM_DEBUG(dbgs() << "Attempting to fuse candidate \n"; FC0.dump();
+                   dbgs() << " with\n"; FC1.dump(); dbgs() << "\n");
 
-          if (FC0->GuardBranch) {
-            assert(FC1->GuardBranch && "Expecting valid FC1 guard branch");
-
-            if (!isSafeToMoveBefore(*FC0->ExitBlock,
-                                    *FC1->ExitBlock->getFirstNonPHIOrDbg(), DT,
-                                    &PDT, &DI)) {
-              LLVM_DEBUG(dbgs() << "Fusion candidate contains unsafe "
-                                   "instructions in exit block. Not fusing.\n");
-              reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
-                                                         NonEmptyExitBlock);
-              continue;
-            }
+        FC0.verify();
+        FC1.verify();
 
-            if (!isSafeToMoveBefore(
-                    *FC1->GuardBranch->getParent(),
-                    *FC0->GuardBranch->getParent()->getTerminator(), DT, &PDT,
-                    &DI)) {
-              LLVM_DEBUG(dbgs()
-                         << "Fusion candidate contains unsafe "
-                            "instructions in guard block. Not fusing.\n");
-              reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
-                                                         NonEmptyGuardBlock);
-              continue;
-            }
-          }
-
-          // Check the dependencies across the loops and do not fuse if it would
-          // violate them.
-          if (!dependencesAllowFusion(*FC0, *FC1)) {
-            LLVM_DEBUG(dbgs() << "Memory dependencies do not allow fusion!\n");
-            reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
-                                                       InvalidDependencies);
-            continue;
-          }
+        // Check if the candidates have identical tripcounts (first value of
+        // pair), and if not check the difference in the tripcounts between
+        // the loops (second value of pair). The difference is not equal to
+        // std::nullopt iff the loops iterate a constant number of times, and
+        // have a single exit.
+        std::pair<bool, std::optional<unsigned>> IdenticalTripCountRes =
+            haveIdenticalTripCounts(FC0, FC1);
+        bool SameTripCount = IdenticalTripCountRes.first;
+        std::optional<unsigned> TCDifference = IdenticalTripCountRes.second;
 
-          // If the second loop has instructions in the pre-header, attempt to
-          // hoist them up to the first loop's pre-header or sink them into the
-          // body of the second loop.
-          SmallVector<Instruction *, 4> SafeToHoist;
-          SmallVector<Instruction *, 4> SafeToSink;
-          // At this point, this is the last remaining legality check.
-          // Which means if we can make this pre-header empty, we can fuse
-          // these loops
-          if (!isEmptyPreheader(*FC1)) {
-            LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty "
-                                 "preheader.\n");
-
-            // If it is not safe to hoist/sink all instructions in the
-            // pre-header, we cannot fuse these loops.
-            if (!collectMovablePreheaderInsts(*FC0, *FC1, SafeToHoist,
-                                              SafeToSink)) {
-              LLVM_DEBUG(dbgs() << "Could not hoist/sink all instructions in "
-                                   "Fusion Candidate Pre-header.\n"
-                                << "Not Fusing.\n");
-              reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
-                                                         NonEmptyPreheader);
-              continue;
-            }
+        // Here we are checking that FC0 (the first loop) can be peeled, and
+        // both loops have different tripcounts.
+        if (FC0.AbleToPeel && !SameTripCount && TCDifference) {
+          if (*TCDifference > FusionPeelMaxCount) {
+            LLVM_DEBUG(dbgs()
+                       << "Difference in loop trip counts: " << *TCDifference
+                       << " is greater than maximum peel count specificed: "
+                       << FusionPeelMaxCount << "\n");
+          } else {
+            // Dependent on peeling being performed on the first loop, and
+            // assuming all other conditions for fusion return true.
+            SameTripCount = true;
           }
+        }
 
-          bool BeneficialToFuse = isBeneficialFusion(*FC0, *FC1);
-          LLVM_DEBUG(dbgs()
-                     << "\tFusion appears to be "
-                     << (BeneficialToFuse ? "" : "un") << "profitable!\n");
-          if (!BeneficialToFuse) {
-            reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
-                                                       FusionNotBeneficial);
-            continue;
-          }
-          // All analysis has completed and has determined that fusion is legal
-          // and profitable. At this point, start transforming the code and
-          // perform fusion.
+        if (!SameTripCount) {
+          LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical trip "
+                               "counts. Not fusing.\n");
+          reportLoopFusion<OptimizationRemarkMissed>(FC0, FC1,
+                                                     NonEqualTripCount);
+          continue;
+        }
 
-          // Execute the hoist/sink operations on preheader instructions
-          movePreheaderInsts(*FC0, *FC1, SafeToHoist, SafeToSink);
+        if ((!FC0.GuardBranch && FC1.GuardBranch) ||
+            (FC0.GuardBranch && !FC1.GuardBranch)) {
+          LLVM_DEBUG(dbgs() << "The one of candidate is guarded while the "
+                               "another one is not. Not fusing.\n");
+          reportLoopFusion<OptimizationRemarkMissed>(
+              FC0, FC1, OnlySecondCandidateIsGuarded);
+          continue;
+        }
 
-          LLVM_DEBUG(dbgs() << "\tFusion is performed: " << *FC0 << " and "
-                            << *FC1 << "\n");
+        // Ensure that FC0 and FC1 have identical guards.
+        // If one (or both) are not guarded, this check is not necessary.
+        if (FC0.GuardBranch && FC1.GuardBranch &&
+            !haveIdenticalGuards(FC0, FC1) && !TCDifference) {
+          LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical "
+                               "guards. Not Fusing.\n");
+          reportLoopFusion<OptimizationRemarkMissed>(FC0, FC1,
+                                                     NonIdenticalGuards);
+          continue;
+        }
 
-          FusionCandidate FC0Copy = *FC0;
-          // Peel the loop after determining that fusion is legal. The Loops
-          // will still be safe to fuse after the peeling is performed.
-          bool Peel = TCDifference && *TCDifference > 0;
-          if (Peel)
-            peelFusionCandidate(FC0Copy, *FC1, *TCDifference);
+        if (FC0.GuardBranch) {
+          assert(FC1.GuardBranch && "Expecting valid FC1 guard branch");
 
-          // Report fusion to the Optimization Remarks.
-          // Note this needs to be done *before* performFusion because
-          // performFusion will change the original loops, making it not
-          // possible to identify them after fusion is complete.
-          reportLoopFusion<OptimizationRemark>((Peel ? FC0Copy : *FC0), *FC1,
-                                               FuseCounter);
+          if (!isSafeToMoveBefore(*FC0.ExitBlock,
+                                  *FC1.ExitBlock->getFirstNonPHIOrDbg(), DT,
+                                  &PDT, &DI)) {
+            LLVM_DEBUG(dbgs() << "Fusion candidate contains unsafe "
+                                 "instructions in exit block. Not fusing.\n");
+            reportLoopFusion<OptimizationRemarkMissed>(FC0, FC1,
+                                                       NonEmptyExitBlock);
+            continue;
+          }
 
-          FusionCandidate FusedCand(
-              performFusion((Peel ? FC0Copy : *FC0), *FC1), DT, &PDT, ORE,
-              FC0Copy.PP);
-          FusedCand.verify();
-          assert(FusedCand.isEligibleForFusion(SE) &&
-                 "Fused candidate should be eligible for fusion!");
+          if (!isSafeToMoveBefore(
+                  *FC1.GuardBranch->getParent(),
+                  *FC0.GuardBranch->getParent()->getTerminator(), DT, &PDT,
+                  &DI)) {
+            LLVM_DEBUG(dbgs() << "Fusion candidate contains unsafe "
+                                 "instructions in guard block. Not fusing.\n");
+            reportLoopFusion<OptimizationRemarkMissed>(FC0, FC1,
+                                                       NonEmptyGuardBlock);
+            continue;
+          }
+        }
 
-          // Notify the loop-depth-tree that these loops are not valid objects
-          LDT.removeLoop(FC1->L);
+        // Check the dependencies across the loops and do not fuse if it would
+        // violate them.
+        if (!dependencesAllowFusion(FC0, FC1)) {
+          LLVM_DEBUG(dbgs() << "Memory dependencies do not allow fusion!\n");
+          reportLoopFusion<OptimizationRemarkMissed>(FC0, FC1,
+                                                     InvalidDependencies);
+          continue;
+        }
 
-          CandidateSet.erase(FC0);
-          CandidateSet.erase(FC1);
+        // If the second loop has instructions in the pre-header, attempt to
+        // hoist them up to the first loop's pre-header or sink them into the
+        // body of the second loop.
+        SmallVector<Instruction *, 4> SafeToHoist;
+        SmallVector<Instruction *, 4> SafeToSink;
+        // At this point, this is the last remaining legality check.
+        // Which means if we can make this pre-header empty, we can fuse
+        // these loops
+        if (!isEmptyPreheader(FC1)) {
+          LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty "
+                               "preheader.\n");
+
+          // If it is not safe to hoist/sink all instructions in the
+          // pre-header, we cannot fuse these loops.
+          if (!collectMovablePreheaderInsts(FC0, FC1, SafeToHoist,
+                                            SafeToSink)) {
+            LLVM_DEBUG(dbgs() << "Could not hoist/sink all instructions in "
+                                 "Fusion Candidate Pre-header.\n"
+                              << "Not Fusing.\n");
+            reportLoopFusion<OptimizationRemarkMissed>(FC0, FC1,
+                                                       NonEmptyPreheader);
+            continue;
+          }
+        }
 
-          auto InsertPos = CandidateSet.insert(FusedCand);
+        bool BeneficialToFuse = isBeneficialFusion(FC0, FC1);
+        LLVM_DEBUG(dbgs() << "\tFusion appears to be "
+                          << (BeneficialToFuse ? "" : "un") << "profitable!\n");
+        if (!BeneficialToFuse) {
+          reportLoopFusion<OptimizationRemarkMissed>(FC0, FC1,
+                                                     FusionNotBeneficial);
+          continue;
+        }
+        // All analysis has completed and has determined that fusion is legal
+        // and profitable. At this point, start transforming the code and
+        // perform fusion.
 
-          assert(InsertPos.second &&
-                 "Unable to insert TargetCandidate in CandidateSet!");
+        // Execute the hoist/sink operations on preheader instructions
+        movePreheaderInsts(FC0, FC1, SafeToHoist, SafeToSink);
 
-          // Reset FC0 and FC1 the new (fused) candidate. Subsequent iterations
-          // of the FC1 loop will attempt to fuse the new (fused) loop with the
-          // remaining candidates in the current candidate set.
-          FC0 = FC1 = InsertPos.first;
+        LLVM_DEBUG(dbgs() << "\tFusion is performed: " << FC0 << " and " << FC1
+                          << "\n");
 
-          LLVM_DEBUG(dbgs() << "Candidate Set (after fusion): " << CandidateSet
-                            << "\n");
+        FusionCandidate FC0Copy = FC0;
+        // Peel the loop after determining that fusion is legal. The Loops
+        // will still be safe to fuse after the peeling is performed.
+        bool Peel = TCDifference && *TCDifference > 0;
+        if (Peel)
+          peelFusionCandidate(FC0Copy, FC1, *TCDifference);
+
+        // Report fusion to the Optimization Remarks.
+        // Note this needs to be done *before* performFusion because
+        // performFusion will change the original loops, making it not
+        // possible to identify them after fusion is complete.
+        reportLoopFusion<OptimizationRemark>((Peel ? FC0Copy : FC0), FC1,
+                                             FuseCounter);
+
+        FusionCandidate FusedCand(performFusion((Peel ? FC0Copy : FC0), FC1),
+                                  DT, &PDT, ORE, FC0Copy.PP);
+        FusedCand.verify();
+        assert(FusedCand.isEligibleForFusion(SE) &&
+               "Fused candidate should be eligible for fusion!");
+
+        // Notify the loop-depth-tree that these loops are not valid objects
+        LDT.removeLoop(FC1.L);
+
+        // Replace FC0 and FC1 with their fused loop
+        It = CandidateList.erase(It);
+        It = CandidateList.erase(It);
+        It = CandidateList.insert(It, FusedCand);
+
+        // Start from FusedCand in the next iteration
+        NextIt = It;
+
+        LLVM_DEBUG(dbgs() << "Candidate List (after fusion): " << CandidateList
+                          << "\n");
 
-          Fused = true;
-        }
+        Fused = true;
       }
     }
     return Fused;
@@ -1488,7 +1394,7 @@ struct LoopFuser {
     return true;
   }
 
-  /// Determine if two fusion candidates are adjacent in the CFG.
+  /// Determine if two fusion candidates are strictly adjacent in the CFG.
   ///
   /// This method will determine if there are additional basic blocks in the CFG
   /// between the exit of \p FC0 and the entry of \p FC1.
@@ -1497,11 +1403,14 @@ struct LoopFuser {
   /// FC1. If not, then the loops are not adjacent. If the two candidates are
   /// not guarded loops, then it checks whether the exit block of \p FC0 is the
   /// preheader of \p FC1.
-  bool isAdjacent(const FusionCandidate &FC0,
-                  const FusionCandidate &FC1) const {
+  /// "Strictly" means that FC1 has no predecessor other than those coming
+  /// from FC0, i.e., FC0 dominates FC1.
+  bool isStrictlyAdjacent(const FusionCandidate &FC0,
+                          const FusionCandidate &FC1) const {
     // If the successor of the guard branch is FC1, then the loops are adjacent
     if (FC0.GuardBranch)
-      return FC0.getNonLoopBlock() == FC1.getEntryBlock();
+      return DT.dominates(FC0.getEntryBlock(), FC1.getEntryBlock()) &&
+             FC0.getNonLoopBlock() == FC1.getEntryBlock();
     else
       return FC0.ExitBlock == FC1.getEntryBlock();
   }
diff --git a/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp b/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
index 0e076c60d6085..8384d46837a7e 100644
--- a/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
+++ b/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
@@ -25,8 +25,6 @@ using namespace llvm;
 STATISTIC(HasDependences,
           "Cannot move across instructions that has memory dependences");
 STATISTIC(MayThrowException, "Cannot move across instructions that may throw");
-STATISTIC(NotControlFlowEquivalent,
-          "Instructions are not control flow equivalent");
 STATISTIC(NotMovedPHINode, "Movement of PHINodes are not supported");
 STATISTIC(NotMovedTerminator, "Movement of Terminator are not supported");
 
@@ -228,44 +226,6 @@ bool ControlConditions::isInverse(const Value &V1, const Value &V2) {
   return false;
 }
 
-bool llvm::isControlFlowEquivalent(const Instruction &I0, const Instruction &I1,
-                                   const DominatorTree &DT,
-                                   const PostDominatorTree &PDT) {
-  return isControlFlowEquivalent(*I0.getParent(), *I1.getParent(), DT, PDT);
-}
-
-bool llvm::isControlFlowEquivalent(const BasicBlock &BB0, const BasicBlock &BB1,
-                                   const DominatorTree &DT,
-                                   const PostDominatorTree &PDT) {
-  if (&BB0 == &BB1)
-    return true;
-
-  if ((DT.dominates(&BB0, &BB1) && PDT.dominates(&BB1, &BB0)) ||
-      (PDT.dominates(&BB0, &BB1) && DT.dominates(&BB1, &BB0)))
-    return true;
-
-  // If the set of conditions required to execute BB0 and BB1 from their common
-  // dominator are the same, then BB0 and BB1 are control flow equivalent.
-  const BasicBlock *CommonDominator = DT.findNearestCommonDominator(&BB0, &BB1);
-  LLVM_DEBUG(dbgs() << "The nearest common dominator of " << BB0.getName()
-                    << " and " << BB1.getName() << " is "
-                    << CommonDominator->getName() << "\n");
-
-  const std::optional<ControlConditions> BB0Conditions =
-      ControlConditions::collectControlConditions(BB0, *CommonDominator, DT,
-                                                  PDT);
-  if (BB0Conditions == std::nullopt)
-    return false;
-
-  const std::optional<ControlConditions> BB1Conditions =
-      ControlConditions::collectControlConditions(BB1, *CommonDominator, DT,
-                                                  PDT);
-  if (BB1Conditions == std::nullopt)
-    return false;
-
-  return BB0Conditions->isEquivalent(*BB1Conditions);
-}
-
 static bool reportInvalidCandidate(const Instruction &I,
                                    llvm::Statistic &Stat) {
   ++Stat;
@@ -330,10 +290,6 @@ bool llvm::isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
   if (I.isTerminator())
     return reportInvalidCandidate(I, NotMovedTerminator);
 
-  // TODO remove this limitation.
-  if (!isControlFlowEquivalent(I, InsertPoint, DT, *PDT))
-    return reportInvalidCandidate(I, NotControlFlowEquivalent);
-
   if (isReachedBefore(&I, &InsertPoint, &DT, PDT))
     for (const Use &U : I.uses())
       if (auto *UserInst = dyn_cast<Instruction>(U.getUser())) {
@@ -450,8 +406,6 @@ bool llvm::nonStrictlyPostDominate(const BasicBlock *ThisBlock,
                                    const BasicBlock *OtherBlock,
                                    const DominatorTree *DT,
                                    const PostDominatorTree *PDT) {
-  assert(isControlFlowEquivalent(*ThisBlock, *OtherBlock, *DT, *PDT) &&
-         "ThisBlock and OtherBlock must be CFG equivalent!");
   const BasicBlock *CommonDominator =
       DT->findNearestCommonDominator(ThisBlock, OtherBlock);
   if (CommonDominator == nullptr)
diff --git a/llvm/test/Transforms/LoopFusion/cannot_fuse.ll b/llvm/test/Transforms/LoopFusion/cannot_fuse.ll
index 6819ec377ab17..0eea73940904d 100644
--- a/llvm/test/Transforms/LoopFusion/cannot_fuse.ll
+++ b/llvm/test/Transforms/LoopFusion/cannot_fuse.ll
@@ -8,10 +8,10 @@
 
 ; CHECK: Performing Loop Fusion on function non_cfe
 ; CHECK: Fusion Candidates:
-; CHECK: *** Fusion Candidate Set ***
+; CHECK: *** Fusion Candidate List ***
 ; CHECK: bb
 ; CHECK: ****************************
-; CHECK: *** Fusion Candidate Set ***
+; CHECK: *** Fusion Candidate List ***
 ; CHECK: bb20.preheader
 ; CHECK: ****************************
 ; CHECK: Loop Fusion complete
@@ -81,14 +81,12 @@ bb33:                                             ; preds = %bb33.loopexit, %bb1
 
 ; CHECK: Performing Loop Fusion on function non_adjacent
 ; CHECK: Fusion Candidates:
-; CHECK: *** Fusion Candidate Set ***
+; CHECK: *** Fusion Candidate List ***
 ; CHECK-NEXT: [[LOOP1PREHEADER:bb[0-9]*]]
+; CHECK-NEXT: ****************************
+; CHECK: *** Fusion Candidate List ***
 ; CHECK-NEXT: [[LOOP2PREHEADER:bb[0-9]*]]
 ; CHECK-NEXT: ****************************
-; CHECK: Attempting fusion on Candidate Set:
-; CHECK-NEXT: [[LOOP1PREHEADER]]
-; CHECK-NEXT: [[LOOP2PREHEADER]]
-; CHECK: Fusion candidates are not adjacent. Not fusing.
 ; CHECK: Loop Fusion complete
 define void @non_adjacent(ptr noalias %arg) {
 bb:
@@ -143,11 +141,11 @@ bb25:                                             ; preds = %bb15
 
 ; CHECK: Performing Loop Fusion on function different_bounds
 ; CHECK: Fusion Candidates:
-; CHECK: *** Fusion Candidate Set ***
+; CHECK: *** Fusion Candidate List ***
 ; CHECK-NEXT: [[LOOP1PREHEADER:bb[0-9]*]]
 ; CHECK-NEXT: [[LOOP2PREHEADER:bb[0-9]*]]
 ; CHECK-NEXT: ****************************
-; CHECK: Attempting fusion on Candidate Set:
+; CHECK: Attempting fusion on Candidate List:
 ; CHECK-NEXT: [[LOOP1PREHEADER]]
 ; CHECK-NEXT: [[LOOP2PREHEADER]]
 ; CHECK: Fusion candidates do not have identical trip counts. Not fusing.
@@ -157,7 +155,7 @@ bb:
   br label %bb5
 
 bb4:                                              ; preds = %bb11
-  br label %bb13
+  br label %bb16
 
 bb5:                                              ; preds = %bb, %bb11
   %.013 = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ]
@@ -175,14 +173,11 @@ bb11:                                             ; preds = %bb5
   %exitcond2 = icmp ne i64 %tmp12, 100
   br i1 %exitcond2, label %bb5, label %bb4
 
-bb13:                                             ; preds = %bb4
-  br label %bb16
-
 bb15:                                             ; preds = %bb23
   br label %bb25
 
-bb16:                                             ; preds = %bb13, %bb23
-  %.02 = phi i64 [ 0, %bb13 ], [ %tmp24, %bb23 ]
+bb16:                                             ; preds = %bb4, %bb23
+  %.02 = phi i64 [ 0, %bb4 ], [ %tmp24, %bb23 ]
   %tmp17 = add nsw i64 %.02, -3
   %tmp18 = add nuw nsw i64 %.02, 3
   %tmp19 = mul nsw i64 %tmp17, %tmp18
@@ -206,11 +201,11 @@ bb25:                                             ; preds = %bb15
 
 ; CHECK: Performing Loop Fusion on function negative_dependence
 ; CHECK: Fusion Candidates:
-; CHECK: *** Fusion Candidate Set ***
+; CHECK: *** Fusion Candidate List ***
 ; CHECK-NEXT: [[LOOP1PREHEADER:bb[0-9]*]]
 ; CHECK-NEXT: [[LOOP2PREHEADER:bb[0-9]*]]
 ; CHECK-NEXT: ****************************
-; CHECK: Attempting fusion on Candidate Set:
+; CHECK: Attempting fusion on Candidate List:
 ; CHECK-NEXT: [[LOOP1PREHEADER]]
 ; CHECK-NEXT: [[LOOP2PREHEADER]]
 ; CHECK: Memory dependencies do not allow fusion!
@@ -260,11 +255,11 @@ bb19:                                             ; preds = %bb18
 
 ; CHECK: Performing Loop Fusion on function sumTest
 ; CHECK: Fusion Candidates:
-; CHECK: *** Fusion Candidate Set ***
+; CHECK: *** Fusion Candidate List ***
 ; CHECK-NEXT: [[LOOP1PREHEADER:bb[0-9]*]]
 ; CHECK-NEXT: [[LOOP2PREHEADER:bb[0-9]*]]
 ; CHECK-NEXT: ****************************
-; CHECK: Attempting fusion on Candidate Set:
+; CHECK: Attempting fusion on Candidate List:
 ; CHECK-NEXT: [[LOOP1PREHEADER]]
 ; CHECK-NEXT: [[LOOP2PREHEADER]]
 ; CHECK: Memory dependencies do not allow fusion!
@@ -314,11 +309,11 @@ bb21:                                             ; preds = %bb14
 
 ; CHECK: Performing Loop Fusion on function test
 ; CHECK: Fusion Candidates:
-; CHECK: *** Fusion Candidate Set ***
+; CHECK: *** Fusion Candidate List ***
 ; CHECK-NEXT: [[LOOP1PREHEADER:for.body[0-9]*.preheader]]
 ; CHECK-NEXT: [[LOOP2PREHEADER:for.body[0-9]*.preheader]]
 ; CHECK-NEXT: ****************************
-; CHECK: Attempting fusion on Candidate Set:
+; CHECK: Attempting fusion on Candidate List:
 ; CHECK-NEXT: [[LOOP1PREHEADER]]
 ; CHECK-NEXT: [[LOOP2PREHEADER]]
 ; CHECK: Memory dependencies do not allow fusion!
diff --git a/llvm/test/Transforms/LoopFusion/diagnostics_missed.ll b/llvm/test/Transforms/LoopFusion/diagnostics_missed.ll
index f30a070153742..711d462da37d7 100644
--- a/llvm/test/Transforms/LoopFusion/diagnostics_missed.ll
+++ b/llvm/test/Transforms/LoopFusion/diagnostics_missed.ll
@@ -5,64 +5,11 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 
 @B = common global [1024 x i32] zeroinitializer, align 16, !dbg !0
 
-; CHECK: remark: diagnostics_missed.c:18:3: [non_adjacent]: entry and for.end: Loops are not adjacent
-define void @non_adjacent(ptr noalias %A) !dbg !14 {
-entry:
-  br label %for.body
-
-for.cond.cleanup:                                 ; preds = %for.inc
-  br label %for.end
-
-for.body:                                         ; preds = %entry, %for.inc
-  %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.inc ]
-  %sub = add nsw i64 %i.02, -3
-  %add = add nuw nsw i64 %i.02, 3
-  %mul = mul nsw i64 %sub, %add
-  %rem = srem i64 %mul, %i.02
-  %conv = trunc i64 %rem to i32
-  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %i.02
-  store i32 %conv, ptr %arrayidx, align 4
-  br label %for.inc
-
-for.inc:                                          ; preds = %for.body
-  %inc = add nuw nsw i64 %i.02, 1, !dbg !26
-  %exitcond1 = icmp ne i64 %inc, 100
-  br i1 %exitcond1, label %for.body, label %for.cond.cleanup, !llvm.loop !28
-
-for.end:                                          ; preds = %for.cond.cleanup
-  br label %for.body6
-
-for.cond.cleanup5:                                ; preds = %for.inc13
-  br label %for.end15
-
-for.body6:                                        ; preds = %for.end, %for.inc13
-  %i1.01 = phi i64 [ 0, %for.end ], [ %inc14, %for.inc13 ]
-  %sub7 = add nsw i64 %i1.01, -3
-  %add8 = add nuw nsw i64 %i1.01, 3
-  %mul9 = mul nsw i64 %sub7, %add8
-  %rem10 = srem i64 %mul9, %i1.01
-  %conv11 = trunc i64 %rem10 to i32
-  %arrayidx12 = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 %i1.01
-  store i32 %conv11, ptr %arrayidx12, align 4
-  br label %for.inc13
-
-for.inc13:                                        ; preds = %for.body6
-  %inc14 = add nuw nsw i64 %i1.01, 1, !dbg !31
-  %exitcond = icmp ne i64 %inc14, 100
-  br i1 %exitcond, label %for.body6, label %for.cond.cleanup5, !llvm.loop !33
-
-for.end15:                                        ; preds = %for.cond.cleanup5
-  ret void
-}
-
 ; CHECK: remark: diagnostics_missed.c:28:3: [different_bounds]: entry and for.end: Loop trip counts are not the same
 define void @different_bounds(ptr noalias %A) !dbg !36 {
 entry:
   br label %for.body
 
-for.cond.cleanup:                                 ; preds = %for.inc
-  br label %for.end
-
 for.body:                                         ; preds = %entry, %for.inc
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.inc ]
   %sub = add nsw i64 %i.02, -3
@@ -77,9 +24,9 @@ for.body:                                         ; preds = %entry, %for.inc
 for.inc:                                          ; preds = %for.body
   %inc = add nuw nsw i64 %i.02, 1, !dbg !43
   %exitcond1 = icmp ne i64 %inc, 100
-  br i1 %exitcond1, label %for.body, label %for.cond.cleanup, !llvm.loop !45
+  br i1 %exitcond1, label %for.body, label %for.end, !llvm.loop !45
 
-for.end:                                          ; preds = %for.cond.cleanup
+for.end:                                          ; preds = %for.inc
   br label %for.body6
 
 for.cond.cleanup5:                                ; preds = %for.inc13
diff --git a/llvm/unittests/Transforms/Utils/CodeMoverUtilsTest.cpp b/llvm/unittests/Transforms/Utils/CodeMoverUtilsTest.cpp
index 191ccc3a9dbd9..c2a8045e86dab 100644
--- a/llvm/unittests/Transforms/Utils/CodeMoverUtilsTest.cpp
+++ b/llvm/unittests/Transforms/Utils/CodeMoverUtilsTest.cpp
@@ -63,373 +63,6 @@ static Instruction *getInstructionByName(Function &F, StringRef Name) {
   llvm_unreachable("Expected to find instruction!");
 }
 
-TEST(CodeMoverUtils, IsControlFlowEquivalentSimpleTest) {
-  LLVMContext C;
-
-  // void foo(int &i, bool cond1, bool cond2) {
-  //   if (cond1)
-  //     i = 1;
-  //   if (cond1)
-  //     i = 2;
-  //   if (cond2)
-  //     i = 3;
-  // }
-  std::unique_ptr<Module> M =
-      parseIR(C, R"(define void @foo(ptr %i, i1 %cond1, i1 %cond2) {
-                 entry:
-                   br i1 %cond1, label %if.first, label %if.first.end
-                 if.first:
-                   store i32 1, ptr %i, align 4
-                   br label %if.first.end
-                 if.first.end:
-                   br i1 %cond1, label %if.second, label %if.second.end
-                 if.second:
-                   store i32 2, ptr %i, align 4
-                   br label %if.second.end
-                 if.second.end:
-                   br i1 %cond2, label %if.third, label %if.third.end
-                 if.third:
-                   store i32 3, ptr %i, align 4
-                   br label %if.third.end
-                 if.third.end:
-                   ret void
-                 })");
-  run(*M, "foo",
-      [&](Function &F, DominatorTree &DT, PostDominatorTree &PDT,
-          DependenceInfo &DI) {
-        BasicBlock *FirstIfBody = getBasicBlockByName(F, "if.first");
-        EXPECT_TRUE(
-            isControlFlowEquivalent(*FirstIfBody, *FirstIfBody, DT, PDT));
-        BasicBlock *SecondIfBody = getBasicBlockByName(F, "if.second");
-        EXPECT_TRUE(
-            isControlFlowEquivalent(*FirstIfBody, *SecondIfBody, DT, PDT));
-
-        BasicBlock *ThirdIfBody = getBasicBlockByName(F, "if.third");
-        EXPECT_FALSE(
-            isControlFlowEquivalent(*FirstIfBody, *ThirdIfBody, DT, PDT));
-        EXPECT_FALSE(
-            isControlFlowEquivalent(*SecondIfBody, *ThirdIfBody, DT, PDT));
-      });
-}
-
-TEST(CodeMoverUtils, IsControlFlowEquivalentOppositeCondTest) {
-  LLVMContext C;
-
-  // void foo(int &i, unsigned X, unsigned Y) {
-  //   if (X < Y)
-  //     i = 1;
-  //   if (Y > X)
-  //     i = 2;
-  //   if (X >= Y)
-  //     i = 3;
-  //   else
-  //     i = 4;
-  //   if (X == Y)
-  //     i = 5;
-  //   if (Y == X)
-  //     i = 6;
-  //   else
-  //     i = 7;
-  //   if (X != Y)
-  //     i = 8;
-  //   else
-  //     i = 9;
-  // }
-  std::unique_ptr<Module> M =
-      parseIR(C, R"(define void @foo(ptr %i, i32 %X, i32 %Y) {
-                 entry:
-                   %cmp1 = icmp ult i32 %X, %Y
-                   br i1 %cmp1, label %if.first, label %if.first.end
-                 if.first:
-                   store i32 1, ptr %i, align 4
-                   br label %if.first.end
-                 if.first.end:
-                   %cmp2 = icmp ugt i32 %Y, %X
-                   br i1 %cmp2, label %if.second, label %if.second.end
-                 if.second:
-                   store i32 2, ptr %i, align 4
-                   br label %if.second.end
-                 if.second.end:
-                   %cmp3 = icmp uge i32 %X, %Y
-                   br i1 %cmp3, label %if.third, label %if.third.else
-                 if.third:
-                   store i32 3, ptr %i, align 4
-                   br label %if.third.end
-                 if.third.else:
-                   store i32 4, ptr %i, align 4
-                   br label %if.third.end
-                 if.third.end:
-                   %cmp4 = icmp eq i32 %X, %Y
-                   br i1 %cmp4, label %if.fourth, label %if.fourth.end
-                 if.fourth:
-                   store i32 5, ptr %i, align 4
-                   br label %if.fourth.end
-                 if.fourth.end:
-                   %cmp5 = icmp eq i32 %Y, %X
-                   br i1 %cmp5, label %if.fifth, label %if.fifth.else
-                 if.fifth:
-                   store i32 6, ptr %i, align 4
-                   br label %if.fifth.end
-                 if.fifth.else:
-                   store i32 7, ptr %i, align 4
-                   br label %if.fifth.end
-                 if.fifth.end:
-                   %cmp6 = icmp ne i32 %X, %Y
-                   br i1 %cmp6, label %if.sixth, label %if.sixth.else
-                 if.sixth:
-                   store i32 8, ptr %i, align 4
-                   br label %if.sixth.end
-                 if.sixth.else:
-                   store i32 9, ptr %i, align 4
-                   br label %if.sixth.end
-                 if.sixth.end:
-                   ret void
-                 })");
-  run(*M, "foo",
-      [&](Function &F, DominatorTree &DT, PostDominatorTree &PDT,
-          DependenceInfo &DI) {
-        BasicBlock *FirstIfBody = getBasicBlockByName(F, "if.first");
-        BasicBlock *SecondIfBody = getBasicBlockByName(F, "if.second");
-        BasicBlock *ThirdIfBody = getBasicBlockByName(F, "if.third");
-        BasicBlock *ThirdElseBody = getBasicBlockByName(F, "if.third.else");
-        EXPECT_TRUE(
-            isControlFlowEquivalent(*FirstIfBody, *ThirdElseBody, DT, PDT));
-        EXPECT_TRUE(
-            isControlFlowEquivalent(*SecondIfBody, *ThirdElseBody, DT, PDT));
-        EXPECT_FALSE(
-            isControlFlowEquivalent(*ThirdIfBody, *ThirdElseBody, DT, PDT));
-
-        BasicBlock *FourthIfBody = getBasicBlockByName(F, "if.fourth");
-        BasicBlock *FifthIfBody = getBasicBlockByName(F, "if.fifth");
-        BasicBlock *FifthElseBody = getBasicBlockByName(F, "if.fifth.else");
-        EXPECT_FALSE(
-            isControlFlowEquivalent(*FifthIfBody, *FifthElseBody, DT, PDT));
-        BasicBlock *SixthIfBody = getBasicBlockByName(F, "if.sixth");
-        EXPECT_TRUE(
-            isControlFlowEquivalent(*FifthElseBody, *SixthIfBody, DT, PDT));
-        BasicBlock *SixthElseBody = getBasicBlockByName(F, "if.sixth.else");
-        EXPECT_TRUE(
-            isControlFlowEquivalent(*FourthIfBody, *SixthElseBody, DT, PDT));
-        EXPECT_TRUE(
-            isControlFlowEquivalent(*FifthIfBody, *SixthElseBody, DT, PDT));
-      });
-}
-
-TEST(CodeMoverUtils, IsControlFlowEquivalentCondNestTest) {
-  LLVMContext C;
-
-  // void foo(int &i, bool cond1, bool cond2) {
-  //   if (cond1)
-  //     if (cond2)
-  //       i = 1;
-  //   if (cond2)
-  //     if (cond1)
-  //       i = 2;
-  // }
-  std::unique_ptr<Module> M =
-      parseIR(C, R"(define void @foo(ptr %i, i1 %cond1, i1 %cond2) {
-         entry:
-           br i1 %cond1, label %if.outer.first, label %if.first.end
-         if.outer.first:
-           br i1 %cond2, label %if.inner.first, label %if.first.end
-         if.inner.first:
-           store i32 1, ptr %i, align 4
-           br label %if.first.end
-         if.first.end:
-           br i1 %cond2, label %if.outer.second, label %if.second.end
-         if.outer.second:
-           br i1 %cond1, label %if.inner.second, label %if.second.end
-         if.inner.second:
-           store i32 2, ptr %i, align 4
-           br label %if.second.end
-         if.second.end:
-           ret void
-         })");
-  run(*M, "foo",
-      [&](Function &F, DominatorTree &DT, PostDominatorTree &PDT,
-          DependenceInfo &DI) {
-        BasicBlock *FirstOuterIfBody = getBasicBlockByName(F, "if.outer.first");
-        BasicBlock *FirstInnerIfBody = getBasicBlockByName(F, "if.inner.first");
-        BasicBlock *SecondOuterIfBody =
-            getBasicBlockByName(F, "if.outer.second");
-        BasicBlock *SecondInnerIfBody =
-            getBasicBlockByName(F, "if.inner.second");
-        EXPECT_TRUE(isControlFlowEquivalent(*FirstInnerIfBody,
-                                            *SecondInnerIfBody, DT, PDT));
-        EXPECT_FALSE(isControlFlowEquivalent(*FirstOuterIfBody,
-                                             *SecondOuterIfBody, DT, PDT));
-        EXPECT_FALSE(isControlFlowEquivalent(*FirstOuterIfBody,
-                                             *SecondInnerIfBody, DT, PDT));
-        EXPECT_FALSE(isControlFlowEquivalent(*FirstInnerIfBody,
-                                             *SecondOuterIfBody, DT, PDT));
-      });
-}
-
-TEST(CodeMoverUtils, IsControlFlowEquivalentImbalanceTest) {
-  LLVMContext C;
-
-  // void foo(int &i, bool cond1, bool cond2) {
-  //   if (cond1)
-  //     if (cond2)
-  //       if (cond3)
-  //         i = 1;
-  //   if (cond2)
-  //     if (cond3)
-  //       i = 2;
-  //   if (cond1)
-  //     if (cond1)
-  //       i = 3;
-  //   if (cond1)
-  //     i = 4;
-  // }
-  std::unique_ptr<Module> M =
-      parseIR(C, R"(define void @foo(ptr %i, i1 %cond1, i1 %cond2, i1 %cond3) {
-         entry:
-           br i1 %cond1, label %if.outer.first, label %if.first.end
-         if.outer.first:
-           br i1 %cond2, label %if.middle.first, label %if.first.end
-         if.middle.first:
-           br i1 %cond3, label %if.inner.first, label %if.first.end
-         if.inner.first:
-           store i32 1, ptr %i, align 4
-           br label %if.first.end
-         if.first.end:
-           br i1 %cond2, label %if.outer.second, label %if.second.end
-         if.outer.second:
-           br i1 %cond3, label %if.inner.second, label %if.second.end
-         if.inner.second:
-           store i32 2, ptr %i, align 4
-           br label %if.second.end
-         if.second.end:
-           br i1 %cond1, label %if.outer.third, label %if.third.end
-         if.outer.third:
-           br i1 %cond1, label %if.inner.third, label %if.third.end
-         if.inner.third:
-           store i32 3, ptr %i, align 4
-           br label %if.third.end
-         if.third.end:
-           br i1 %cond1, label %if.fourth, label %if.fourth.end
-         if.fourth:
-           store i32 4, ptr %i, align 4
-           br label %if.fourth.end
-         if.fourth.end:
-           ret void
-         })");
-  run(*M, "foo",
-      [&](Function &F, DominatorTree &DT, PostDominatorTree &PDT,
-          DependenceInfo &DI) {
-        BasicBlock *FirstIfBody = getBasicBlockByName(F, "if.inner.first");
-        BasicBlock *SecondIfBody = getBasicBlockByName(F, "if.inner.second");
-        EXPECT_FALSE(
-            isControlFlowEquivalent(*FirstIfBody, *SecondIfBody, DT, PDT));
-
-        BasicBlock *ThirdIfBody = getBasicBlockByName(F, "if.inner.third");
-        BasicBlock *FourthIfBody = getBasicBlockByName(F, "if.fourth");
-        EXPECT_TRUE(
-            isControlFlowEquivalent(*ThirdIfBody, *FourthIfBody, DT, PDT));
-      });
-}
-
-TEST(CodeMoverUtils, IsControlFlowEquivalentPointerTest) {
-  LLVMContext C;
-
-  // void foo(int &i, int *cond) {
-  //   if (*cond)
-  //     i = 1;
-  //   if (*cond)
-  //     i = 2;
-  //   *cond = 1;
-  //   if (*cond)
-  //     i = 3;
-  // }
-  std::unique_ptr<Module> M =
-      parseIR(C, R"(define void @foo(ptr %i, ptr %cond) {
-                 entry:
-                   %0 = load i32, ptr %cond, align 4
-                   %tobool1 = icmp ne i32 %0, 0
-                   br i1 %tobool1, label %if.first, label %if.first.end
-                 if.first:
-                   store i32 1, ptr %i, align 4
-                   br label %if.first.end
-                 if.first.end:
-                   %1 = load i32, ptr %cond, align 4
-                   %tobool2 = icmp ne i32 %1, 0
-                   br i1 %tobool2, label %if.second, label %if.second.end
-                 if.second:
-                   store i32 2, ptr %i, align 4
-                   br label %if.second.end
-                 if.second.end:
-                   store i32 1, ptr %cond, align 4
-                   %2 = load i32, ptr %cond, align 4
-                   %tobool3 = icmp ne i32 %2, 0
-                   br i1 %tobool3, label %if.third, label %if.third.end
-                 if.third:
-                   store i32 3, ptr %i, align 4
-                   br label %if.third.end
-                 if.third.end:
-                   ret void
-                 })");
-  run(*M, "foo",
-      [&](Function &F, DominatorTree &DT, PostDominatorTree &PDT,
-          DependenceInfo &DI) {
-        BasicBlock *FirstIfBody = getBasicBlockByName(F, "if.first");
-        BasicBlock *SecondIfBody = getBasicBlockByName(F, "if.second");
-        // Limitation: if we can prove cond haven't been modify between %0 and
-        // %1, then we can prove FirstIfBody and SecondIfBody are control flow
-        // equivalent.
-        EXPECT_FALSE(
-            isControlFlowEquivalent(*FirstIfBody, *SecondIfBody, DT, PDT));
-
-        BasicBlock *ThirdIfBody = getBasicBlockByName(F, "if.third");
-        EXPECT_FALSE(
-            isControlFlowEquivalent(*FirstIfBody, *ThirdIfBody, DT, PDT));
-        EXPECT_FALSE(
-            isControlFlowEquivalent(*SecondIfBody, *ThirdIfBody, DT, PDT));
-      });
-}
-
-TEST(CodeMoverUtils, IsControlFlowEquivalentNotPostdomTest) {
-  LLVMContext C;
-
-  // void foo(bool cond1, bool cond2) {
-  //   if (cond1) {
-  //     if (cond2)
-  //       return;
-  //   } else
-  //     if (cond2)
-  //       return;
-  //   return;
-  // }
-  std::unique_ptr<Module> M =
-      parseIR(C, R"(define void @foo(i1 %cond1, i1 %cond2) {
-                 idom:
-                   br i1 %cond1, label %succ0, label %succ1
-                 succ0:
-                   br i1 %cond2, label %succ0ret, label %succ0succ1
-                 succ0ret:
-                   ret void
-                 succ0succ1:
-                   br label %bb
-                 succ1:
-                   br i1 %cond2, label %succ1ret, label %succ1succ1
-                 succ1ret:
-                   ret void
-                 succ1succ1:
-                   br label %bb
-                 bb:
-                   ret void
-                 })");
-  run(*M, "foo",
-      [&](Function &F, DominatorTree &DT, PostDominatorTree &PDT,
-          DependenceInfo &DI) {
-        BasicBlock &Idom = F.front();
-        assert(Idom.getName() == "idom" && "Expecting BasicBlock idom");
-        BasicBlock &BB = F.back();
-        assert(BB.getName() == "bb" && "Expecting BasicBlock bb");
-        EXPECT_FALSE(isControlFlowEquivalent(Idom, BB, DT, PDT));
-      });
-}
-
 TEST(CodeMoverUtils, IsSafeToMoveTest1) {
   LLVMContext C;
 
@@ -514,11 +147,6 @@ TEST(CodeMoverUtils, IsSafeToMoveTest1) {
         EXPECT_FALSE(isSafeToMoveBefore(*CI_unsafecall->getNextNode(),
                                         *CI_unsafecall, DT, &PDT, &DI));
 
-        // Moving instruction to non control flow equivalent places are not
-        // supported.
-        EXPECT_FALSE(
-            isSafeToMoveBefore(*SI_A5, *Entry->getTerminator(), DT, &PDT, &DI));
-
         // Moving PHINode is not supported.
         EXPECT_FALSE(isSafeToMoveBefore(PN, *PN.getNextNode()->getNextNode(),
                                         DT, &PDT, &DI));



More information about the llvm-commits mailing list