[llvm] 9bc38df - [LoopFusion] Simplifying the legality checks (#171889)

Fri Dec 12 12:09:38 PST 2025

Author: Alireza Torabian
Date: 2025-12-12T15:09:34-05:00
New Revision: 9bc38df587def5e88cfb18ec7f912c9a588c5d4b

URL: https://github.com/llvm/llvm-project/commit/9bc38df587def5e88cfb18ec7f912c9a588c5d4b
DIFF: https://github.com/llvm/llvm-project/commit/9bc38df587def5e88cfb18ec7f912c9a588c5d4b.diff

LOG: [LoopFusion] Simplifying the legality checks (#171889)

Considering that the current loop fusion only supports adjacent loops,
we are able to simplify the checks in this pass. By removing the
`isControlFlowEquivalent` check, this patch fixes multiple issues
including #166560, #166535, #165031, #80301 and #168263.

Now only the sequential/adjacent candidates are collected in the same
list. This patch is the implementation of approach 2 discussed in post
#171207.

Added: 
    

Modified: 
    llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h
    llvm/lib/Transforms/Scalar/LoopFuse.cpp
    llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
    llvm/test/Transforms/LoopFusion/cannot_fuse.ll
    llvm/test/Transforms/LoopFusion/diagnostics_missed.ll
    llvm/unittests/Transforms/Utils/CodeMoverUtilsTest.cpp

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h b/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h
index 877872485ab58..d473f7092f62e 100644

--- a/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h
@@ -27,22 +27,6 @@ class DominatorTree;
 class Instruction;
 class PostDominatorTree;
 
-/// Return true if \p I0 and \p I1 are control flow equivalent.
-/// Two instructions are control flow equivalent if their basic blocks are
-/// control flow equivalent.
-LLVM_ABI bool isControlFlowEquivalent(const Instruction &I0,
-                                      const Instruction &I1,
-                                      const DominatorTree &DT,
-                                      const PostDominatorTree &PDT);
-
-/// Return true if \p BB0 and \p BB1 are control flow equivalent.
-/// Two basic blocks are control flow equivalent if when one executes, the other
-/// is guaranteed to execute.
-LLVM_ABI bool isControlFlowEquivalent(const BasicBlock &BB0,
-                                      const BasicBlock &BB1,
-                                      const DominatorTree &DT,
-                                      const PostDominatorTree &PDT);
-
 /// Return true if \p I can be safely moved before \p InsertPoint.
 LLVM_ABI bool isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
                                  DominatorTree &DT,

diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index 9ffa602416b05..3a06c3f00fa02 100644
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -64,6 +64,7 @@
 #include "llvm/Transforms/Utils/CodeMoverUtils.h"
 #include "llvm/Transforms/Utils/LoopPeel.h"
 #include "llvm/Transforms/Utils/LoopSimplify.h"
+#include <list>
 
 using namespace llvm;
 
@@ -85,7 +86,6 @@ STATISTIC(InvalidDependencies, "Dependencies prevent fusion");
 STATISTIC(UnknownTripCount, "Loop has unknown trip count");
 STATISTIC(UncomputableTripCount, "SCEV cannot compute trip count of loop");
 STATISTIC(NonEqualTripCount, "Loop trip counts are not the same");
-STATISTIC(NonAdjacent, "Loops are not adjacent");
 STATISTIC(
     NonEmptyPreheader,
     "Loop has a non-empty preheader with instructions that cannot be moved");
@@ -174,10 +174,6 @@ struct FusionCandidate {
   /// Has this loop been Peeled
   bool Peeled;
 
-  /// Dominator and PostDominator trees are needed for the
-  /// FusionCandidateCompare function, required by FusionCandidateSet to
-  /// determine where the FusionCandidate should be inserted into the set. These
-  /// are used to establish ordering of the FusionCandidates based on dominance.
   DominatorTree &DT;
   const PostDominatorTree *PDT;
 
@@ -358,10 +354,10 @@ struct FusionCandidate {
 private:
   // This is only used internally for now, to clear the MemWrites and MemReads
   // list and setting Valid to false. I can't envision other uses of this right
-  // now, since once FusionCandidates are put into the FusionCandidateSet they
+  // now, since once FusionCandidates are put into the FusionCandidateList they
   // are immutable. Thus, any time we need to change/update a FusionCandidate,
-  // we must create a new one and insert it into the FusionCandidateSet to
-  // ensure the FusionCandidateSet remains ordered correctly.
+  // we must create a new one and insert it into the FusionCandidateList to
+  // ensure the FusionCandidateList remains ordered correctly.
   void invalidate() {
     MemWrites.clear();
     MemReads.clear();
@@ -381,86 +377,15 @@ struct FusionCandidate {
     return false;
   }
 };
-
-struct FusionCandidateCompare {
-  /// Comparison functor to sort two Control Flow Equivalent fusion candidates
-  /// into dominance order.
-  /// If LHS dominates RHS and RHS post-dominates LHS, return true;
-  /// If RHS dominates LHS and LHS post-dominates RHS, return false;
-  /// If both LHS and RHS are not dominating each other then, non-strictly
-  /// post dominate check will decide the order of candidates. If RHS
-  /// non-strictly post dominates LHS then, return true. If LHS non-strictly
-  /// post dominates RHS then, return false. If both are non-strictly post
-  /// dominate each other then, level in the post dominator tree will decide
-  /// the order of candidates.
-  bool operator()(const FusionCandidate &LHS,
-                  const FusionCandidate &RHS) const {
-    const DominatorTree *DT = &(LHS.DT);
-
-    BasicBlock *LHSEntryBlock = LHS.getEntryBlock();
-    BasicBlock *RHSEntryBlock = RHS.getEntryBlock();
-
-    // Do not save PDT to local variable as it is only used in asserts and thus
-    // will trigger an unused variable warning if building without asserts.
-    assert(DT && LHS.PDT && "Expecting valid dominator tree");
-
-    // Do this compare first so if LHS == RHS, function returns false.
-    if (DT->dominates(RHSEntryBlock, LHSEntryBlock)) {
-      // RHS dominates LHS
-      // Verify LHS post-dominates RHS
-      assert(LHS.PDT->dominates(LHSEntryBlock, RHSEntryBlock));
-      return false;
-    }
-
-    if (DT->dominates(LHSEntryBlock, RHSEntryBlock)) {
-      // Verify RHS Postdominates LHS
-      assert(LHS.PDT->dominates(RHSEntryBlock, LHSEntryBlock));
-      return true;
-    }
-
-    // If two FusionCandidates are in the same level of dominator tree,
-    // they will not dominate each other, but may still be control flow
-    // equivalent. To sort those FusionCandidates, nonStrictlyPostDominate()
-    // function is needed.
-    bool WrongOrder =
-        nonStrictlyPostDominate(LHSEntryBlock, RHSEntryBlock, DT, LHS.PDT);
-    bool RightOrder =
-        nonStrictlyPostDominate(RHSEntryBlock, LHSEntryBlock, DT, LHS.PDT);
-    if (WrongOrder && RightOrder) {
-      // If common predecessor of LHS and RHS post dominates both
-      // FusionCandidates then, Order of FusionCandidate can be
-      // identified by its level in post dominator tree.
-      DomTreeNode *LNode = LHS.PDT->getNode(LHSEntryBlock);
-      DomTreeNode *RNode = LHS.PDT->getNode(RHSEntryBlock);
-      return LNode->getLevel() > RNode->getLevel();
-    } else if (WrongOrder)
-      return false;
-    else if (RightOrder)
-      return true;
-
-    // If LHS does not non-strict Postdominate RHS and RHS does not non-strict
-    // Postdominate LHS then, there is no dominance relationship between the
-    // two FusionCandidates. Thus, they should not be in the same set together.
-    llvm_unreachable(
-        "No dominance relationship between these fusion candidates!");
-  }
-};
 } // namespace
 
 using LoopVector = SmallVector<Loop *, 4>;
 
-// Set of Control Flow Equivalent (CFE) Fusion Candidates, sorted in dominance
-// order. Thus, if FC0 comes *before* FC1 in a FusionCandidateSet, then FC0
-// dominates FC1 and FC1 post-dominates FC0.
-// std::set was chosen because we want a sorted data structure with stable
-// iterators. A subsequent patch to loop fusion will enable fusing non-adjacent
-// loops by moving intervening code around. When this intervening code contains
-// loops, those loops will be moved also. The corresponding FusionCandidates
-// will also need to be moved accordingly. As this is done, having stable
-// iterators will simplify the logic. Similarly, having an efficient insert that
-// keeps the FusionCandidateSet sorted will also simplify the implementation.
-using FusionCandidateSet = std::set<FusionCandidate, FusionCandidateCompare>;
-using FusionCandidateCollection = SmallVector<FusionCandidateSet, 4>;
+// List of adjacent fusion candidates in order. Thus, if FC0 comes *before* FC1
+// in a FusionCandidateList, then FC0 dominates FC1, FC1 post-dominates FC0,
+// and they are adjacent.
+using FusionCandidateList = std::list<FusionCandidate>;
+using FusionCandidateCollection = SmallVector<FusionCandidateList, 4>;
 
 #ifndef NDEBUG
 static void printLoopVector(const LoopVector &LV) {
@@ -480,8 +405,8 @@ static raw_ostream &operator<<(raw_ostream &OS, const FusionCandidate &FC) {
 }
 
 static raw_ostream &operator<<(raw_ostream &OS,
-                               const FusionCandidateSet &CandSet) {
-  for (const FusionCandidate &FC : CandSet)
+                               const FusionCandidateList &CandList) {
+  for (const FusionCandidate &FC : CandList)
     OS << FC << '\n';
 
   return OS;
@@ -490,9 +415,9 @@ static raw_ostream &operator<<(raw_ostream &OS,
 static void
 printFusionCandidates(const FusionCandidateCollection &FusionCandidates) {
   dbgs() << "Fusion Candidates: \n";
-  for (const auto &CandidateSet : FusionCandidates) {
-    dbgs() << "*** Fusion Candidate Set ***\n";
-    dbgs() << CandidateSet;
+  for (const auto &CandidateList : FusionCandidates) {
+    dbgs() << "*** Fusion Candidate List ***\n";
+    dbgs() << CandidateList;
     dbgs() << "****************************\n";
   }
 }
@@ -648,20 +573,6 @@ struct LoopFuser {
   }
 
 private:
-  /// Determine if two fusion candidates are control flow equivalent.
-  ///
-  /// Two fusion candidates are control flow equivalent if when one executes,
-  /// the other is guaranteed to execute. This is determined using dominators
-  /// and post-dominators: if A dominates B and B post-dominates A then A and B
-  /// are control-flow equivalent.
-  bool isControlFlowEquivalent(const FusionCandidate &FC0,
-                               const FusionCandidate &FC1) const {
-    assert(FC0.Preheader && FC1.Preheader && "Expecting valid preheaders");
-
-    return ::isControlFlowEquivalent(*FC0.getEntryBlock(), *FC1.getEntryBlock(),
-                                     DT, PDT);
-  }
-
   /// Iterate over all loops in the given loop set and identify the loops that
   /// are eligible for fusion. Place all eligible fusion candidates into Control
   /// Flow Equivalent sets, sorted by dominance.
@@ -673,34 +584,42 @@ struct LoopFuser {
       if (!CurrCand.isEligibleForFusion(SE))
         continue;
 
-      // Go through each list in FusionCandidates and determine if L is control
-      // flow equivalent with the first loop in that list. If it is, append LV.
+      // Go through each list in FusionCandidates and determine if the first or
+      // last loop in the list is strictly adjacent to L. If it is, append L.
       // If not, go to the next list.
       // If no suitable list is found, start another list and add it to
       // FusionCandidates.
-      bool FoundSet = false;
-
-      for (auto &CurrCandSet : FusionCandidates) {
-        if (isControlFlowEquivalent(*CurrCandSet.begin(), CurrCand)) {
-          CurrCandSet.insert(CurrCand);
-          FoundSet = true;
+      bool FoundAdjacent = false;
+      for (auto &CurrCandList : FusionCandidates) {
+        if (isStrictlyAdjacent(CurrCand, CurrCandList.front())) {
+          CurrCandList.push_front(CurrCand);
+          FoundAdjacent = true;
 #ifndef NDEBUG
           if (VerboseFusionDebugging)
             LLVM_DEBUG(dbgs() << "Adding " << CurrCand
-                              << " to existing candidate set\n");
+                              << " to existing candidate list\n");
+#endif
+          break;
+        } else if (isStrictlyAdjacent(CurrCandList.back(), CurrCand)) {
+          CurrCandList.push_back(CurrCand);
+          FoundAdjacent = true;
+#ifndef NDEBUG
+          if (VerboseFusionDebugging)
+            LLVM_DEBUG(dbgs() << "Adding " << CurrCand
+                              << " to existing candidate list\n");
 #endif
           break;
         }
       }
-      if (!FoundSet) {
-        // No set was found. Create a new set and add to FusionCandidates
+      if (!FoundAdjacent) {
+        // No list was found. Create a new list and add to FusionCandidates
 #ifndef NDEBUG
         if (VerboseFusionDebugging)
-          LLVM_DEBUG(dbgs() << "Adding " << CurrCand << " to new set\n");
+          LLVM_DEBUG(dbgs() << "Adding " << CurrCand << " to new list\n");
 #endif
-        FusionCandidateSet NewCandSet;
-        NewCandSet.insert(CurrCand);
-        FusionCandidates.push_back(NewCandSet);
+        FusionCandidateList NewCandList;
+        NewCandList.push_back(CurrCand);
+        FusionCandidates.push_back(NewCandList);
       }
       NumFusionCandidates++;
     }
@@ -849,218 +768,205 @@ struct LoopFuser {
     }
   }
 
-  /// Walk each set of control flow equivalent fusion candidates and attempt to
-  /// fuse them. This does a single linear traversal of all candidates in the
-  /// set. The conditions for legal fusion are checked at this point. If a pair
-  /// of fusion candidates passes all legality checks, they are fused together
-  /// and a new fusion candidate is created and added to the FusionCandidateSet.
+  /// Walk each set of strictly adjacent fusion candidates and attempt to fuse
+  /// them. This does a single linear traversal of all candidates in the list.
+  /// The conditions for legal fusion are checked at this point. If a pair of
+  /// fusion candidates passes all legality checks, they are fused together and
+  /// a new fusion candidate is created and added to the FusionCandidateList.
   /// The original fusion candidates are then removed, as they are no longer
   /// valid.
   bool fuseCandidates() {
     bool Fused = false;
     LLVM_DEBUG(printFusionCandidates(FusionCandidates));
-    for (auto &CandidateSet : FusionCandidates) {
-      if (CandidateSet.size() < 2)
+    for (auto &CandidateList : FusionCandidates) {
+      if (CandidateList.size() < 2)
         continue;
 
-      LLVM_DEBUG(dbgs() << "Attempting fusion on Candidate Set:\n"
-                        << CandidateSet << "\n");
-
-      for (auto FC0 = CandidateSet.begin(); FC0 != CandidateSet.end(); ++FC0) {
-        assert(!LDT.isRemovedLoop(FC0->L) &&
-               "Should not have removed loops in CandidateSet!");
-        auto FC1 = FC0;
-        for (++FC1; FC1 != CandidateSet.end(); ++FC1) {
-          assert(!LDT.isRemovedLoop(FC1->L) &&
-                 "Should not have removed loops in CandidateSet!");
-
-          LLVM_DEBUG(dbgs() << "Attempting to fuse candidate \n"; FC0->dump();
-                     dbgs() << " with\n"; FC1->dump(); dbgs() << "\n");
-
-          FC0->verify();
-          FC1->verify();
-
-          // Check if the candidates have identical tripcounts (first value of
-          // pair), and if not check the difference in the tripcounts between
-          // the loops (second value of pair). The difference is not equal to
-          // std::nullopt iff the loops iterate a constant number of times, and
-          // have a single exit.
-          std::pair<bool, std::optional<unsigned>> IdenticalTripCountRes =
-              haveIdenticalTripCounts(*FC0, *FC1);
-          bool SameTripCount = IdenticalTripCountRes.first;
-          std::optional<unsigned> TCDifference = IdenticalTripCountRes.second;
-
-          // Here we are checking that FC0 (the first loop) can be peeled, and
-          // both loops have different tripcounts.
-          if (FC0->AbleToPeel && !SameTripCount && TCDifference) {
-            if (*TCDifference > FusionPeelMaxCount) {
-              LLVM_DEBUG(dbgs()
-                         << "Difference in loop trip counts: " << *TCDifference
-                         << " is greater than maximum peel count specificed: "
-                         << FusionPeelMaxCount << "\n");
-            } else {
-              // Dependent on peeling being performed on the first loop, and
-              // assuming all other conditions for fusion return true.
-              SameTripCount = true;
-            }
-          }
+      LLVM_DEBUG(dbgs() << "Attempting fusion on Candidate List:\n"
+                        << CandidateList << "\n");
 
-          if (!SameTripCount) {
-            LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical trip "
-                                 "counts. Not fusing.\n");
-            reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
-                                                       NonEqualTripCount);
-            continue;
-          }
+      for (auto It = CandidateList.begin(), NextIt = std::next(It);
+           NextIt != CandidateList.end(); It = NextIt, NextIt = std::next(It)) {
 
-          if (!isAdjacent(*FC0, *FC1)) {
-            LLVM_DEBUG(dbgs()
-                       << "Fusion candidates are not adjacent. Not fusing.\n");
-            reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1, NonAdjacent);
-            continue;
-          }
+        auto FC0 = *It;
+        auto FC1 = *NextIt;
 
-          if ((!FC0->GuardBranch && FC1->GuardBranch) ||
-              (FC0->GuardBranch && !FC1->GuardBranch)) {
-            LLVM_DEBUG(dbgs() << "The one of candidate is guarded while the "
-                                 "another one is not. Not fusing.\n");
-            reportLoopFusion<OptimizationRemarkMissed>(
-                *FC0, *FC1, OnlySecondCandidateIsGuarded);
-            continue;
-          }
+        assert(!LDT.isRemovedLoop(FC0.L) &&
+               "Should not have removed loops in CandidateList!");
+        assert(!LDT.isRemovedLoop(FC1.L) &&
+               "Should not have removed loops in CandidateList!");
 
-          // Ensure that FC0 and FC1 have identical guards.
-          // If one (or both) are not guarded, this check is not necessary.
-          if (FC0->GuardBranch && FC1->GuardBranch &&
-              !haveIdenticalGuards(*FC0, *FC1) && !TCDifference) {
-            LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical "
-                                 "guards. Not Fusing.\n");
-            reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
-                                                       NonIdenticalGuards);
-            continue;
-          }
+        LLVM_DEBUG(dbgs() << "Attempting to fuse candidate \n"; FC0.dump();
+                   dbgs() << " with\n"; FC1.dump(); dbgs() << "\n");
 
-          if (FC0->GuardBranch) {
-            assert(FC1->GuardBranch && "Expecting valid FC1 guard branch");
-
-            if (!isSafeToMoveBefore(*FC0->ExitBlock,
-                                    *FC1->ExitBlock->getFirstNonPHIOrDbg(), DT,
-                                    &PDT, &DI)) {
-              LLVM_DEBUG(dbgs() << "Fusion candidate contains unsafe "
-                                   "instructions in exit block. Not fusing.\n");
-              reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
-                                                         NonEmptyExitBlock);
-              continue;
-            }
+        FC0.verify();
+        FC1.verify();
 
-            if (!isSafeToMoveBefore(
-                    *FC1->GuardBranch->getParent(),
-                    *FC0->GuardBranch->getParent()->getTerminator(), DT, &PDT,
-                    &DI)) {
-              LLVM_DEBUG(dbgs()
-                         << "Fusion candidate contains unsafe "
-                            "instructions in guard block. Not fusing.\n");
-              reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
-                                                         NonEmptyGuardBlock);
-              continue;
-            }
-          }
-
-          // Check the dependencies across the loops and do not fuse if it would
-          // violate them.
-          if (!dependencesAllowFusion(*FC0, *FC1)) {
-            LLVM_DEBUG(dbgs() << "Memory dependencies do not allow fusion!\n");
-            reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
-                                                       InvalidDependencies);
-            continue;
-          }
+        // Check if the candidates have identical tripcounts (first value of
+        // pair), and if not check the difference in the tripcounts between
+        // the loops (second value of pair). The difference is not equal to
+        // std::nullopt iff the loops iterate a constant number of times, and
+        // have a single exit.
+        std::pair<bool, std::optional<unsigned>> IdenticalTripCountRes =
+            haveIdenticalTripCounts(FC0, FC1);
+        bool SameTripCount = IdenticalTripCountRes.first;
+        std::optional<unsigned> TCDifference = IdenticalTripCountRes.second;
 
-          // If the second loop has instructions in the pre-header, attempt to
-          // hoist them up to the first loop's pre-header or sink them into the
-          // body of the second loop.
-          SmallVector<Instruction *, 4> SafeToHoist;
-          SmallVector<Instruction *, 4> SafeToSink;
-          // At this point, this is the last remaining legality check.
-          // Which means if we can make this pre-header empty, we can fuse
-          // these loops
-          if (!isEmptyPreheader(*FC1)) {
-            LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty "
-                                 "preheader.\n");
-
-            // If it is not safe to hoist/sink all instructions in the
-            // pre-header, we cannot fuse these loops.
-            if (!collectMovablePreheaderInsts(*FC0, *FC1, SafeToHoist,
-                                              SafeToSink)) {
-              LLVM_DEBUG(dbgs() << "Could not hoist/sink all instructions in "
-                                   "Fusion Candidate Pre-header.\n"
-                                << "Not Fusing.\n");
-              reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
-                                                         NonEmptyPreheader);
-              continue;
-            }
+        // Here we are checking that FC0 (the first loop) can be peeled, and
+        // both loops have different tripcounts.
+        if (FC0.AbleToPeel && !SameTripCount && TCDifference) {
+          if (*TCDifference > FusionPeelMaxCount) {
+            LLVM_DEBUG(dbgs()
+                       << "Difference in loop trip counts: " << *TCDifference
+                       << " is greater than maximum peel count specificed: "
+                       << FusionPeelMaxCount << "\n");
+          } else {
+            // Dependent on peeling being performed on the first loop, and
+            // assuming all other conditions for fusion return true.
+            SameTripCount = true;
           }
+        }
 
-          bool BeneficialToFuse = isBeneficialFusion(*FC0, *FC1);
-          LLVM_DEBUG(dbgs()
-                     << "\tFusion appears to be "
-                     << (BeneficialToFuse ? "" : "un") << "profitable!\n");
-          if (!BeneficialToFuse) {
-            reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
-                                                       FusionNotBeneficial);
-            continue;
-          }
-          // All analysis has completed and has determined that fusion is legal
-          // and profitable. At this point, start transforming the code and
-          // perform fusion.
+        if (!SameTripCount) {
+          LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical trip "
+                               "counts. Not fusing.\n");
+          reportLoopFusion<OptimizationRemarkMissed>(FC0, FC1,
+                                                     NonEqualTripCount);
+          continue;
+        }
 
-          // Execute the hoist/sink operations on preheader instructions
-          movePreheaderInsts(*FC0, *FC1, SafeToHoist, SafeToSink);
+        if ((!FC0.GuardBranch && FC1.GuardBranch) ||
+            (FC0.GuardBranch && !FC1.GuardBranch)) {
+          LLVM_DEBUG(dbgs() << "The one of candidate is guarded while the "
+                               "another one is not. Not fusing.\n");
+          reportLoopFusion<OptimizationRemarkMissed>(
+              FC0, FC1, OnlySecondCandidateIsGuarded);
+          continue;
+        }
 
-          LLVM_DEBUG(dbgs() << "\tFusion is performed: " << *FC0 << " and "
-                            << *FC1 << "\n");
+        // Ensure that FC0 and FC1 have identical guards.
+        // If one (or both) are not guarded, this check is not necessary.
+        if (FC0.GuardBranch && FC1.GuardBranch &&
+            !haveIdenticalGuards(FC0, FC1) && !TCDifference) {
+          LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical "
+                               "guards. Not Fusing.\n");
+          reportLoopFusion<OptimizationRemarkMissed>(FC0, FC1,
+                                                     NonIdenticalGuards);
+          continue;
+        }
 
-          FusionCandidate FC0Copy = *FC0;
-          // Peel the loop after determining that fusion is legal. The Loops
-          // will still be safe to fuse after the peeling is performed.
-          bool Peel = TCDifference && *TCDifference > 0;
-          if (Peel)
-            peelFusionCandidate(FC0Copy, *FC1, *TCDifference);
+        if (FC0.GuardBranch) {
+          assert(FC1.GuardBranch && "Expecting valid FC1 guard branch");
 
-          // Report fusion to the Optimization Remarks.
-          // Note this needs to be done *before* performFusion because
-          // performFusion will change the original loops, making it not
-          // possible to identify them after fusion is complete.
-          reportLoopFusion<OptimizationRemark>((Peel ? FC0Copy : *FC0), *FC1,
-                                               FuseCounter);
+          if (!isSafeToMoveBefore(*FC0.ExitBlock,
+                                  *FC1.ExitBlock->getFirstNonPHIOrDbg(), DT,
+                                  &PDT, &DI)) {
+            LLVM_DEBUG(dbgs() << "Fusion candidate contains unsafe "
+                                 "instructions in exit block. Not fusing.\n");
+            reportLoopFusion<OptimizationRemarkMissed>(FC0, FC1,
+                                                       NonEmptyExitBlock);
+            continue;
+          }
 
-          FusionCandidate FusedCand(
-              performFusion((Peel ? FC0Copy : *FC0), *FC1), DT, &PDT, ORE,
-              FC0Copy.PP);
-          FusedCand.verify();
-          assert(FusedCand.isEligibleForFusion(SE) &&
-                 "Fused candidate should be eligible for fusion!");
+          if (!isSafeToMoveBefore(
+                  *FC1.GuardBranch->getParent(),
+                  *FC0.GuardBranch->getParent()->getTerminator(), DT, &PDT,
+                  &DI)) {
+            LLVM_DEBUG(dbgs() << "Fusion candidate contains unsafe "
+                                 "instructions in guard block. Not fusing.\n");
+            reportLoopFusion<OptimizationRemarkMissed>(FC0, FC1,
+                                                       NonEmptyGuardBlock);
+            continue;
+          }
+        }
 
-          // Notify the loop-depth-tree that these loops are not valid objects
-          LDT.removeLoop(FC1->L);
+        // Check the dependencies across the loops and do not fuse if it would
+        // violate them.
+        if (!dependencesAllowFusion(FC0, FC1)) {
+          LLVM_DEBUG(dbgs() << "Memory dependencies do not allow fusion!\n");
+          reportLoopFusion<OptimizationRemarkMissed>(FC0, FC1,
+                                                     InvalidDependencies);
+          continue;
+        }
 
-          CandidateSet.erase(FC0);
-          CandidateSet.erase(FC1);
+        // If the second loop has instructions in the pre-header, attempt to
+        // hoist them up to the first loop's pre-header or sink them into the
+        // body of the second loop.
+        SmallVector<Instruction *, 4> SafeToHoist;
+        SmallVector<Instruction *, 4> SafeToSink;
+        // At this point, this is the last remaining legality check.
+        // Which means if we can make this pre-header empty, we can fuse
+        // these loops
+        if (!isEmptyPreheader(FC1)) {
+          LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty "
+                               "preheader.\n");
+
+          // If it is not safe to hoist/sink all instructions in the
+          // pre-header, we cannot fuse these loops.
+          if (!collectMovablePreheaderInsts(FC0, FC1, SafeToHoist,
+                                            SafeToSink)) {
+            LLVM_DEBUG(dbgs() << "Could not hoist/sink all instructions in "
+                                 "Fusion Candidate Pre-header.\n"
+                              << "Not Fusing.\n");
+            reportLoopFusion<OptimizationRemarkMissed>(FC0, FC1,
+                                                       NonEmptyPreheader);
+            continue;
+          }
+        }
 
-          auto InsertPos = CandidateSet.insert(FusedCand);
+        bool BeneficialToFuse = isBeneficialFusion(FC0, FC1);
+        LLVM_DEBUG(dbgs() << "\tFusion appears to be "
+                          << (BeneficialToFuse ? "" : "un") << "profitable!\n");
+        if (!BeneficialToFuse) {
+          reportLoopFusion<OptimizationRemarkMissed>(FC0, FC1,
+                                                     FusionNotBeneficial);
+          continue;
+        }
+        // All analysis has completed and has determined that fusion is legal
+        // and profitable. At this point, start transforming the code and
+        // perform fusion.
 
-          assert(InsertPos.second &&
-                 "Unable to insert TargetCandidate in CandidateSet!");
+        // Execute the hoist/sink operations on preheader instructions
+        movePreheaderInsts(FC0, FC1, SafeToHoist, SafeToSink);
 
-          // Reset FC0 and FC1 the new (fused) candidate. Subsequent iterations
-          // of the FC1 loop will attempt to fuse the new (fused) loop with the
-          // remaining candidates in the current candidate set.
-          FC0 = FC1 = InsertPos.first;
+        LLVM_DEBUG(dbgs() << "\tFusion is performed: " << FC0 << " and " << FC1
+                          << "\n");
 
-          LLVM_DEBUG(dbgs() << "Candidate Set (after fusion): " << CandidateSet
-                            << "\n");
+        FusionCandidate FC0Copy = FC0;
+        // Peel the loop after determining that fusion is legal. The Loops
+        // will still be safe to fuse after the peeling is performed.
+        bool Peel = TCDifference && *TCDifference > 0;
+        if (Peel)
+          peelFusionCandidate(FC0Copy, FC1, *TCDifference);
+
+        // Report fusion to the Optimization Remarks.
+        // Note this needs to be done *before* performFusion because
+        // performFusion will change the original loops, making it not
+        // possible to identify them after fusion is complete.
+        reportLoopFusion<OptimizationRemark>((Peel ? FC0Copy : FC0), FC1,
+                                             FuseCounter);
+
+        FusionCandidate FusedCand(performFusion((Peel ? FC0Copy : FC0), FC1),
+                                  DT, &PDT, ORE, FC0Copy.PP);
+        FusedCand.verify();
+        assert(FusedCand.isEligibleForFusion(SE) &&
+               "Fused candidate should be eligible for fusion!");
+
+        // Notify the loop-depth-tree that these loops are not valid objects
+        LDT.removeLoop(FC1.L);
+
+        // Replace FC0 and FC1 with their fused loop
+        It = CandidateList.erase(It);
+        It = CandidateList.erase(It);
+        It = CandidateList.insert(It, FusedCand);
+
+        // Start from FusedCand in the next iteration
+        NextIt = It;
+
+        LLVM_DEBUG(dbgs() << "Candidate List (after fusion): " << CandidateList
+                          << "\n");
 
-          Fused = true;
-        }
+        Fused = true;
       }
     }
     return Fused;
@@ -1488,7 +1394,7 @@ struct LoopFuser {
     return true;
   }
 
-  /// Determine if two fusion candidates are adjacent in the CFG.
+  /// Determine if two fusion candidates are strictly adjacent in the CFG.
   ///
   /// This method will determine if there are additional basic blocks in the CFG
   /// between the exit of \p FC0 and the entry of \p FC1.
@@ -1497,11 +1403,14 @@ struct LoopFuser {
   /// FC1. If not, then the loops are not adjacent. If the two candidates are
   /// not guarded loops, then it checks whether the exit block of \p FC0 is the
   /// preheader of \p FC1.
-  bool isAdjacent(const FusionCandidate &FC0,
-                  const FusionCandidate &FC1) const {
+  /// Strictly means there is no predecessor for FC1 unless it is from FC0,
+  /// i.e., FC0 dominates FC1.
+  bool isStrictlyAdjacent(const FusionCandidate &FC0,
+                          const FusionCandidate &FC1) const {
     // If the successor of the guard branch is FC1, then the loops are adjacent
     if (FC0.GuardBranch)
-      return FC0.getNonLoopBlock() == FC1.getEntryBlock();
+      return DT.dominates(FC0.getEntryBlock(), FC1.getEntryBlock()) &&
+             FC0.getNonLoopBlock() == FC1.getEntryBlock();
     else
       return FC0.ExitBlock == FC1.getEntryBlock();
   }

diff  --git a/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp b/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
index 0e076c60d6085..8384d46837a7e 100644
--- a/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
+++ b/llvm/lib/Transforms/Utils/CodeMoverUtils.cpp
@@ -25,8 +25,6 @@ using namespace llvm;
 STATISTIC(HasDependences,
           "Cannot move across instructions that has memory dependences");
 STATISTIC(MayThrowException, "Cannot move across instructions that may throw");
-STATISTIC(NotControlFlowEquivalent,
-          "Instructions are not control flow equivalent");
 STATISTIC(NotMovedPHINode, "Movement of PHINodes are not supported");
 STATISTIC(NotMovedTerminator, "Movement of Terminator are not supported");
 
@@ -228,44 +226,6 @@ bool ControlConditions::isInverse(const Value &V1, const Value &V2) {
   return false;
 }
 
-bool llvm::isControlFlowEquivalent(const Instruction &I0, const Instruction &I1,
-                                   const DominatorTree &DT,
-                                   const PostDominatorTree &PDT) {
-  return isControlFlowEquivalent(*I0.getParent(), *I1.getParent(), DT, PDT);
-}
-
-bool llvm::isControlFlowEquivalent(const BasicBlock &BB0, const BasicBlock &BB1,
-                                   const DominatorTree &DT,
-                                   const PostDominatorTree &PDT) {
-  if (&BB0 == &BB1)
-    return true;
-
-  if ((DT.dominates(&BB0, &BB1) && PDT.dominates(&BB1, &BB0)) ||
-      (PDT.dominates(&BB0, &BB1) && DT.dominates(&BB1, &BB0)))
-    return true;
-
-  // If the set of conditions required to execute BB0 and BB1 from their common
-  // dominator are the same, then BB0 and BB1 are control flow equivalent.
-  const BasicBlock *CommonDominator = DT.findNearestCommonDominator(&BB0, &BB1);
-  LLVM_DEBUG(dbgs() << "The nearest common dominator of " << BB0.getName()
-                    << " and " << BB1.getName() << " is "
-                    << CommonDominator->getName() << "\n");
-
-  const std::optional<ControlConditions> BB0Conditions =
-      ControlConditions::collectControlConditions(BB0, *CommonDominator, DT,
-                                                  PDT);
-  if (BB0Conditions == std::nullopt)
-    return false;
-
-  const std::optional<ControlConditions> BB1Conditions =
-      ControlConditions::collectControlConditions(BB1, *CommonDominator, DT,
-                                                  PDT);
-  if (BB1Conditions == std::nullopt)
-    return false;
-
-  return BB0Conditions->isEquivalent(*BB1Conditions);
-}
-
 static bool reportInvalidCandidate(const Instruction &I,
                                    llvm::Statistic &Stat) {
   ++Stat;
@@ -330,10 +290,6 @@ bool llvm::isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
   if (I.isTerminator())
     return reportInvalidCandidate(I, NotMovedTerminator);
 
-  // TODO remove this limitation.
-  if (!isControlFlowEquivalent(I, InsertPoint, DT, *PDT))
-    return reportInvalidCandidate(I, NotControlFlowEquivalent);
-
   if (isReachedBefore(&I, &InsertPoint, &DT, PDT))
     for (const Use &U : I.uses())
       if (auto *UserInst = dyn_cast<Instruction>(U.getUser())) {
@@ -450,8 +406,6 @@ bool llvm::nonStrictlyPostDominate(const BasicBlock *ThisBlock,
                                    const BasicBlock *OtherBlock,
                                    const DominatorTree *DT,
                                    const PostDominatorTree *PDT) {
-  assert(isControlFlowEquivalent(*ThisBlock, *OtherBlock, *DT, *PDT) &&
-         "ThisBlock and OtherBlock must be CFG equivalent!");
   const BasicBlock *CommonDominator =
       DT->findNearestCommonDominator(ThisBlock, OtherBlock);
   if (CommonDominator == nullptr)

diff  --git a/llvm/test/Transforms/LoopFusion/cannot_fuse.ll b/llvm/test/Transforms/LoopFusion/cannot_fuse.ll
index 6819ec377ab17..0eea73940904d 100644
--- a/llvm/test/Transforms/LoopFusion/cannot_fuse.ll
+++ b/llvm/test/Transforms/LoopFusion/cannot_fuse.ll
@@ -8,10 +8,10 @@
 
 ; CHECK: Performing Loop Fusion on function non_cfe
 ; CHECK: Fusion Candidates:
-; CHECK: *** Fusion Candidate Set ***
+; CHECK: *** Fusion Candidate List ***
 ; CHECK: bb
 ; CHECK: ****************************
-; CHECK: *** Fusion Candidate Set ***
+; CHECK: *** Fusion Candidate List ***
 ; CHECK: bb20.preheader
 ; CHECK: ****************************
 ; CHECK: Loop Fusion complete
@@ -81,14 +81,12 @@ bb33:                                             ; preds = %bb33.loopexit, %bb1
 
 ; CHECK: Performing Loop Fusion on function non_adjacent
 ; CHECK: Fusion Candidates:
-; CHECK: *** Fusion Candidate Set ***
+; CHECK: *** Fusion Candidate List ***
 ; CHECK-NEXT: [[LOOP1PREHEADER:bb[0-9]*]]
+; CHECK-NEXT: ****************************
+; CHECK: *** Fusion Candidate List ***
 ; CHECK-NEXT: [[LOOP2PREHEADER:bb[0-9]*]]
 ; CHECK-NEXT: ****************************
-; CHECK: Attempting fusion on Candidate Set:
-; CHECK-NEXT: [[LOOP1PREHEADER]]
-; CHECK-NEXT: [[LOOP2PREHEADER]]
-; CHECK: Fusion candidates are not adjacent. Not fusing.
 ; CHECK: Loop Fusion complete
 define void @non_adjacent(ptr noalias %arg) {
 bb:
@@ -143,11 +141,11 @@ bb25:                                             ; preds = %bb15
 
 ; CHECK: Performing Loop Fusion on function different_bounds
 ; CHECK: Fusion Candidates:
-; CHECK: *** Fusion Candidate Set ***
+; CHECK: *** Fusion Candidate List ***
 ; CHECK-NEXT: [[LOOP1PREHEADER:bb[0-9]*]]
 ; CHECK-NEXT: [[LOOP2PREHEADER:bb[0-9]*]]
 ; CHECK-NEXT: ****************************
-; CHECK: Attempting fusion on Candidate Set:
+; CHECK: Attempting fusion on Candidate List:
 ; CHECK-NEXT: [[LOOP1PREHEADER]]
 ; CHECK-NEXT: [[LOOP2PREHEADER]]
 ; CHECK: Fusion candidates do not have identical trip counts. Not fusing.
@@ -157,7 +155,7 @@ bb:
   br label %bb5
 
 bb4:                                              ; preds = %bb11
-  br label %bb13
+  br label %bb16
 
 bb5:                                              ; preds = %bb, %bb11
   %.013 = phi i64 [ 0, %bb ], [ %tmp12, %bb11 ]
@@ -175,14 +173,11 @@ bb11:                                             ; preds = %bb5
   %exitcond2 = icmp ne i64 %tmp12, 100
   br i1 %exitcond2, label %bb5, label %bb4
 
-bb13:                                             ; preds = %bb4
-  br label %bb16
-
 bb15:                                             ; preds = %bb23
   br label %bb25
 
-bb16:                                             ; preds = %bb13, %bb23
-  %.02 = phi i64 [ 0, %bb13 ], [ %tmp24, %bb23 ]
+bb16:                                             ; preds = %bb4, %bb23
+  %.02 = phi i64 [ 0, %bb4 ], [ %tmp24, %bb23 ]
   %tmp17 = add nsw i64 %.02, -3
   %tmp18 = add nuw nsw i64 %.02, 3
   %tmp19 = mul nsw i64 %tmp17, %tmp18
@@ -206,11 +201,11 @@ bb25:                                             ; preds = %bb15
 
 ; CHECK: Performing Loop Fusion on function negative_dependence
 ; CHECK: Fusion Candidates:
-; CHECK: *** Fusion Candidate Set ***
+; CHECK: *** Fusion Candidate List ***
 ; CHECK-NEXT: [[LOOP1PREHEADER:bb[0-9]*]]
 ; CHECK-NEXT: [[LOOP2PREHEADER:bb[0-9]*]]
 ; CHECK-NEXT: ****************************
-; CHECK: Attempting fusion on Candidate Set:
+; CHECK: Attempting fusion on Candidate List:
 ; CHECK-NEXT: [[LOOP1PREHEADER]]
 ; CHECK-NEXT: [[LOOP2PREHEADER]]
 ; CHECK: Memory dependencies do not allow fusion!
@@ -260,11 +255,11 @@ bb19:                                             ; preds = %bb18
 
 ; CHECK: Performing Loop Fusion on function sumTest
 ; CHECK: Fusion Candidates:
-; CHECK: *** Fusion Candidate Set ***
+; CHECK: *** Fusion Candidate List ***
 ; CHECK-NEXT: [[LOOP1PREHEADER:bb[0-9]*]]
 ; CHECK-NEXT: [[LOOP2PREHEADER:bb[0-9]*]]
 ; CHECK-NEXT: ****************************
-; CHECK: Attempting fusion on Candidate Set:
+; CHECK: Attempting fusion on Candidate List:
 ; CHECK-NEXT: [[LOOP1PREHEADER]]
 ; CHECK-NEXT: [[LOOP2PREHEADER]]
 ; CHECK: Memory dependencies do not allow fusion!
@@ -314,11 +309,11 @@ bb21:                                             ; preds = %bb14
 
 ; CHECK: Performing Loop Fusion on function test
 ; CHECK: Fusion Candidates:
-; CHECK: *** Fusion Candidate Set ***
+; CHECK: *** Fusion Candidate List ***
 ; CHECK-NEXT: [[LOOP1PREHEADER:for.body[0-9]*.preheader]]
 ; CHECK-NEXT: [[LOOP2PREHEADER:for.body[0-9]*.preheader]]
 ; CHECK-NEXT: ****************************
-; CHECK: Attempting fusion on Candidate Set:
+; CHECK: Attempting fusion on Candidate List:
 ; CHECK-NEXT: [[LOOP1PREHEADER]]
 ; CHECK-NEXT: [[LOOP2PREHEADER]]
 ; CHECK: Memory dependencies do not allow fusion!

diff  --git a/llvm/test/Transforms/LoopFusion/diagnostics_missed.ll b/llvm/test/Transforms/LoopFusion/diagnostics_missed.ll
index f30a070153742..711d462da37d7 100644
--- a/llvm/test/Transforms/LoopFusion/diagnostics_missed.ll
+++ b/llvm/test/Transforms/LoopFusion/diagnostics_missed.ll
@@ -5,64 +5,11 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 
 @B = common global [1024 x i32] zeroinitializer, align 16, !dbg !0
 
-; CHECK: remark: diagnostics_missed.c:18:3: [non_adjacent]: entry and for.end: Loops are not adjacent
-define void @non_adjacent(ptr noalias %A) !dbg !14 {
-entry:
-  br label %for.body
-
-for.cond.cleanup:                                 ; preds = %for.inc
-  br label %for.end
-
-for.body:                                         ; preds = %entry, %for.inc
-  %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.inc ]
-  %sub = add nsw i64 %i.02, -3
-  %add = add nuw nsw i64 %i.02, 3
-  %mul = mul nsw i64 %sub, %add
-  %rem = srem i64 %mul, %i.02
-  %conv = trunc i64 %rem to i32
-  %arrayidx = getelementptr inbounds i32, ptr %A, i64 %i.02
-  store i32 %conv, ptr %arrayidx, align 4
-  br label %for.inc
-
-for.inc:                                          ; preds = %for.body
-  %inc = add nuw nsw i64 %i.02, 1, !dbg !26
-  %exitcond1 = icmp ne i64 %inc, 100
-  br i1 %exitcond1, label %for.body, label %for.cond.cleanup, !llvm.loop !28
-
-for.end:                                          ; preds = %for.cond.cleanup
-  br label %for.body6
-
-for.cond.cleanup5:                                ; preds = %for.inc13
-  br label %for.end15
-
-for.body6:                                        ; preds = %for.end, %for.inc13
-  %i1.01 = phi i64 [ 0, %for.end ], [ %inc14, %for.inc13 ]
-  %sub7 = add nsw i64 %i1.01, -3
-  %add8 = add nuw nsw i64 %i1.01, 3
-  %mul9 = mul nsw i64 %sub7, %add8
-  %rem10 = srem i64 %mul9, %i1.01
-  %conv11 = trunc i64 %rem10 to i32
-  %arrayidx12 = getelementptr inbounds [1024 x i32], ptr @B, i64 0, i64 %i1.01
-  store i32 %conv11, ptr %arrayidx12, align 4
-  br label %for.inc13
-
-for.inc13:                                        ; preds = %for.body6
-  %inc14 = add nuw nsw i64 %i1.01, 1, !dbg !31
-  %exitcond = icmp ne i64 %inc14, 100
-  br i1 %exitcond, label %for.body6, label %for.cond.cleanup5, !llvm.loop !33
-
-for.end15:                                        ; preds = %for.cond.cleanup5
-  ret void
-}
-
 ; CHECK: remark: diagnostics_missed.c:28:3: [different_bounds]: entry and for.end: Loop trip counts are not the same
 define void @different_bounds(ptr noalias %A) !dbg !36 {
 entry:
   br label %for.body
 
-for.cond.cleanup:                                 ; preds = %for.inc
-  br label %for.end
-
 for.body:                                         ; preds = %entry, %for.inc
   %i.02 = phi i64 [ 0, %entry ], [ %inc, %for.inc ]
   %sub = add nsw i64 %i.02, -3
@@ -77,9 +24,9 @@ for.body:                                         ; preds = %entry, %for.inc
 for.inc:                                          ; preds = %for.body
   %inc = add nuw nsw i64 %i.02, 1, !dbg !43
   %exitcond1 = icmp ne i64 %inc, 100
-  br i1 %exitcond1, label %for.body, label %for.cond.cleanup, !llvm.loop !45
+  br i1 %exitcond1, label %for.body, label %for.end, !llvm.loop !45
 
-for.end:                                          ; preds = %for.cond.cleanup
+for.end:                                          ; preds = %for.inc
   br label %for.body6
 
 for.cond.cleanup5:                                ; preds = %for.inc13

diff  --git a/llvm/unittests/Transforms/Utils/CodeMoverUtilsTest.cpp b/llvm/unittests/Transforms/Utils/CodeMoverUtilsTest.cpp
index 191ccc3a9dbd9..c2a8045e86dab 100644
--- a/llvm/unittests/Transforms/Utils/CodeMoverUtilsTest.cpp
+++ b/llvm/unittests/Transforms/Utils/CodeMoverUtilsTest.cpp
@@ -63,373 +63,6 @@ static Instruction *getInstructionByName(Function &F, StringRef Name) {
   llvm_unreachable("Expected to find instruction!");
 }
 
-TEST(CodeMoverUtils, IsControlFlowEquivalentSimpleTest) {
-  LLVMContext C;
-
-  // void foo(int &i, bool cond1, bool cond2) {
-  //   if (cond1)
-  //     i = 1;
-  //   if (cond1)
-  //     i = 2;
-  //   if (cond2)
-  //     i = 3;
-  // }
-  std::unique_ptr<Module> M =
-      parseIR(C, R"(define void @foo(ptr %i, i1 %cond1, i1 %cond2) {
-                 entry:
-                   br i1 %cond1, label %if.first, label %if.first.end
-                 if.first:
-                   store i32 1, ptr %i, align 4
-                   br label %if.first.end
-                 if.first.end:
-                   br i1 %cond1, label %if.second, label %if.second.end
-                 if.second:
-                   store i32 2, ptr %i, align 4
-                   br label %if.second.end
-                 if.second.end:
-                   br i1 %cond2, label %if.third, label %if.third.end
-                 if.third:
-                   store i32 3, ptr %i, align 4
-                   br label %if.third.end
-                 if.third.end:
-                   ret void
-                 })");
-  run(*M, "foo",
-      [&](Function &F, DominatorTree &DT, PostDominatorTree &PDT,
-          DependenceInfo &DI) {
-        BasicBlock *FirstIfBody = getBasicBlockByName(F, "if.first");
-        EXPECT_TRUE(
-            isControlFlowEquivalent(*FirstIfBody, *FirstIfBody, DT, PDT));
-        BasicBlock *SecondIfBody = getBasicBlockByName(F, "if.second");
-        EXPECT_TRUE(
-            isControlFlowEquivalent(*FirstIfBody, *SecondIfBody, DT, PDT));
-
-        BasicBlock *ThirdIfBody = getBasicBlockByName(F, "if.third");
-        EXPECT_FALSE(
-            isControlFlowEquivalent(*FirstIfBody, *ThirdIfBody, DT, PDT));
-        EXPECT_FALSE(
-            isControlFlowEquivalent(*SecondIfBody, *ThirdIfBody, DT, PDT));
-      });
-}
-
-TEST(CodeMoverUtils, IsControlFlowEquivalentOppositeCondTest) {
-  LLVMContext C;
-
-  // void foo(int &i, unsigned X, unsigned Y) {
-  //   if (X < Y)
-  //     i = 1;
-  //   if (Y > X)
-  //     i = 2;
-  //   if (X >= Y)
-  //     i = 3;
-  //   else
-  //     i = 4;
-  //   if (X == Y)
-  //     i = 5;
-  //   if (Y == X)
-  //     i = 6;
-  //   else
-  //     i = 7;
-  //   if (X != Y)
-  //     i = 8;
-  //   else
-  //     i = 9;
-  // }
-  std::unique_ptr<Module> M =
-      parseIR(C, R"(define void @foo(ptr %i, i32 %X, i32 %Y) {
-                 entry:
-                   %cmp1 = icmp ult i32 %X, %Y
-                   br i1 %cmp1, label %if.first, label %if.first.end
-                 if.first:
-                   store i32 1, ptr %i, align 4
-                   br label %if.first.end
-                 if.first.end:
-                   %cmp2 = icmp ugt i32 %Y, %X
-                   br i1 %cmp2, label %if.second, label %if.second.end
-                 if.second:
-                   store i32 2, ptr %i, align 4
-                   br label %if.second.end
-                 if.second.end:
-                   %cmp3 = icmp uge i32 %X, %Y
-                   br i1 %cmp3, label %if.third, label %if.third.else
-                 if.third:
-                   store i32 3, ptr %i, align 4
-                   br label %if.third.end
-                 if.third.else:
-                   store i32 4, ptr %i, align 4
-                   br label %if.third.end
-                 if.third.end:
-                   %cmp4 = icmp eq i32 %X, %Y
-                   br i1 %cmp4, label %if.fourth, label %if.fourth.end
-                 if.fourth:
-                   store i32 5, ptr %i, align 4
-                   br label %if.fourth.end
-                 if.fourth.end:
-                   %cmp5 = icmp eq i32 %Y, %X
-                   br i1 %cmp5, label %if.fifth, label %if.fifth.else
-                 if.fifth:
-                   store i32 6, ptr %i, align 4
-                   br label %if.fifth.end
-                 if.fifth.else:
-                   store i32 7, ptr %i, align 4
-                   br label %if.fifth.end
-                 if.fifth.end:
-                   %cmp6 = icmp ne i32 %X, %Y
-                   br i1 %cmp6, label %if.sixth, label %if.sixth.else
-                 if.sixth:
-                   store i32 8, ptr %i, align 4
-                   br label %if.sixth.end
-                 if.sixth.else:
-                   store i32 9, ptr %i, align 4
-                   br label %if.sixth.end
-                 if.sixth.end:
-                   ret void
-                 })");
-  run(*M, "foo",
-      [&](Function &F, DominatorTree &DT, PostDominatorTree &PDT,
-          DependenceInfo &DI) {
-        BasicBlock *FirstIfBody = getBasicBlockByName(F, "if.first");
-        BasicBlock *SecondIfBody = getBasicBlockByName(F, "if.second");
-        BasicBlock *ThirdIfBody = getBasicBlockByName(F, "if.third");
-        BasicBlock *ThirdElseBody = getBasicBlockByName(F, "if.third.else");
-        EXPECT_TRUE(
-            isControlFlowEquivalent(*FirstIfBody, *ThirdElseBody, DT, PDT));
-        EXPECT_TRUE(
-            isControlFlowEquivalent(*SecondIfBody, *ThirdElseBody, DT, PDT));
-        EXPECT_FALSE(
-            isControlFlowEquivalent(*ThirdIfBody, *ThirdElseBody, DT, PDT));
-
-        BasicBlock *FourthIfBody = getBasicBlockByName(F, "if.fourth");
-        BasicBlock *FifthIfBody = getBasicBlockByName(F, "if.fifth");
-        BasicBlock *FifthElseBody = getBasicBlockByName(F, "if.fifth.else");
-        EXPECT_FALSE(
-            isControlFlowEquivalent(*FifthIfBody, *FifthElseBody, DT, PDT));
-        BasicBlock *SixthIfBody = getBasicBlockByName(F, "if.sixth");
-        EXPECT_TRUE(
-            isControlFlowEquivalent(*FifthElseBody, *SixthIfBody, DT, PDT));
-        BasicBlock *SixthElseBody = getBasicBlockByName(F, "if.sixth.else");
-        EXPECT_TRUE(
-            isControlFlowEquivalent(*FourthIfBody, *SixthElseBody, DT, PDT));
-        EXPECT_TRUE(
-            isControlFlowEquivalent(*FifthIfBody, *SixthElseBody, DT, PDT));
-      });
-}
-
-TEST(CodeMoverUtils, IsControlFlowEquivalentCondNestTest) {
-  LLVMContext C;
-
-  // void foo(int &i, bool cond1, bool cond2) {
-  //   if (cond1)
-  //     if (cond2)
-  //       i = 1;
-  //   if (cond2)
-  //     if (cond1)
-  //       i = 2;
-  // }
-  std::unique_ptr<Module> M =
-      parseIR(C, R"(define void @foo(ptr %i, i1 %cond1, i1 %cond2) {
-         entry:
-           br i1 %cond1, label %if.outer.first, label %if.first.end
-         if.outer.first:
-           br i1 %cond2, label %if.inner.first, label %if.first.end
-         if.inner.first:
-           store i32 1, ptr %i, align 4
-           br label %if.first.end
-         if.first.end:
-           br i1 %cond2, label %if.outer.second, label %if.second.end
-         if.outer.second:
-           br i1 %cond1, label %if.inner.second, label %if.second.end
-         if.inner.second:
-           store i32 2, ptr %i, align 4
-           br label %if.second.end
-         if.second.end:
-           ret void
-         })");
-  run(*M, "foo",
-      [&](Function &F, DominatorTree &DT, PostDominatorTree &PDT,
-          DependenceInfo &DI) {
-        BasicBlock *FirstOuterIfBody = getBasicBlockByName(F, "if.outer.first");
-        BasicBlock *FirstInnerIfBody = getBasicBlockByName(F, "if.inner.first");
-        BasicBlock *SecondOuterIfBody =
-            getBasicBlockByName(F, "if.outer.second");
-        BasicBlock *SecondInnerIfBody =
-            getBasicBlockByName(F, "if.inner.second");
-        EXPECT_TRUE(isControlFlowEquivalent(*FirstInnerIfBody,
-                                            *SecondInnerIfBody, DT, PDT));
-        EXPECT_FALSE(isControlFlowEquivalent(*FirstOuterIfBody,
-                                             *SecondOuterIfBody, DT, PDT));
-        EXPECT_FALSE(isControlFlowEquivalent(*FirstOuterIfBody,
-                                             *SecondInnerIfBody, DT, PDT));
-        EXPECT_FALSE(isControlFlowEquivalent(*FirstInnerIfBody,
-                                             *SecondOuterIfBody, DT, PDT));
-      });
-}
-
-TEST(CodeMoverUtils, IsControlFlowEquivalentImbalanceTest) {
-  LLVMContext C;
-
-  // void foo(int &i, bool cond1, bool cond2) {
-  //   if (cond1)
-  //     if (cond2)
-  //       if (cond3)
-  //         i = 1;
-  //   if (cond2)
-  //     if (cond3)
-  //       i = 2;
-  //   if (cond1)
-  //     if (cond1)
-  //       i = 3;
-  //   if (cond1)
-  //     i = 4;
-  // }
-  std::unique_ptr<Module> M =
-      parseIR(C, R"(define void @foo(ptr %i, i1 %cond1, i1 %cond2, i1 %cond3) {
-         entry:
-           br i1 %cond1, label %if.outer.first, label %if.first.end
-         if.outer.first:
-           br i1 %cond2, label %if.middle.first, label %if.first.end
-         if.middle.first:
-           br i1 %cond3, label %if.inner.first, label %if.first.end
-         if.inner.first:
-           store i32 1, ptr %i, align 4
-           br label %if.first.end
-         if.first.end:
-           br i1 %cond2, label %if.outer.second, label %if.second.end
-         if.outer.second:
-           br i1 %cond3, label %if.inner.second, label %if.second.end
-         if.inner.second:
-           store i32 2, ptr %i, align 4
-           br label %if.second.end
-         if.second.end:
-           br i1 %cond1, label %if.outer.third, label %if.third.end
-         if.outer.third:
-           br i1 %cond1, label %if.inner.third, label %if.third.end
-         if.inner.third:
-           store i32 3, ptr %i, align 4
-           br label %if.third.end
-         if.third.end:
-           br i1 %cond1, label %if.fourth, label %if.fourth.end
-         if.fourth:
-           store i32 4, ptr %i, align 4
-           br label %if.fourth.end
-         if.fourth.end:
-           ret void
-         })");
-  run(*M, "foo",
-      [&](Function &F, DominatorTree &DT, PostDominatorTree &PDT,
-          DependenceInfo &DI) {
-        BasicBlock *FirstIfBody = getBasicBlockByName(F, "if.inner.first");
-        BasicBlock *SecondIfBody = getBasicBlockByName(F, "if.inner.second");
-        EXPECT_FALSE(
-            isControlFlowEquivalent(*FirstIfBody, *SecondIfBody, DT, PDT));
-
-        BasicBlock *ThirdIfBody = getBasicBlockByName(F, "if.inner.third");
-        BasicBlock *FourthIfBody = getBasicBlockByName(F, "if.fourth");
-        EXPECT_TRUE(
-            isControlFlowEquivalent(*ThirdIfBody, *FourthIfBody, DT, PDT));
-      });
-}
-
-TEST(CodeMoverUtils, IsControlFlowEquivalentPointerTest) {
-  LLVMContext C;
-
-  // void foo(int &i, int *cond) {
-  //   if (*cond)
-  //     i = 1;
-  //   if (*cond)
-  //     i = 2;
-  //   *cond = 1;
-  //   if (*cond)
-  //     i = 3;
-  // }
-  std::unique_ptr<Module> M =
-      parseIR(C, R"(define void @foo(ptr %i, ptr %cond) {
-                 entry:
-                   %0 = load i32, ptr %cond, align 4
-                   %tobool1 = icmp ne i32 %0, 0
-                   br i1 %tobool1, label %if.first, label %if.first.end
-                 if.first:
-                   store i32 1, ptr %i, align 4
-                   br label %if.first.end
-                 if.first.end:
-                   %1 = load i32, ptr %cond, align 4
-                   %tobool2 = icmp ne i32 %1, 0
-                   br i1 %tobool2, label %if.second, label %if.second.end
-                 if.second:
-                   store i32 2, ptr %i, align 4
-                   br label %if.second.end
-                 if.second.end:
-                   store i32 1, ptr %cond, align 4
-                   %2 = load i32, ptr %cond, align 4
-                   %tobool3 = icmp ne i32 %2, 0
-                   br i1 %tobool3, label %if.third, label %if.third.end
-                 if.third:
-                   store i32 3, ptr %i, align 4
-                   br label %if.third.end
-                 if.third.end:
-                   ret void
-                 })");
-  run(*M, "foo",
-      [&](Function &F, DominatorTree &DT, PostDominatorTree &PDT,
-          DependenceInfo &DI) {
-        BasicBlock *FirstIfBody = getBasicBlockByName(F, "if.first");
-        BasicBlock *SecondIfBody = getBasicBlockByName(F, "if.second");
-        // Limitation: if we can prove cond haven't been modify between %0 and
-        // %1, then we can prove FirstIfBody and SecondIfBody are control flow
-        // equivalent.
-        EXPECT_FALSE(
-            isControlFlowEquivalent(*FirstIfBody, *SecondIfBody, DT, PDT));
-
-        BasicBlock *ThirdIfBody = getBasicBlockByName(F, "if.third");
-        EXPECT_FALSE(
-            isControlFlowEquivalent(*FirstIfBody, *ThirdIfBody, DT, PDT));
-        EXPECT_FALSE(
-            isControlFlowEquivalent(*SecondIfBody, *ThirdIfBody, DT, PDT));
-      });
-}
-
-TEST(CodeMoverUtils, IsControlFlowEquivalentNotPostdomTest) {
-  LLVMContext C;
-
-  // void foo(bool cond1, bool cond2) {
-  //   if (cond1) {
-  //     if (cond2)
-  //       return;
-  //   } else
-  //     if (cond2)
-  //       return;
-  //   return;
-  // }
-  std::unique_ptr<Module> M =
-      parseIR(C, R"(define void @foo(i1 %cond1, i1 %cond2) {
-                 idom:
-                   br i1 %cond1, label %succ0, label %succ1
-                 succ0:
-                   br i1 %cond2, label %succ0ret, label %succ0succ1
-                 succ0ret:
-                   ret void
-                 succ0succ1:
-                   br label %bb
-                 succ1:
-                   br i1 %cond2, label %succ1ret, label %succ1succ1
-                 succ1ret:
-                   ret void
-                 succ1succ1:
-                   br label %bb
-                 bb:
-                   ret void
-                 })");
-  run(*M, "foo",
-      [&](Function &F, DominatorTree &DT, PostDominatorTree &PDT,
-          DependenceInfo &DI) {
-        BasicBlock &Idom = F.front();
-        assert(Idom.getName() == "idom" && "Expecting BasicBlock idom");
-        BasicBlock &BB = F.back();
-        assert(BB.getName() == "bb" && "Expecting BasicBlock bb");
-        EXPECT_FALSE(isControlFlowEquivalent(Idom, BB, DT, PDT));
-      });
-}
-
 TEST(CodeMoverUtils, IsSafeToMoveTest1) {
   LLVMContext C;
 
@@ -514,11 +147,6 @@ TEST(CodeMoverUtils, IsSafeToMoveTest1) {
         EXPECT_FALSE(isSafeToMoveBefore(*CI_unsafecall->getNextNode(),
                                         *CI_unsafecall, DT, &PDT, &DI));
 
-        // Moving instruction to non control flow equivalent places are not
-        // supported.
-        EXPECT_FALSE(
-            isSafeToMoveBefore(*SI_A5, *Entry->getTerminator(), DT, &PDT, &DI));
-
         // Moving PHINode is not supported.
         EXPECT_FALSE(isSafeToMoveBefore(PN, *PN.getNextNode()->getNextNode(),
                                         DT, &PDT, &DI));