[llvm] [LoopFusion] Simplifying the legality checks (PR #171889)

via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 11 11:57:43 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Alireza Torabian (1997alireza)

<details>
<summary>Changes</summary>

Considering that the current loop fusion only supports adjacent loops, we are able to simplify the checks in this pass. By removing `isControlFlowEquivalent` check, this patch fixes multiple issues including #<!-- -->166560, #<!-- -->166535, #<!-- -->165031, #<!-- -->80301 and #<!-- -->168263.

Now only the sequential candidates are collected in the same list. This patch is the implementation of approach 2 discussed in post #<!-- -->171207.

---

Patch is 57.08 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/171889.diff


6 Files Affected:

- (modified) llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h (-16) 
- (modified) llvm/lib/Transforms/Scalar/LoopFuse.cpp (+215-305) 
- (modified) llvm/lib/Transforms/Utils/CodeMoverUtils.cpp (-44) 
- (modified) llvm/test/Transforms/LoopFusion/cannot_fuse.ll (+16-21) 
- (modified) llvm/test/Transforms/LoopFusion/diagnostics_missed.ll (+2-55) 
- (modified) llvm/unittests/Transforms/Utils/CodeMoverUtilsTest.cpp (-367) 


``````````diff
diff --git a/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h b/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h
index 877872485ab58..d473f7092f62e 100644
--- a/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/CodeMoverUtils.h
@@ -27,22 +27,6 @@ class DominatorTree;
 class Instruction;
 class PostDominatorTree;
 
-/// Return true if \p I0 and \p I1 are control flow equivalent.
-/// Two instructions are control flow equivalent if their basic blocks are
-/// control flow equivalent.
-LLVM_ABI bool isControlFlowEquivalent(const Instruction &I0,
-                                      const Instruction &I1,
-                                      const DominatorTree &DT,
-                                      const PostDominatorTree &PDT);
-
-/// Return true if \p BB0 and \p BB1 are control flow equivalent.
-/// Two basic blocks are control flow equivalent if when one executes, the other
-/// is guaranteed to execute.
-LLVM_ABI bool isControlFlowEquivalent(const BasicBlock &BB0,
-                                      const BasicBlock &BB1,
-                                      const DominatorTree &DT,
-                                      const PostDominatorTree &PDT);
-
 /// Return true if \p I can be safely moved before \p InsertPoint.
 LLVM_ABI bool isSafeToMoveBefore(Instruction &I, Instruction &InsertPoint,
                                  DominatorTree &DT,
diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index 9ffa602416b05..a71b01a9aaf24 100644
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -64,6 +64,7 @@
 #include "llvm/Transforms/Utils/CodeMoverUtils.h"
 #include "llvm/Transforms/Utils/LoopPeel.h"
 #include "llvm/Transforms/Utils/LoopSimplify.h"
+#include <list>
 
 using namespace llvm;
 
@@ -174,10 +175,6 @@ struct FusionCandidate {
   /// Has this loop been Peeled
   bool Peeled;
 
-  /// Dominator and PostDominator trees are needed for the
-  /// FusionCandidateCompare function, required by FusionCandidateSet to
-  /// determine where the FusionCandidate should be inserted into the set. These
-  /// are used to establish ordering of the FusionCandidates based on dominance.
   DominatorTree &DT;
   const PostDominatorTree *PDT;
 
@@ -358,10 +355,10 @@ struct FusionCandidate {
 private:
   // This is only used internally for now, to clear the MemWrites and MemReads
   // list and setting Valid to false. I can't envision other uses of this right
-  // now, since once FusionCandidates are put into the FusionCandidateSet they
+  // now, since once FusionCandidates are put into the FusionCandidateList they
   // are immutable. Thus, any time we need to change/update a FusionCandidate,
-  // we must create a new one and insert it into the FusionCandidateSet to
-  // ensure the FusionCandidateSet remains ordered correctly.
+  // we must create a new one and insert it into the FusionCandidateList to
+  // ensure the FusionCandidateList remains ordered correctly.
   void invalidate() {
     MemWrites.clear();
     MemReads.clear();
@@ -381,86 +378,15 @@ struct FusionCandidate {
     return false;
   }
 };
-
-struct FusionCandidateCompare {
-  /// Comparison functor to sort two Control Flow Equivalent fusion candidates
-  /// into dominance order.
-  /// If LHS dominates RHS and RHS post-dominates LHS, return true;
-  /// If RHS dominates LHS and LHS post-dominates RHS, return false;
-  /// If both LHS and RHS are not dominating each other then, non-strictly
-  /// post dominate check will decide the order of candidates. If RHS
-  /// non-strictly post dominates LHS then, return true. If LHS non-strictly
-  /// post dominates RHS then, return false. If both are non-strictly post
-  /// dominate each other then, level in the post dominator tree will decide
-  /// the order of candidates.
-  bool operator()(const FusionCandidate &LHS,
-                  const FusionCandidate &RHS) const {
-    const DominatorTree *DT = &(LHS.DT);
-
-    BasicBlock *LHSEntryBlock = LHS.getEntryBlock();
-    BasicBlock *RHSEntryBlock = RHS.getEntryBlock();
-
-    // Do not save PDT to local variable as it is only used in asserts and thus
-    // will trigger an unused variable warning if building without asserts.
-    assert(DT && LHS.PDT && "Expecting valid dominator tree");
-
-    // Do this compare first so if LHS == RHS, function returns false.
-    if (DT->dominates(RHSEntryBlock, LHSEntryBlock)) {
-      // RHS dominates LHS
-      // Verify LHS post-dominates RHS
-      assert(LHS.PDT->dominates(LHSEntryBlock, RHSEntryBlock));
-      return false;
-    }
-
-    if (DT->dominates(LHSEntryBlock, RHSEntryBlock)) {
-      // Verify RHS Postdominates LHS
-      assert(LHS.PDT->dominates(RHSEntryBlock, LHSEntryBlock));
-      return true;
-    }
-
-    // If two FusionCandidates are in the same level of dominator tree,
-    // they will not dominate each other, but may still be control flow
-    // equivalent. To sort those FusionCandidates, nonStrictlyPostDominate()
-    // function is needed.
-    bool WrongOrder =
-        nonStrictlyPostDominate(LHSEntryBlock, RHSEntryBlock, DT, LHS.PDT);
-    bool RightOrder =
-        nonStrictlyPostDominate(RHSEntryBlock, LHSEntryBlock, DT, LHS.PDT);
-    if (WrongOrder && RightOrder) {
-      // If common predecessor of LHS and RHS post dominates both
-      // FusionCandidates then, Order of FusionCandidate can be
-      // identified by its level in post dominator tree.
-      DomTreeNode *LNode = LHS.PDT->getNode(LHSEntryBlock);
-      DomTreeNode *RNode = LHS.PDT->getNode(RHSEntryBlock);
-      return LNode->getLevel() > RNode->getLevel();
-    } else if (WrongOrder)
-      return false;
-    else if (RightOrder)
-      return true;
-
-    // If LHS does not non-strict Postdominate RHS and RHS does not non-strict
-    // Postdominate LHS then, there is no dominance relationship between the
-    // two FusionCandidates. Thus, they should not be in the same set together.
-    llvm_unreachable(
-        "No dominance relationship between these fusion candidates!");
-  }
-};
 } // namespace
 
 using LoopVector = SmallVector<Loop *, 4>;
 
-// Set of Control Flow Equivalent (CFE) Fusion Candidates, sorted in dominance
-// order. Thus, if FC0 comes *before* FC1 in a FusionCandidateSet, then FC0
-// dominates FC1 and FC1 post-dominates FC0.
-// std::set was chosen because we want a sorted data structure with stable
-// iterators. A subsequent patch to loop fusion will enable fusing non-adjacent
-// loops by moving intervening code around. When this intervening code contains
-// loops, those loops will be moved also. The corresponding FusionCandidates
-// will also need to be moved accordingly. As this is done, having stable
-// iterators will simplify the logic. Similarly, having an efficient insert that
-// keeps the FusionCandidateSet sorted will also simplify the implementation.
-using FusionCandidateSet = std::set<FusionCandidate, FusionCandidateCompare>;
-using FusionCandidateCollection = SmallVector<FusionCandidateSet, 4>;
+// List of adjacent Fusion Candidates in order. Thus, if FC0 comes *before* FC1
+// in a FusionCandidateList, then FC0 dominates FC1, FC1 post-dominates FC0,
+// and they are sequential.
+using FusionCandidateList = std::list<FusionCandidate>;
+using FusionCandidateCollection = SmallVector<FusionCandidateList, 4>;
 
 #ifndef NDEBUG
 static void printLoopVector(const LoopVector &LV) {
@@ -480,8 +406,8 @@ static raw_ostream &operator<<(raw_ostream &OS, const FusionCandidate &FC) {
 }
 
 static raw_ostream &operator<<(raw_ostream &OS,
-                               const FusionCandidateSet &CandSet) {
-  for (const FusionCandidate &FC : CandSet)
+                               const FusionCandidateList &CandList) {
+  for (const FusionCandidate &FC : CandList)
     OS << FC << '\n';
 
   return OS;
@@ -490,9 +416,9 @@ static raw_ostream &operator<<(raw_ostream &OS,
 static void
 printFusionCandidates(const FusionCandidateCollection &FusionCandidates) {
   dbgs() << "Fusion Candidates: \n";
-  for (const auto &CandidateSet : FusionCandidates) {
-    dbgs() << "*** Fusion Candidate Set ***\n";
-    dbgs() << CandidateSet;
+  for (const auto &CandidateList : FusionCandidates) {
+    dbgs() << "*** Fusion Candidate List ***\n";
+    dbgs() << CandidateList;
     dbgs() << "****************************\n";
   }
 }
@@ -648,20 +574,6 @@ struct LoopFuser {
   }
 
 private:
-  /// Determine if two fusion candidates are control flow equivalent.
-  ///
-  /// Two fusion candidates are control flow equivalent if when one executes,
-  /// the other is guaranteed to execute. This is determined using dominators
-  /// and post-dominators: if A dominates B and B post-dominates A then A and B
-  /// are control-flow equivalent.
-  bool isControlFlowEquivalent(const FusionCandidate &FC0,
-                               const FusionCandidate &FC1) const {
-    assert(FC0.Preheader && FC1.Preheader && "Expecting valid preheaders");
-
-    return ::isControlFlowEquivalent(*FC0.getEntryBlock(), *FC1.getEntryBlock(),
-                                     DT, PDT);
-  }
-
   /// Iterate over all loops in the given loop set and identify the loops that
   /// are eligible for fusion. Place all eligible fusion candidates into Control
   /// Flow Equivalent sets, sorted by dominance.
@@ -673,34 +585,42 @@ struct LoopFuser {
       if (!CurrCand.isEligibleForFusion(SE))
         continue;
 
-      // Go through each list in FusionCandidates and determine if L is control
-      // flow equivalent with the first loop in that list. If it is, append LV.
+      // Go through each list in FusionCandidates and determine if the first or
+      // last loop in the list is strictly adjacent to L. If it is, append L.
       // If not, go to the next list.
       // If no suitable list is found, start another list and add it to
       // FusionCandidates.
-      bool FoundSet = false;
-
-      for (auto &CurrCandSet : FusionCandidates) {
-        if (isControlFlowEquivalent(*CurrCandSet.begin(), CurrCand)) {
-          CurrCandSet.insert(CurrCand);
-          FoundSet = true;
+      bool FoundAdjacent = false;
+      for (auto &CurrCandList : FusionCandidates) {
+        if (isStrictlyAdjacent(CurrCand, CurrCandList.front())) {
+          CurrCandList.push_front(CurrCand);
+          FoundAdjacent = true;
 #ifndef NDEBUG
           if (VerboseFusionDebugging)
             LLVM_DEBUG(dbgs() << "Adding " << CurrCand
-                              << " to existing candidate set\n");
+                              << " to existing candidate list\n");
+#endif
+          break;
+        } else if (isStrictlyAdjacent(CurrCandList.back(), CurrCand)) {
+          CurrCandList.push_back(CurrCand);
+          FoundAdjacent = true;
+#ifndef NDEBUG
+          if (VerboseFusionDebugging)
+            LLVM_DEBUG(dbgs() << "Adding " << CurrCand
+                              << " to existing candidate list\n");
 #endif
           break;
         }
       }
-      if (!FoundSet) {
-        // No set was found. Create a new set and add to FusionCandidates
+      if (!FoundAdjacent) {
+        // No list was found. Create a new list and add to FusionCandidates
 #ifndef NDEBUG
         if (VerboseFusionDebugging)
-          LLVM_DEBUG(dbgs() << "Adding " << CurrCand << " to new set\n");
+          LLVM_DEBUG(dbgs() << "Adding " << CurrCand << " to new list\n");
 #endif
-        FusionCandidateSet NewCandSet;
-        NewCandSet.insert(CurrCand);
-        FusionCandidates.push_back(NewCandSet);
+        FusionCandidateList NewCandList;
+        NewCandList.push_back(CurrCand);
+        FusionCandidates.push_back(NewCandList);
       }
       NumFusionCandidates++;
     }
@@ -849,218 +769,205 @@ struct LoopFuser {
     }
   }
 
-  /// Walk each set of control flow equivalent fusion candidates and attempt to
-  /// fuse them. This does a single linear traversal of all candidates in the
-  /// set. The conditions for legal fusion are checked at this point. If a pair
-  /// of fusion candidates passes all legality checks, they are fused together
-  /// and a new fusion candidate is created and added to the FusionCandidateSet.
+  /// Walk each set of strictly adjacent fusion candidates and attempt to fuse
+  /// them. This does a single linear traversal of all candidates in the list.
+  /// The conditions for legal fusion are checked at this point. If a pair of
+  /// fusion candidates passes all legality checks, they are fused together and
+  /// a new fusion candidate is created and added to the FusionCandidateList.
   /// The original fusion candidates are then removed, as they are no longer
   /// valid.
   bool fuseCandidates() {
     bool Fused = false;
     LLVM_DEBUG(printFusionCandidates(FusionCandidates));
-    for (auto &CandidateSet : FusionCandidates) {
-      if (CandidateSet.size() < 2)
+    for (auto &CandidateList : FusionCandidates) {
+      if (CandidateList.size() < 2)
         continue;
 
-      LLVM_DEBUG(dbgs() << "Attempting fusion on Candidate Set:\n"
-                        << CandidateSet << "\n");
-
-      for (auto FC0 = CandidateSet.begin(); FC0 != CandidateSet.end(); ++FC0) {
-        assert(!LDT.isRemovedLoop(FC0->L) &&
-               "Should not have removed loops in CandidateSet!");
-        auto FC1 = FC0;
-        for (++FC1; FC1 != CandidateSet.end(); ++FC1) {
-          assert(!LDT.isRemovedLoop(FC1->L) &&
-                 "Should not have removed loops in CandidateSet!");
-
-          LLVM_DEBUG(dbgs() << "Attempting to fuse candidate \n"; FC0->dump();
-                     dbgs() << " with\n"; FC1->dump(); dbgs() << "\n");
-
-          FC0->verify();
-          FC1->verify();
-
-          // Check if the candidates have identical tripcounts (first value of
-          // pair), and if not check the difference in the tripcounts between
-          // the loops (second value of pair). The difference is not equal to
-          // std::nullopt iff the loops iterate a constant number of times, and
-          // have a single exit.
-          std::pair<bool, std::optional<unsigned>> IdenticalTripCountRes =
-              haveIdenticalTripCounts(*FC0, *FC1);
-          bool SameTripCount = IdenticalTripCountRes.first;
-          std::optional<unsigned> TCDifference = IdenticalTripCountRes.second;
-
-          // Here we are checking that FC0 (the first loop) can be peeled, and
-          // both loops have different tripcounts.
-          if (FC0->AbleToPeel && !SameTripCount && TCDifference) {
-            if (*TCDifference > FusionPeelMaxCount) {
-              LLVM_DEBUG(dbgs()
-                         << "Difference in loop trip counts: " << *TCDifference
-                         << " is greater than maximum peel count specificed: "
-                         << FusionPeelMaxCount << "\n");
-            } else {
-              // Dependent on peeling being performed on the first loop, and
-              // assuming all other conditions for fusion return true.
-              SameTripCount = true;
-            }
-          }
+      LLVM_DEBUG(dbgs() << "Attempting fusion on Candidate List:\n"
+                        << CandidateList << "\n");
 
-          if (!SameTripCount) {
-            LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical trip "
-                                 "counts. Not fusing.\n");
-            reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
-                                                       NonEqualTripCount);
-            continue;
-          }
+      for (auto It = CandidateList.begin(), NextIt = std::next(It);
+           NextIt != CandidateList.end(); It = NextIt, NextIt = std::next(It)) {
 
-          if (!isAdjacent(*FC0, *FC1)) {
-            LLVM_DEBUG(dbgs()
-                       << "Fusion candidates are not adjacent. Not fusing.\n");
-            reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1, NonAdjacent);
-            continue;
-          }
+        auto FC0 = *It;
+        auto FC1 = *NextIt;
 
-          if ((!FC0->GuardBranch && FC1->GuardBranch) ||
-              (FC0->GuardBranch && !FC1->GuardBranch)) {
-            LLVM_DEBUG(dbgs() << "The one of candidate is guarded while the "
-                                 "another one is not. Not fusing.\n");
-            reportLoopFusion<OptimizationRemarkMissed>(
-                *FC0, *FC1, OnlySecondCandidateIsGuarded);
-            continue;
-          }
+        assert(!LDT.isRemovedLoop(FC0.L) &&
+               "Should not have removed loops in CandidateList!");
+        assert(!LDT.isRemovedLoop(FC1.L) &&
+               "Should not have removed loops in CandidateList!");
 
-          // Ensure that FC0 and FC1 have identical guards.
-          // If one (or both) are not guarded, this check is not necessary.
-          if (FC0->GuardBranch && FC1->GuardBranch &&
-              !haveIdenticalGuards(*FC0, *FC1) && !TCDifference) {
-            LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical "
-                                 "guards. Not Fusing.\n");
-            reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
-                                                       NonIdenticalGuards);
-            continue;
-          }
+        LLVM_DEBUG(dbgs() << "Attempting to fuse candidate \n"; FC0.dump();
+                   dbgs() << " with\n"; FC1.dump(); dbgs() << "\n");
 
-          if (FC0->GuardBranch) {
-            assert(FC1->GuardBranch && "Expecting valid FC1 guard branch");
-
-            if (!isSafeToMoveBefore(*FC0->ExitBlock,
-                                    *FC1->ExitBlock->getFirstNonPHIOrDbg(), DT,
-                                    &PDT, &DI)) {
-              LLVM_DEBUG(dbgs() << "Fusion candidate contains unsafe "
-                                   "instructions in exit block. Not fusing.\n");
-              reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
-                                                         NonEmptyExitBlock);
-              continue;
-            }
+        FC0.verify();
+        FC1.verify();
 
-            if (!isSafeToMoveBefore(
-                    *FC1->GuardBranch->getParent(),
-                    *FC0->GuardBranch->getParent()->getTerminator(), DT, &PDT,
-                    &DI)) {
-              LLVM_DEBUG(dbgs()
-                         << "Fusion candidate contains unsafe "
-                            "instructions in guard block. Not fusing.\n");
-              reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
-                                                         NonEmptyGuardBlock);
-              continue;
-            }
-          }
-
-          // Check the dependencies across the loops and do not fuse if it would
-          // violate them.
-          if (!dependencesAllowFusion(*FC0, *FC1)) {
-            LLVM_DEBUG(dbgs() << "Memory dependencies do not allow fusion!\n");
-            reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
-                                                       InvalidDependencies);
-            continue;
-          }
+        // Check if the candidates have identical tripcounts (first value of
+        // pair), and if not check the difference in the tripcounts between
+        // the loops (second value of pair). The difference is not equal to
+        // std::nullopt iff the loops iterate a constant number of times, and
+        // have a single exit.
+        std::pair<bool, std::optional<unsigned>> IdenticalTripCountRes =
+            haveIdenticalTripCounts(FC0, FC1);
+        bool SameTripCount = IdenticalTripCountRes.first;
+        std::optional<unsigned> TCDifference = IdenticalTripCountRes.second;
 
-          // If the second loop has instructions in the pre-header, attempt to
-          // hoist them up to the first loop's pre-header or sink them into the
-          // body of the second loop.
-          SmallVector<Instruction *, 4> SafeToHoist;
-          SmallVector<Instruction *, 4> SafeToSink;
-          // At this point, this is the last remaining legality check.
-          // Which means if we can make this pre-header empty, we can fuse
-          // these loops
-          if (!isEmptyPreheader(*FC1)) {
-            LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty "
-                                 "preheader.\n");
-
-            // If it is n...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/171889


More information about the llvm-commits mailing list