[llvm] 8a268be - Revert D82927 "[Loop Fusion] Integrate Loop Peeling into Loop Fusion"

Fangrui Song via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 21 12:24:59 PDT 2020


Author: Fangrui Song
Date: 2020-07-21T12:24:50-07:00
New Revision: 8a268bec1b02dd446fbc36e20d0a9af45d764f67

URL: https://github.com/llvm/llvm-project/commit/8a268bec1b02dd446fbc36e20d0a9af45d764f67
DIFF: https://github.com/llvm/llvm-project/commit/8a268bec1b02dd446fbc36e20d0a9af45d764f67.diff

LOG: Revert D82927 "[Loop Fusion] Integrate Loop Peeling into Loop Fusion"

This reverts commit bb8850d34d601d4edd75fd30c07821c05a726c42.

It broke 3 check-llvm-transforms-loopfusion tests in an ASAN build.

LoopFuse.cpp `for (BasicBlock *Pred : predecessors(BB)) {` may operate on a deleted BB.

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/LoopFuse.cpp

Removed: 
    llvm/test/Transforms/LoopFusion/guarded_peel.ll
    llvm/test/Transforms/LoopFusion/guarded_unsafeblock_peel.ll
    llvm/test/Transforms/LoopFusion/nonadjacent_peel.ll
    llvm/test/Transforms/LoopFusion/peel.ll


################################################################################
diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index da18812f67b1..20edc8699d79 100644
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -46,7 +46,6 @@
 
 #include "llvm/Transforms/Scalar/LoopFuse.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/DependenceAnalysis.h"
 #include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/LoopInfo.h"
@@ -54,7 +53,6 @@
 #include "llvm/Analysis/PostDominators.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/InitializePasses.h"
@@ -66,7 +64,6 @@
 #include "llvm/Transforms/Utils.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/CodeMoverUtils.h"
-#include "llvm/Transforms/Utils/UnrollLoop.h"
 
 using namespace llvm;
 
@@ -117,11 +114,6 @@ static cl::opt<FusionDependenceAnalysisChoice> FusionDependenceAnalysis(
                           "Use all available analyses")),
     cl::Hidden, cl::init(FUSION_DEPENDENCE_ANALYSIS_ALL), cl::ZeroOrMore);
 
-static cl::opt<unsigned> FusionPeelMaxCount(
-    "loop-fusion-peel-max-count", cl::init(0), cl::Hidden,
-    cl::desc("Max number of iterations to be peeled from a loop, such that "
-             "fusion can take place"));
-
 #ifndef NDEBUG
 static cl::opt<bool>
     VerboseFusionDebugging("loop-fusion-verbose-debug",
@@ -165,12 +157,6 @@ struct FusionCandidate {
   bool Valid;
   /// Guard branch of the loop, if it exists
   BranchInst *GuardBranch;
-  /// Peeling Paramaters of the Loop.
-  TTI::PeelingPreferences PP;
-  /// Can you Peel this Loop?
-  bool AbleToPeel;
-  /// Has this loop been Peeled
-  bool Peeled;
 
   /// Dominator and PostDominator trees are needed for the
   /// FusionCandidateCompare function, required by FusionCandidateSet to
@@ -182,13 +168,11 @@ struct FusionCandidate {
   OptimizationRemarkEmitter &ORE;
 
   FusionCandidate(Loop *L, const DominatorTree *DT,
-                  const PostDominatorTree *PDT, OptimizationRemarkEmitter &ORE,
-                  TTI::PeelingPreferences PP)
+                  const PostDominatorTree *PDT, OptimizationRemarkEmitter &ORE)
       : Preheader(L->getLoopPreheader()), Header(L->getHeader()),
         ExitingBlock(L->getExitingBlock()), ExitBlock(L->getExitBlock()),
         Latch(L->getLoopLatch()), L(L), Valid(true),
-        GuardBranch(L->getLoopGuardBranch()), PP(PP), AbleToPeel(canPeel(L)),
-        Peeled(false), DT(DT), PDT(PDT), ORE(ORE) {
+        GuardBranch(L->getLoopGuardBranch()), DT(DT), PDT(PDT), ORE(ORE) {
 
     // Walk over all blocks in the loop and check for conditions that may
     // prevent fusion. For each block, walk over all instructions and collect
@@ -259,17 +243,6 @@ struct FusionCandidate {
       return Preheader;
   }
 
-  /// After Peeling the loop is modified quite a bit, hence all of the Blocks
-  /// need to be updated accordingly.
-  void updateAfterPeeling() {
-    Preheader = L->getLoopPreheader();
-    Header = L->getHeader();
-    ExitingBlock = L->getExitingBlock();
-    ExitBlock = L->getExitBlock();
-    Latch = L->getLoopLatch();
-    verify();
-  }
-
   /// Given a guarded loop, get the successor of the guard that is not in the
   /// loop.
   ///
@@ -281,8 +254,6 @@ struct FusionCandidate {
     assert(GuardBranch && "Only valid on guarded loops.");
     assert(GuardBranch->isConditional() &&
            "Expecting guard to be a conditional branch.");
-    if (Peeled)
-      return GuardBranch->getSuccessor(1);
     return (GuardBranch->getSuccessor(0) == Preheader)
                ? GuardBranch->getSuccessor(1)
                : GuardBranch->getSuccessor(0);
@@ -544,17 +515,13 @@ struct LoopFuser {
   ScalarEvolution &SE;
   PostDominatorTree &PDT;
   OptimizationRemarkEmitter &ORE;
-  AssumptionCache &AC;
-
-  const TargetTransformInfo &TTI;
 
 public:
   LoopFuser(LoopInfo &LI, DominatorTree &DT, DependenceInfo &DI,
             ScalarEvolution &SE, PostDominatorTree &PDT,
-            OptimizationRemarkEmitter &ORE, const DataLayout &DL,
-            AssumptionCache &AC, const TargetTransformInfo &TTI)
+            OptimizationRemarkEmitter &ORE, const DataLayout &DL)
       : LDT(LI), DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy), LI(LI),
-        DT(DT), DI(DI), SE(SE), PDT(PDT), ORE(ORE), AC(AC), TTI(TTI) {}
+        DT(DT), DI(DI), SE(SE), PDT(PDT), ORE(ORE) {}
 
   /// This is the main entry point for loop fusion. It will traverse the
   /// specified function and collect candidate loops to fuse, starting at the
@@ -639,9 +606,7 @@ struct LoopFuser {
   /// Flow Equivalent sets, sorted by dominance.
   void collectFusionCandidates(const LoopVector &LV) {
     for (Loop *L : LV) {
-      TTI::PeelingPreferences PP =
-          gatherPeelingPreferences(L, SE, TTI, None, None);
-      FusionCandidate CurrCand(L, &DT, &PDT, ORE, PP);
+      FusionCandidate CurrCand(L, &DT, &PDT, ORE);
       if (!CurrCand.isEligibleForFusion(SE))
         continue;
 
@@ -691,126 +656,33 @@ struct LoopFuser {
   /// Determine if two fusion candidates have the same trip count (i.e., they
   /// execute the same number of iterations).
   ///
-  /// This function will return a pair of values. The first is a boolean,
-  /// stating whether or not the two candidates are known at compile time to
-  /// have the same TripCount. The second is the difference in the two
-  /// TripCounts. This information can be used later to determine whether or not
-  /// peeling can be performed on either one of the candiates.
-  std::pair<bool, Optional<unsigned>>
-  haveIdenticalTripCounts(const FusionCandidate &FC0,
-                          const FusionCandidate &FC1) const {
-
+  /// Note that for now this method simply returns a boolean value because there
+  /// are no mechanisms in loop fusion to handle different trip counts. In the
+  /// future, this behaviour can be extended to adjust one of the loops to make
+  /// the trip counts equal (e.g., loop peeling). When this is added, this
+  /// interface may need to change to return more information than just a
+  /// boolean value.
+  bool identicalTripCounts(const FusionCandidate &FC0,
+                           const FusionCandidate &FC1) const {
     const SCEV *TripCount0 = SE.getBackedgeTakenCount(FC0.L);
     if (isa<SCEVCouldNotCompute>(TripCount0)) {
       UncomputableTripCount++;
       LLVM_DEBUG(dbgs() << "Trip count of first loop could not be computed!");
-      return {false, None};
+      return false;
     }
 
     const SCEV *TripCount1 = SE.getBackedgeTakenCount(FC1.L);
     if (isa<SCEVCouldNotCompute>(TripCount1)) {
       UncomputableTripCount++;
       LLVM_DEBUG(dbgs() << "Trip count of second loop could not be computed!");
-      return {false, None};
+      return false;
     }
-
     LLVM_DEBUG(dbgs() << "\tTrip counts: " << *TripCount0 << " & "
                       << *TripCount1 << " are "
                       << (TripCount0 == TripCount1 ? "identical" : "different")
                       << "\n");
 
-    if (TripCount0 == TripCount1)
-      return {true, 0};
-
-    LLVM_DEBUG(dbgs() << "The loops do not have the same tripcount, "
-                         "determining the difference between trip counts\n");
-
-    // Currently only considering loops with a single exit point
-    // and a non-constant trip count.
-    unsigned TC0 = SE.getSmallConstantTripCount(FC0.L);
-    unsigned TC1 = SE.getSmallConstantTripCount(FC1.L);
-
-    // If any of the tripcounts are zero that means that loop(s) do not have
-    // a single exit or a constant tripcount.
-    if (TC0 == 0 || TC1 == 0) {
-      LLVM_DEBUG(dbgs() << "Loop(s) do not have a single exit point or do not "
-                           "have a constant number of iterations. Peeling "
-                           "is not benefical\n");
-      return {false, None};
-    }
-
-    Optional<unsigned> Difference = None;
-    int Diff = TC0 - TC1;
-
-    if (Diff > 0)
-      Difference = Diff;
-    else {
-      LLVM_DEBUG(
-          dbgs()
-          << "Difference is less than 0. FC1 (second loop) has more "
-             "iterations than the first one. Currently not supported.\n");
-    }
-
-    LLVM_DEBUG(dbgs() << "Difference in loop trip count is: " << Difference
-                      << "\n");
-
-    return {false, Difference};
-  }
-
-  void peelFusionCandidate(FusionCandidate &FC0, const FusionCandidate &FC1,
-                           unsigned PeelCount) {
-    assert(FC0.AbleToPeel && "Should be able to peel loop");
-
-    LLVM_DEBUG(dbgs() << "Attempting to peel first " << PeelCount
-                      << " iterations of the first loop. \n");
-
-    FC0.Peeled = peelLoop(FC0.L, PeelCount, &LI, &SE, &DT, &AC, true);
-    if (FC0.Peeled) {
-      LLVM_DEBUG(dbgs() << "Done Peeling\n");
-
-#ifndef NDEBUG
-      auto IdenticalTripCount = haveIdenticalTripCounts(FC0, FC1);
-
-      assert(IdenticalTripCount.first && *IdenticalTripCount.second == 0 &&
-             "Loops should have identical trip counts after peeling");
-#endif
-
-      FC0.PP.PeelCount = PeelCount;
-
-      // Peeling does not update the PDT
-      PDT.recalculate(*FC0.Preheader->getParent());
-
-      FC0.updateAfterPeeling();
-
-      // In this case the iterations of the loop are constant, so the first
-      // loop will execute completely (will not jump from one of
-      // the peeled blocks to the second loop). Here we are updating the
-      // branch conditions of each of the peeled blocks, such that it will
-      // branch to its successor which is not the Preheader of the second Loop.
-      // Doing this update will ensure that the entry block of the first loop
-      // dominates the entry block of the second loop.
-      BasicBlock *BB =
-          FC0.GuardBranch ? FC0.ExitBlock->getUniqueSuccessor() : FC1.Preheader;
-      SmallVector<DominatorTree::UpdateType, 8> TreeUpdates;
-      for (BasicBlock *Pred : predecessors(BB)) {
-        if (Pred != FC0.ExitBlock) {
-          BranchInst *Old = dyn_cast<BranchInst>(Pred->getTerminator());
-          BasicBlock *Succ = Old->getSuccessor(0);
-          if (Succ == BB)
-            Succ = Old->getSuccessor(1);
-          BranchInst *NewBranch = BranchInst::Create(Succ);
-          ReplaceInstWithInst(Old, NewBranch);
-          TreeUpdates.emplace_back(
-              DominatorTree::UpdateType(DominatorTree::Delete, Pred, BB));
-        }
-      }
-      DTU.applyUpdates(TreeUpdates);
-      DTU.flush();
-      LLVM_DEBUG(
-          dbgs() << "Sucessfully peeled " << FC0.PP.PeelCount
-                 << " iterations from the first loop.\n"
-                    "Both Loops have the same number of iterations now.\n");
-    }
+    return (TripCount0 == TripCount1);
   }
 
   /// Walk each set of control flow equivalent fusion candidates and attempt to
@@ -844,32 +716,7 @@ struct LoopFuser {
           FC0->verify();
           FC1->verify();
 
-          // Check if the candidates have identical tripcounts (first value of
-          // pair), and if not check the difference in the tripcounts between
-          // the loops (second value of pair). The difference is not equal to
-          // None iff the loops iterate a constant number of times, and have a
-          // single exit.
-          std::pair<bool, Optional<unsigned>> IdenticalTripCountRes =
-              haveIdenticalTripCounts(*FC0, *FC1);
-          bool SameTripCount = IdenticalTripCountRes.first;
-          Optional<unsigned> TCDifference = IdenticalTripCountRes.second;
-
-          // Here we are checking that FC0 (the first loop) can be peeled, and
-          // both loops have different tripcounts.
-          if (FC0->AbleToPeel && !SameTripCount && TCDifference) {
-            if (*TCDifference > FusionPeelMaxCount) {
-              LLVM_DEBUG(dbgs()
-                         << "Difference in loop trip counts: " << *TCDifference
-                         << " is greater than maximum peel count specificed: "
-                         << FusionPeelMaxCount << "\n");
-            } else {
-              // Dependent on peeling being performed on the first loop, and
-              // assuming all other conditions for fusion return true.
-              SameTripCount = true;
-            }
-          }
-
-          if (!SameTripCount) {
+          if (!identicalTripCounts(*FC0, *FC1)) {
             LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical trip "
                                  "counts. Not fusing.\n");
             reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
@@ -887,7 +734,7 @@ struct LoopFuser {
           // Ensure that FC0 and FC1 have identical guards.
           // If one (or both) are not guarded, this check is not necessary.
           if (FC0->GuardBranch && FC1->GuardBranch &&
-              !haveIdenticalGuards(*FC0, *FC1) && !TCDifference) {
+              !haveIdenticalGuards(*FC0, *FC1)) {
             LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical "
                                  "guards. Not Fusing.\n");
             reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
@@ -956,23 +803,13 @@ struct LoopFuser {
           LLVM_DEBUG(dbgs() << "\tFusion is performed: " << *FC0 << " and "
                             << *FC1 << "\n");
 
-          FusionCandidate FC0Copy = *FC0;
-          // Peel the loop after determining that fusion is legal. The Loops
-          // will still be safe to fuse after the peeling is performed.
-          bool Peel = TCDifference && *TCDifference > 0;
-          if (Peel)
-            peelFusionCandidate(FC0Copy, *FC1, *TCDifference);
-
           // Report fusion to the Optimization Remarks.
           // Note this needs to be done *before* performFusion because
           // performFusion will change the original loops, making it not
           // possible to identify them after fusion is complete.
-          reportLoopFusion<OptimizationRemark>((Peel ? FC0Copy : *FC0), *FC1,
-                                               FuseCounter);
+          reportLoopFusion<OptimizationRemark>(*FC0, *FC1, FuseCounter);
 
-          FusionCandidate FusedCand(
-              performFusion((Peel ? FC0Copy : *FC0), *FC1), &DT, &PDT, ORE,
-              FC0Copy.PP);
+          FusionCandidate FusedCand(performFusion(*FC0, *FC1), &DT, &PDT, ORE);
           FusedCand.verify();
           assert(FusedCand.isEligibleForFusion(SE) &&
                  "Fused candidate should be eligible for fusion!");
@@ -1249,17 +1086,16 @@ struct LoopFuser {
       return (FC1.GuardBranch->getSuccessor(1) == FC1.Preheader);
   }
 
-  /// Modify the latch branch of FC to be unconditional since successors of the
-  /// branch are the same.
+  /// Simplify the condition of the latch branch of \p FC to true, when both of
+  /// its successors are the same.
   void simplifyLatchBranch(const FusionCandidate &FC) const {
     BranchInst *FCLatchBranch = dyn_cast<BranchInst>(FC.Latch->getTerminator());
     if (FCLatchBranch) {
       assert(FCLatchBranch->isConditional() &&
              FCLatchBranch->getSuccessor(0) == FCLatchBranch->getSuccessor(1) &&
              "Expecting the two successors of FCLatchBranch to be the same");
-      BranchInst *NewBranch =
-          BranchInst::Create(FCLatchBranch->getSuccessor(0));
-      ReplaceInstWithInst(FCLatchBranch, NewBranch);
+      FCLatchBranch->setCondition(
+          llvm::ConstantInt::getTrue(FCLatchBranch->getCondition()->getType()));
     }
   }
 
@@ -1319,8 +1155,7 @@ struct LoopFuser {
     if (FC0.GuardBranch)
       return fuseGuardedLoops(FC0, FC1);
 
-    assert(FC1.Preheader ==
-           (FC0.Peeled ? FC0.ExitBlock->getUniqueSuccessor() : FC0.ExitBlock));
+    assert(FC1.Preheader == FC0.ExitBlock);
     assert(FC1.Preheader->size() == 1 &&
            FC1.Preheader->getSingleSuccessor() == FC1.Header);
 
@@ -1343,7 +1178,7 @@ struct LoopFuser {
     FC0.Latch->replaceSuccessorsPhiUsesWith(FC1.Latch);
 
     // Then modify the control flow and update DT and PDT.
-    SmallVector<DominatorTree::UpdateType, 16> TreeUpdates;
+    SmallVector<DominatorTree::UpdateType, 8> TreeUpdates;
 
     // The old exiting block of the first loop (FC0) has to jump to the header
     // of the second as we need to execute the code in the second header block
@@ -1362,27 +1197,12 @@ struct LoopFuser {
     // to FC1.Header? I think this is basically what the three sequences are
     // trying to accomplish; however, doing this directly in the CFG may mean
     // the DT/PDT becomes invalid
-    if (!FC0.Peeled) {
-      FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC1.Preheader,
-                                                           FC1.Header);
-      TreeUpdates.emplace_back(DominatorTree::UpdateType(
-          DominatorTree::Delete, FC0.ExitingBlock, FC1.Preheader));
-      TreeUpdates.emplace_back(DominatorTree::UpdateType(
-          DominatorTree::Insert, FC0.ExitingBlock, FC1.Header));
-    } else {
-      TreeUpdates.emplace_back(DominatorTree::UpdateType(
-          DominatorTree::Delete, FC0.ExitBlock, FC1.Preheader));
-
-      // Remove the ExitBlock of the first Loop (also not needed)
-      FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC0.ExitBlock,
-                                                           FC1.Header);
-      TreeUpdates.emplace_back(DominatorTree::UpdateType(
-          DominatorTree::Delete, FC0.ExitingBlock, FC0.ExitBlock));
-      FC0.ExitBlock->getTerminator()->eraseFromParent();
-      TreeUpdates.emplace_back(DominatorTree::UpdateType(
-          DominatorTree::Insert, FC0.ExitingBlock, FC1.Header));
-      new UnreachableInst(FC0.ExitBlock->getContext(), FC0.ExitBlock);
-    }
+    FC0.ExitingBlock->getTerminator()->replaceUsesOfWith(FC1.Preheader,
+                                                         FC1.Header);
+    TreeUpdates.emplace_back(DominatorTree::UpdateType(
+        DominatorTree::Delete, FC0.ExitingBlock, FC1.Preheader));
+    TreeUpdates.emplace_back(DominatorTree::UpdateType(
+        DominatorTree::Insert, FC0.ExitingBlock, FC1.Header));
 
     // The pre-header of L1 is not necessary anymore.
     assert(pred_begin(FC1.Preheader) == pred_end(FC1.Preheader));
@@ -1426,7 +1246,7 @@ struct LoopFuser {
     FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header);
     FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header);
 
-    // Modify the latch branch of FC0 to be unconditional as both successors of
+    // Change the condition of FC0 latch branch to true, as both successors of
     // the branch are the same.
     simplifyLatchBranch(FC0);
 
@@ -1448,11 +1268,6 @@ struct LoopFuser {
 
     LI.removeBlock(FC1.Preheader);
     DTU.deleteBB(FC1.Preheader);
-    if (FC0.Peeled) {
-      LI.removeBlock(FC0.ExitBlock);
-      DTU.deleteBB(FC0.ExitBlock);
-    }
-
     DTU.flush();
 
     // Is there a way to keep SE up-to-date so we don't need to forget the loops
@@ -1549,15 +1364,10 @@ struct LoopFuser {
     BasicBlock *FC1GuardBlock = FC1.GuardBranch->getParent();
     BasicBlock *FC0NonLoopBlock = FC0.getNonLoopBlock();
     BasicBlock *FC1NonLoopBlock = FC1.getNonLoopBlock();
-    BasicBlock *FC0ExitBlockSuccessor = FC0.ExitBlock->getUniqueSuccessor();
 
     // Move instructions from the exit block of FC0 to the beginning of the exit
-    // block of FC1, in the case that the FC0 loop has not been peeled. In the
-    // case that FC0 loop is peeled, then move the instructions of the successor
-    // of the FC0 Exit block to the beginning of the exit block of FC1.
-    moveInstructionsToTheBeginning(
-        (FC0.Peeled ? *FC0ExitBlockSuccessor : *FC0.ExitBlock), *FC1.ExitBlock,
-        DT, PDT, DI);
+    // block of FC1.
+    moveInstructionsToTheBeginning(*FC0.ExitBlock, *FC1.ExitBlock, DT, PDT, DI);
 
     // Move instructions from the guard block of FC1 to the end of the guard
     // block of FC0.
@@ -1577,9 +1387,8 @@ struct LoopFuser {
     // for FC1 (where FC1 guard would have gone if FC1 was not executed).
     FC1NonLoopBlock->replacePhiUsesWith(FC1GuardBlock, FC0GuardBlock);
     FC0.GuardBranch->replaceUsesOfWith(FC0NonLoopBlock, FC1NonLoopBlock);
-
-    BasicBlock *BBToUpdate = FC0.Peeled ? FC0ExitBlockSuccessor : FC0.ExitBlock;
-    BBToUpdate->getTerminator()->replaceUsesOfWith(FC1GuardBlock, FC1.Header);
+    FC0.ExitBlock->getTerminator()->replaceUsesOfWith(FC1GuardBlock,
+                                                      FC1.Header);
 
     // The guard of FC1 is not necessary anymore.
     FC1.GuardBranch->eraseFromParent();
@@ -1594,15 +1403,6 @@ struct LoopFuser {
     TreeUpdates.emplace_back(DominatorTree::UpdateType(
         DominatorTree::Insert, FC0GuardBlock, FC1NonLoopBlock));
 
-    if (FC0.Peeled) {
-      // Remove the Block after the ExitBlock of FC0
-      TreeUpdates.emplace_back(DominatorTree::UpdateType(
-          DominatorTree::Delete, FC0ExitBlockSuccessor, FC1GuardBlock));
-      FC0ExitBlockSuccessor->getTerminator()->eraseFromParent();
-      new UnreachableInst(FC0ExitBlockSuccessor->getContext(),
-                          FC0ExitBlockSuccessor);
-    }
-
     assert(pred_begin(FC1GuardBlock) == pred_end(FC1GuardBlock) &&
            "Expecting guard block to have no predecessors");
     assert(succ_begin(FC1GuardBlock) == succ_end(FC1GuardBlock) &&
@@ -1709,7 +1509,7 @@ struct LoopFuser {
     FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header);
     FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header);
 
-    // Modify the latch branch of FC0 to be unconditional as both successors of
+    // Change the condition of FC0 latch branch to true, as both successors of
     // the branch are the same.
     simplifyLatchBranch(FC0);
 
@@ -1740,10 +1540,6 @@ struct LoopFuser {
     LI.removeBlock(FC1GuardBlock);
     LI.removeBlock(FC1.Preheader);
     LI.removeBlock(FC0.ExitBlock);
-    if (FC0.Peeled) {
-      LI.removeBlock(FC0ExitBlockSuccessor);
-      DTU.deleteBB(FC0ExitBlockSuccessor);
-    }
     DTU.deleteBB(FC1GuardBlock);
     DTU.deleteBB(FC1.Preheader);
     DTU.deleteBB(FC0.ExitBlock);
@@ -1810,8 +1606,6 @@ struct LoopFuseLegacy : public FunctionPass {
     AU.addRequired<PostDominatorTreeWrapperPass>();
     AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
     AU.addRequired<DependenceAnalysisWrapperPass>();
-    AU.addRequired<AssumptionCacheTracker>();
-    AU.addRequired<TargetTransformInfoWrapperPass>();
 
     AU.addPreserved<ScalarEvolutionWrapperPass>();
     AU.addPreserved<LoopInfoWrapperPass>();
@@ -1828,12 +1622,9 @@ struct LoopFuseLegacy : public FunctionPass {
     auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
     auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
     auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
-    auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
-    const TargetTransformInfo &TTI =
-        getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
-    const DataLayout &DL = F.getParent()->getDataLayout();
 
-    LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL, AC, TTI);
+    const DataLayout &DL = F.getParent()->getDataLayout();
+    LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL);
     return LF.fuseLoops(F);
   }
 };
@@ -1846,11 +1637,9 @@ PreservedAnalyses LoopFusePass::run(Function &F, FunctionAnalysisManager &AM) {
   auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
   auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
   auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
-  auto &AC = AM.getResult<AssumptionAnalysis>(F);
-  const TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
-  const DataLayout &DL = F.getParent()->getDataLayout();
 
-  LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL, AC, TTI);
+  const DataLayout &DL = F.getParent()->getDataLayout();
+  LoopFuser LF(LI, DT, DI, SE, PDT, ORE, DL);
   bool Changed = LF.fuseLoops(F);
   if (!Changed)
     return PreservedAnalyses::all();
@@ -1873,8 +1662,6 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(DependenceAnalysisWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
 INITIALIZE_PASS_END(LoopFuseLegacy, "loop-fusion", "Loop Fusion", false, false)
 
 FunctionPass *llvm::createLoopFusePass() { return new LoopFuseLegacy(); }

diff --git a/llvm/test/Transforms/LoopFusion/guarded_peel.ll b/llvm/test/Transforms/LoopFusion/guarded_peel.ll
deleted file mode 100644
index 0c8df52017ba..000000000000
--- a/llvm/test/Transforms/LoopFusion/guarded_peel.ll
+++ /dev/null
@@ -1,85 +0,0 @@
-; RUN: opt -S -loop-fusion -loop-fusion-peel-max-count=3 < %s | FileCheck %s
-
-; This will test if we are able to fuse two guarded loops which have constant
-; but different trip counts. The first two iterations of the first loop should
-; be peeled off, and then the loops should be fused together.
-
-@B = common global [1024 x i32] zeroinitializer, align 16
-
-; CHECK: void @main
-; CHECK-NEXT: entry:
-; CHECK: br i1 %cmp4, label %for.first.entry, label %for.end
-; CHECK: for.first.entry
-; CHECK-NEXT: br label %for.first.peel.begin
-; CHECK: for.first.peel.begin:
-; CHECK-NEXT: br label %for.first.peel
-; CHECK: for.first.peel:
-; CHECK: br label %for.first.peel.next
-; CHECK: for.first.peel.next:
-; CHECK-NEXT: br label %for.first.peel2
-; CHECK: for.first.peel2:
-; CHECK: br label %for.first.peel.next1
-; CHECK: for.first.peel.next1:
-; CHECK-NEXT: br label %for.first.peel.next11
-; CHECK: for.first.peel.next11:
-; CHECK-NEXT: br label %for.first.entry.peel.newph
-; CHECK: for.first.entry.peel.newph:
-; CHECK: br label %for.first
-; CHECK: for.first:
-; CHECK: br i1 %cmp3, label %for.first, label %for.second.exit
-; CHECK: for.second.exit:
-; CHECK: br label %for.end
-; CHECK: for.end:
-; CHECK-NEXT: ret void
-
-define void @main(i32* noalias %A) {
-entry:
-  %cmp4 = icmp slt i64 0, 45
-  br i1 %cmp4, label %for.first.entry, label %for.second.guard
-
-for.first.entry:                               ; preds = %entry
-  br label %for.first
-
-for.first:                                         ; preds = %for.first.entry, %for.first
-  %i.05 = phi i64 [ %inc, %for.first ], [ 0, %for.first.entry ]
-  %sub = sub nsw i64 %i.05, 3
-  %add = add nsw i64 %i.05, 3
-  %mul = mul nsw i64 %sub, %add
-  %rem = srem i64 %mul, %i.05
-  %conv = trunc i64 %rem to i32
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.05
-  store i32 %conv, i32* %arrayidx, align 4
-  %inc = add nsw i64 %i.05, 1
-  %cmp = icmp slt i64 %inc, 45
-  br i1 %cmp, label %for.first, label %for.first.exit
-
-for.first.exit:                                 ; preds = %for.first
-  br label %for.second.guard
-
-for.second.guard:                                          ; preds = %for.first.exit, %entry
-  %cmp31 = icmp slt i64 2, 45
-  br i1 %cmp31, label %for.second.entry, label %for.end
-
-for.second.entry:                              ; preds = %for.second.guard
-  br label %for.second
-
-for.second:                                        ; preds = %for.second.entry, %for.second
-  %i1.02 = phi i64 [ %inc14, %for.second ], [ 2, %for.second.entry ]
-  %sub7 = sub nsw i64 %i1.02, 3
-  %add8 = add nsw i64 %i1.02, 3
-  %mul9 = mul nsw i64 %sub7, %add8
-  %rem10 = srem i64 %mul9, %i1.02
-  %conv11 = trunc i64 %rem10 to i32
-  %arrayidx12 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %i1.02
-  store i32 %conv11, i32* %arrayidx12, align 4
-  %inc14 = add nsw i64 %i1.02, 1
-  %cmp3 = icmp slt i64 %inc14, 45
-  br i1 %cmp3, label %for.second, label %for.second.exit
-
-for.second.exit:                               ; preds = %for.second
-  br label %for.end
-
-for.end:                                        ; preds = %for.second.exit, %for.second.guard
-  ret void
-}
-

diff --git a/llvm/test/Transforms/LoopFusion/guarded_unsafeblock_peel.ll b/llvm/test/Transforms/LoopFusion/guarded_unsafeblock_peel.ll
deleted file mode 100644
index 0d825fb751f5..000000000000
--- a/llvm/test/Transforms/LoopFusion/guarded_unsafeblock_peel.ll
+++ /dev/null
@@ -1,72 +0,0 @@
-; RUN: opt -S -loop-fusion -loop-fusion-peel-max-count=3 < %s | FileCheck %s
-
-; This will test that we do not fuse two guarded loops together.
-; These loops do not have the same trip count, fusing should be possible after
-; peeling the loops. However, the exit block of the first loop makes the loops
-; unsafe to peel.
-; The expected output of this test is the function as below.
-
-; CHECK: void @unsafe_exitblock
-; CHECK: for.first.guard
-; CHECK: br i1 %cmp3, label %for.first.preheader, label %for.second.guard
-; CHECK: for.first.preheader:
-; CHECK-NEXT: br label %for.first
-; CHECK: for.first:
-; CHECK: br i1 %cmp, label %for.first, label %for.first.exit
-; CHECK: for.first.exit:
-; CHECK-NEXT: call void @bar()
-; CHECK-NEXT: br label %for.second.guard
-; CHECK: for.second.guard:
-; CHECK: br i1 %cmp21, label %for.second.preheader, label %for.end
-; CHECK: for.second.preheader:
-; CHECK-NEXT: br label %for.second
-; CHECK: for.second:
-; CHECK: br i1 %cmp2, label %for.second, label %for.second.exit
-; CHECK: for.second.exit:
-; CHECK-NEXT: br label %for.end
-; CHECK: for.end:
-; CHECK-NEXT: ret void
-
-define void @unsafe_exitblock(i32* noalias %A, i32* noalias %B) {
-for.first.guard:
-  %cmp3 = icmp slt i64 0, 45
-  br i1 %cmp3, label %for.first.preheader, label %for.second.guard
-
-for.first.preheader:
-  br label %for.first
-
-for.first:
-  %i.04 = phi i64 [ %inc, %for.first ], [ 0, %for.first.preheader ]
-  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.04
-  store i32 0, i32* %arrayidx, align 4
-  %inc = add nsw i64 %i.04, 1
-  %cmp = icmp slt i64 %inc, 45
-  br i1 %cmp, label %for.first, label %for.first.exit
-
-for.first.exit:
-  call void @bar()
-  br label %for.second.guard
-
-for.second.guard:
-  %cmp21 = icmp slt i64 2,45
-  br i1 %cmp21, label %for.second.preheader, label %for.end
-
-for.second.preheader:
-  br label %for.second
-
-for.second:
-  %j.02 = phi i64 [ %inc6, %for.second ], [ 2, %for.second.preheader ]
-  %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %j.02
-  store i32 0, i32* %arrayidx4, align 4
-  %inc6 = add nsw i64 %j.02, 1
-  %cmp2 = icmp slt i64 %inc6, 45
-  br i1 %cmp2, label %for.second, label %for.second.exit
-
-for.second.exit:
-  br label %for.end
-
-for.end:
-  ret void
-}
-
-declare void @bar()

diff --git a/llvm/test/Transforms/LoopFusion/nonadjacent_peel.ll b/llvm/test/Transforms/LoopFusion/nonadjacent_peel.ll
deleted file mode 100644
index 8fe8824aa9fe..000000000000
--- a/llvm/test/Transforms/LoopFusion/nonadjacent_peel.ll
+++ /dev/null
@@ -1,83 +0,0 @@
-; RUN: opt -S -loop-fusion -loop-fusion-peel-max-count=3 < %s | FileCheck %s
-
-; This will check that we do not fuse these two loops together. These loops are
-; valid candidates for peeling, however they are not adjacent.
-; The expected output of this test is the function below.
-
-; CHECK: void @function
-; CHECK-NEXT: for.first.preheader:
-; CHECK-NEXT: br label %for.first
-; CHECK: for.first:
-; CHECK: br label %for.first.latch
-; CHECK: for.first.latch:
-; CHECK: br i1 %exitcond4, label %for.first, label %for.first.exit
-; CHECK: for.first.exit:
-; CHECK-NEXT: br label %for.next
-; CHECK: for.next:
-; CHECK-NEXT: br label %for.second.preheader
-; CHECK: for.second.preheader:
-; CHECK: br label %for.second
-; CHECK: for.second:
-; CHECK: br label %for.second.latch
-; CHECK: for.second.latch:
-; CHECK: br i1 %exitcond, label %for.second, label %for.end
-; CHECK: for.end:
-; CHECK-NEXT: ret void
-
-@B = common global [1024 x i32] zeroinitializer, align 16
-
-define void @function(i32* noalias %arg) {
-for.first.preheader:
-  br label %for.first
-
-for.first:                                              ; preds = %for.first.preheader, %for.first.latch
-  %.014 = phi i32 [ 0, %for.first.preheader ], [ %tmp15, %for.first.latch ]
-  %indvars.iv23 = phi i64 [ 0, %for.first.preheader ], [ %indvars.iv.next3, %for.first.latch ]
-  %tmp = add nsw i32 %.014, -3
-  %tmp8 = add nuw nsw i64 %indvars.iv23, 3
-  %tmp9 = trunc i64 %tmp8 to i32
-  %tmp10 = mul nsw i32 %tmp, %tmp9
-  %tmp11 = trunc i64 %indvars.iv23 to i32
-  %tmp12 = srem i32 %tmp10, %tmp11
-  %tmp13 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv23
-  store i32 %tmp12, i32* %tmp13, align 4
-  br label %for.first.latch
-
-for.first.latch:                                             ; preds = %for.first
-  %indvars.iv.next3 = add nuw nsw i64 %indvars.iv23, 1
-  %tmp15 = add nuw nsw i32 %.014, 1
-  %exitcond4 = icmp ne i64 %indvars.iv.next3, 100
-  br i1 %exitcond4, label %for.first, label %for.first.exit
-
-for.first.exit:                                            ; preds: %for.first.latch
-  br label %for.next
-
-for.next:                                      ; preds = %for.first.exit
-  br label %for.second.preheader
-
-for.second.preheader:                                   ; preds = %for.next
-  br label %for.second
-
-for.second:                                             ; preds = %for.second.preheader, %for.second.latch
-  %.02 = phi i32 [ 0, %for.second.preheader ], [ %tmp28, %for.second.latch ]
-  %indvars.iv1 = phi i64 [ 3, %for.second.preheader ], [ %indvars.iv.next, %for.second.latch ]
-  %tmp20 = add nsw i32 %.02, -3
-  %tmp21 = add nuw nsw i64 %indvars.iv1, 3
-  %tmp22 = trunc i64 %tmp21 to i32
-  %tmp23 = mul nsw i32 %tmp20, %tmp22
-  %tmp24 = trunc i64 %indvars.iv1 to i32
-  %tmp25 = srem i32 %tmp23, %tmp24
-  %tmp26 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv1
-  store i32 %tmp25, i32* %tmp26, align 4
-  br label %for.second.latch
-
-for.second.latch:                                             ; preds = %for.second
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1
-  %tmp28 = add nuw nsw i32 %.02, 1
-  %exitcond = icmp ne i64 %indvars.iv.next, 100
-  br i1 %exitcond, label %for.second, label %for.end
-
-for.end:                                             ; preds = %for.second.latch
-  ret void
-}
-

diff  --git a/llvm/test/Transforms/LoopFusion/peel.ll b/llvm/test/Transforms/LoopFusion/peel.ll
deleted file mode 100644
index d978fb3ede40..000000000000
--- a/llvm/test/Transforms/LoopFusion/peel.ll
+++ /dev/null
@@ -1,106 +0,0 @@
-; RUN: opt -S -loop-fusion -loop-fusion-peel-max-count=3 < %s | FileCheck %s
-
-; This will test whether we can fuse two loops together if they have constant
-; but a different tripcount.
-; The first three iterations of the first loop should be peeled, and then the
-; two loops should be fused together in this example.
-
-; C Code
-;
-; int B[1024];
-;
-; void function(int *arg) {
-;   for (int i = 0; i != 100; ++i)
-;     arg[i] = ((i - 3)*(i+3)) % i;
-;
-;   for (int i = 3; i != 100; ++i)
-;     B[i] = ((i-6)*(i+3)) % i;
-; }
-
-; CHECK: void @function
-; CHECK-NEXT: for.first.preheader:
-; CHECK-NEXT: br label %for.first.peel.begin
-; CHECK: for.first.peel.begin:
-; CHECK-NEXT: br label %for.first.peel
-; CHECK: for.first.peel
-; CHECK: br label %for.first.latch.peel
-; CHECK: for.first.latch.peel:
-; CHECK: br label %for.first.peel.next
-; CHECK: for.first.peel.next:
-; CHECK-NEXT: br label %for.first.peel2
-; CHECK: for.first.peel2:
-; CHECK: br label %for.first.latch.peel10
-; CHECK: for.first.latch.peel10:
-; CHECK: br label %for.first.peel.next1
-; CHECK: for.first.peel.next1:
-; CHECK-NEXT: br label %for.first.peel15
-; CHECK: for.first.peel15:
-; CHECK: br label %for.first.latch.peel23
-; CHECK: for.first.latch.peel23:
-; CHECK: br label %for.first.peel.next14
-; CHECK: for.first.peel.next14:
-; CHECK-NEXT: br label %for.first.peel.next27
-; CHECK: for.first.peel.next27:
-; CHECK-NEXT: br label %for.first.preheader.peel.newph
-; CHECK: for.first.preheader.peel.newph:
-; CHECK-NEXT: br label %for.first
-; CHECK: for.first:
-; CHECK: br label %for.first.latch
-; CHECK: for.first.latch:
-; CHECK: br label %for.second.latch
-; CHECK: for.second.latch:
-; CHECK: br i1 %exitcond, label %for.first, label %for.end
-; CHECK: for.end:
-; CHECK-NEXT: ret void
-
-@B = common global [1024 x i32] zeroinitializer, align 16
-
-define void @function(i32* noalias %arg) {
-for.first.preheader:
-  br label %for.first
-
-for.first:                                              ; preds = %for.first.preheader, %for.first.latch
-  %.014 = phi i32 [ 0, %for.first.preheader ], [ %tmp15, %for.first.latch ]
-  %indvars.iv23 = phi i64 [ 0, %for.first.preheader ], [ %indvars.iv.next3, %for.first.latch ]
-  %tmp = add nsw i32 %.014, -3
-  %tmp8 = add nuw nsw i64 %indvars.iv23, 3
-  %tmp9 = trunc i64 %tmp8 to i32
-  %tmp10 = mul nsw i32 %tmp, %tmp9
-  %tmp11 = trunc i64 %indvars.iv23 to i32
-  %tmp12 = srem i32 %tmp10, %tmp11
-  %tmp13 = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv23
-  store i32 %tmp12, i32* %tmp13, align 4
-  br label %for.first.latch
-
-for.first.latch:                                             ; preds = %for.first
-  %indvars.iv.next3 = add nuw nsw i64 %indvars.iv23, 1
-  %tmp15 = add nuw nsw i32 %.014, 1
-  %exitcond4 = icmp ne i64 %indvars.iv.next3, 100
-  br i1 %exitcond4, label %for.first, label %for.second.preheader
-
-for.second.preheader:                                   ; preds = %for.first.latch
-  br label %for.second
-
-for.second:                                             ; preds = %for.second.preheader, %for.second.latch
-  %.02 = phi i32 [ 0, %for.second.preheader ], [ %tmp28, %for.second.latch ]
-  %indvars.iv1 = phi i64 [ 3, %for.second.preheader ], [ %indvars.iv.next, %for.second.latch ]
-  %tmp20 = add nsw i32 %.02, -3
-  %tmp21 = add nuw nsw i64 %indvars.iv1, 3
-  %tmp22 = trunc i64 %tmp21 to i32
-  %tmp23 = mul nsw i32 %tmp20, %tmp22
-  %tmp24 = trunc i64 %indvars.iv1 to i32
-  %tmp25 = srem i32 %tmp23, %tmp24
-  %tmp26 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv1
-  store i32 %tmp25, i32* %tmp26, align 4
-  br label %for.second.latch
-
-for.second.latch:                                             ; preds = %for.second
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv1, 1
-  %tmp28 = add nuw nsw i32 %.02, 1
-  %exitcond = icmp ne i64 %indvars.iv.next, 100
-  br i1 %exitcond, label %for.second, label %for.end
-
-for.end:                                             ; preds = %for.second.latch
-  ret void
-}
-


        


More information about the llvm-commits mailing list