[llvm] [DependenceAnalysis] Extending SIV to handle separate loops (PR #128782)

Alireza Torabian via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 21 20:00:08 PDT 2025


https://github.com/1997alireza updated https://github.com/llvm/llvm-project/pull/128782

>From 9d511f535ffdf3ce37aa23b7e9b6fda1189f8f8f Mon Sep 17 00:00:00 2001
From: Alireza Torabian <alireza.torabian at huawei.com>
Date: Mon, 24 Feb 2025 11:53:53 -0500
Subject: [PATCH] [DependenceAnalysis] Extending SIV to handle separate loops

When there is a dependency between two memory instructions in separate
loops, SIV will be able to test them and compute the direction and
the distance of the dependency.
---
 .../llvm/Analysis/DependenceAnalysis.h        | 159 +++++--
 llvm/lib/Analysis/DependenceAnalysis.cpp      | 406 +++++++++++-------
 llvm/lib/Transforms/Scalar/LoopFuse.cpp       |  34 ++
 .../PreliminaryNoValidityCheckFixedSize.ll    |   2 +-
 .../DependenceAnalysis/SIVSeparateLoops.ll    | 145 +++++++
 .../LoopFusion/backward_loop_carried.ll       | 185 ++++++++
 llvm/test/Transforms/LoopFusion/simple.ll     |  28 +-
 7 files changed, 758 insertions(+), 201 deletions(-)
 create mode 100644 llvm/test/Analysis/DependenceAnalysis/SIVSeparateLoops.ll
 create mode 100644 llvm/test/Transforms/LoopFusion/backward_loop_carried.ll

diff --git a/llvm/include/llvm/Analysis/DependenceAnalysis.h b/llvm/include/llvm/Analysis/DependenceAnalysis.h
index 426ac757b4b0d..cdf59024d1d3a 100644
--- a/llvm/include/llvm/Analysis/DependenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/DependenceAnalysis.h
@@ -152,13 +152,28 @@ namespace llvm {
     /// source and destination of the dependence.
     virtual unsigned getLevels() const { return 0; }
 
+    /// getSeparateLevels - Returns the number of separate loops surrounding
+    /// the source and destination of the dependence.
+    virtual unsigned getSeparateLevels() const { return 0; }
+
+    /// getDVEntry - Returns the DV entry associated with a regular or a
+    /// separate level.
+    DVEntry getDVEntry(unsigned Level, bool Separate) const;
+
     /// getDirection - Returns the direction associated with a particular
-    /// level.
-    virtual unsigned getDirection(unsigned Level) const { return DVEntry::ALL; }
+    /// level. If Separate is set to true, information about a separate
+    /// level is provided.
+    virtual unsigned getDirection(unsigned Level, bool Separate = false) const {
+      return DVEntry::ALL;
+    }
 
     /// getDistance - Returns the distance (or NULL) associated with a
-    /// particular level.
-    virtual const SCEV *getDistance(unsigned Level) const { return nullptr; }
+    /// particular level. If Separate is set to true, information about
+    /// a separate level is provided.
+    virtual const SCEV *getDistance(unsigned Level,
+                                    bool Separate = false) const {
+      return nullptr;
+    }
 
     /// Check if the direction vector is negative. A negative direction
     /// vector means Src and Dst are reversed in the actual program.
@@ -171,21 +186,35 @@ namespace llvm {
     virtual bool normalize(ScalarEvolution *SE) { return false; }
 
     /// isPeelFirst - Returns true if peeling the first iteration from
-    /// this loop will break this dependence.
-    virtual bool isPeelFirst(unsigned Level) const { return false; }
+    /// this loop will break this dependence. If Separate is set to true,
+    /// information about a separate level is provided.
+    virtual bool isPeelFirst(unsigned Level, bool Separate = false) const {
+      return false;
+    }
 
     /// isPeelLast - Returns true if peeling the last iteration from
-    /// this loop will break this dependence.
-    virtual bool isPeelLast(unsigned Level) const { return false; }
+    /// this loop will break this dependence. If Separate is set to true,
+    /// information about a separate level is provided.
+    virtual bool isPeelLast(unsigned Level, bool Separate = false) const {
+      return false;
+    }
 
     /// isSplitable - Returns true if splitting this loop will break
-    /// the dependence.
-    virtual bool isSplitable(unsigned Level) const { return false; }
+    /// the dependence. If Separate is set to true, information about a
+    /// separate level is provided.
+    virtual bool isSplitable(unsigned Level, bool Separate = false) const {
+      return false;
+    }
+
+    /// inSeparateLoops - Returns true if this level is performed across
+    /// two separate loop nests.
+    virtual bool inSeparateLoops(unsigned Level) const { return false; }
 
     /// isScalar - Returns true if a particular level is scalar; that is,
     /// if no subscript in the source or destination mention the induction
-    /// variable associated with the loop at this level.
-    virtual bool isScalar(unsigned Level) const;
+    /// variable associated with the loop at this level. If Separate is
+    /// set to true, information about a separate level is provided.
+    virtual bool isScalar(unsigned Level, bool Separate = false) const;
 
     /// getNextPredecessor - Returns the value of the NextPredecessor
     /// field.
@@ -245,13 +274,33 @@ namespace llvm {
     /// source and destination of the dependence.
     unsigned getLevels() const override { return Levels; }
 
+    /// getSeparateLevels - Returns the number of separate loops surrounding
+    /// the source and destination of the dependence.
+    unsigned getSeparateLevels() const override { return SeparateLevels; }
+
+    /// getDVEntry - Returns a copy of the DV entry for Level. A regular level
+    /// (1..Levels) is read from DV; when Separate is true, Level must lie in
+    /// Levels+1..Levels+SeparateLevels and is read from DVSeparate.
+    DVEntry getDVEntry(unsigned Level, bool Separate) const {
+      if (Separate) {
+        assert(Levels < Level && Level <= Levels + SeparateLevels &&
+               "Separate level out of range");
+        return DVSeparate[Level - Levels - 1];
+      }
+      assert(0 < Level && Level <= Levels && "Level out of range");
+      return DV[Level - 1];
+    }
+
     /// getDirection - Returns the direction associated with a particular
-    /// level.
-    unsigned getDirection(unsigned Level) const override;
+    /// level. If Separate is set to true, information about a separate
+    /// level is provided.
+    unsigned getDirection(unsigned Level, bool Separate = false) const override;
 
     /// getDistance - Returns the distance (or NULL) associated with a
-    /// particular level.
-    const SCEV *getDistance(unsigned Level) const override;
+    /// particular level. If Separate is set to true, information about
+    /// a separate level is provided.
+    const SCEV *getDistance(unsigned Level,
+                            bool Separate = false) const override;
 
     /// Check if the direction vector is negative. A negative direction
     /// vector means Src and Dst are reversed in the actual program.
@@ -264,27 +313,37 @@ namespace llvm {
     bool normalize(ScalarEvolution *SE) override;
 
     /// isPeelFirst - Returns true if peeling the first iteration from
-    /// this loop will break this dependence.
-    bool isPeelFirst(unsigned Level) const override;
+    /// this loop will break this dependence. If Separate is set to true,
+    /// information about a separate level is provided.
+    bool isPeelFirst(unsigned Level, bool Separate = false) const override;
 
     /// isPeelLast - Returns true if peeling the last iteration from
-    /// this loop will break this dependence.
-    bool isPeelLast(unsigned Level) const override;
+    /// this loop will break this dependence. If Separate is set to true,
+    /// information about a separate level is provided.
+    bool isPeelLast(unsigned Level, bool Separate = false) const override;
 
     /// isSplitable - Returns true if splitting the loop will break
-    /// the dependence.
-    bool isSplitable(unsigned Level) const override;
+    /// the dependence. If Separate is set to true, information about a
+    /// separate level is provided.
+    bool isSplitable(unsigned Level, bool Separate = false) const override;
+
+    /// inSeparateLoops - Returns true if this level is performed across
+    /// two separate loop nests.
+    bool inSeparateLoops(unsigned Level) const override;
 
     /// isScalar - Returns true if a particular level is scalar; that is,
     /// if no subscript in the source or destination mention the induction
-    /// variable associated with the loop at this level.
-    bool isScalar(unsigned Level) const override;
+    /// variable associated with the loop at this level. If Separate is
+    /// set to true, information about a separate level is provided.
+    bool isScalar(unsigned Level, bool Separate = false) const override;
 
   private:
     unsigned short Levels;
+    unsigned short SeparateLevels;
     bool LoopIndependent;
     bool Consistent; // Init to true, then refine.
     std::unique_ptr<DVEntry[]> DV;
+    std::unique_ptr<DVEntry[]> DVSeparate;
     friend class DependenceInfo;
   };
 
@@ -405,7 +464,8 @@ namespace llvm {
       const SCEV *A;
       const SCEV *B;
       const SCEV *C;
-      const Loop *AssociatedLoop;
+      const Loop *AssociatedSrcLoop;
+      const Loop *AssociatedDstLoop;
 
     public:
       /// isEmpty - Return true if the constraint is of kind Empty.
@@ -449,18 +509,25 @@ namespace llvm {
       /// Otherwise assert.
       const SCEV *getD() const;
 
-      /// getAssociatedLoop - Returns the loop associated with this constraint.
-      const Loop *getAssociatedLoop() const;
+      /// getAssociatedSrcLoop - Returns the source loop associated with this
+      /// constraint.
+      const Loop *getAssociatedSrcLoop() const;
+
+      /// getAssociatedDstLoop - Returns the destination loop associated with
+      /// this constraint.
+      const Loop *getAssociatedDstLoop() const;
 
       /// setPoint - Change a constraint to Point.
-      void setPoint(const SCEV *X, const SCEV *Y, const Loop *CurrentLoop);
+      void setPoint(const SCEV *X, const SCEV *Y, const Loop *CurrentSrcLoop,
+                    const Loop *CurrentDstLoop);
 
       /// setLine - Change a constraint to Line.
-      void setLine(const SCEV *A, const SCEV *B,
-                   const SCEV *C, const Loop *CurrentLoop);
+      void setLine(const SCEV *A, const SCEV *B, const SCEV *C,
+                   const Loop *CurrentSrcLoop, const Loop *CurrentDstLoop);
 
       /// setDistance - Change a constraint to Distance.
-      void setDistance(const SCEV *D, const Loop *CurrentLoop);
+      void setDistance(const SCEV *D, const Loop *CurrentSrcLoop,
+                       const Loop *CurrentDstLoop);
 
       /// setEmpty - Change a constraint to Empty.
       void setEmpty();
@@ -473,6 +540,10 @@ namespace llvm {
       void dump(raw_ostream &OS) const;
     };
 
+    /// Returns true if the two loops are the same, or if they have the same
+    /// trip count and nesting depth.
+    bool areLoopsSimilar(const Loop *SrcLoop, const Loop *DstLoop) const;
+
     /// establishNestingLevels - Examines the loop nesting of the Src and Dst
     /// instructions and establishes their shared loops. Sets the variables
     /// CommonLevels, SrcLevels, and MaxLevels.
@@ -523,10 +594,15 @@ namespace llvm {
     ///     e - 5
     ///     f - 6
     ///     g - 7 = MaxLevels
-    void establishNestingLevels(const Instruction *Src,
-                                const Instruction *Dst);
+    /// SeparateLevels counts the number of loop levels after the common levels
+    /// that are not identical but are considered similar. Two levels are
+    /// considered similar if they have the same trip count and the same
+    /// nesting depth.
+    /// For example, if loops `c` and `e` are similar, then they contribute to
+    /// the SeparateLevels count and SeparateLevels is set to 1.
+    void establishNestingLevels(const Instruction *Src, const Instruction *Dst);
 
-    unsigned CommonLevels, SrcLevels, MaxLevels;
+    unsigned CommonLevels, SrcLevels, MaxLevels, SeparateLevels;
 
     /// mapSrcLoop - Given one of the loops containing the source, return
     /// its level index in our numbering scheme.
@@ -668,7 +744,8 @@ namespace llvm {
     bool strongSIVtest(const SCEV *Coeff,
                        const SCEV *SrcConst,
                        const SCEV *DstConst,
-                       const Loop *CurrentLoop,
+                       const Loop *CurrentSrcLoop,
+                       const Loop *CurrentDstLoop,
                        unsigned Level,
                        FullDependence &Result,
                        Constraint &NewConstraint) const;
@@ -686,7 +763,8 @@ namespace llvm {
     bool weakCrossingSIVtest(const SCEV *SrcCoeff,
                              const SCEV *SrcConst,
                              const SCEV *DstConst,
-                             const Loop *CurrentLoop,
+                             const Loop *CurrentSrcLoop,
+                             const Loop *CurrentDstLoop,
                              unsigned Level,
                              FullDependence &Result,
                              Constraint &NewConstraint,
@@ -705,7 +783,8 @@ namespace llvm {
                       const SCEV *DstCoeff,
                       const SCEV *SrcConst,
                       const SCEV *DstConst,
-                      const Loop *CurrentLoop,
+                      const Loop *CurrentSrcLoop,
+                      const Loop *CurrentDstLoop,
                       unsigned Level,
                       FullDependence &Result,
                       Constraint &NewConstraint) const;
@@ -723,7 +802,8 @@ namespace llvm {
     bool weakZeroSrcSIVtest(const SCEV *DstCoeff,
                             const SCEV *SrcConst,
                             const SCEV *DstConst,
-                            const Loop *CurrentLoop,
+                            const Loop *CurrentSrcLoop,
+                            const Loop *CurrentDstLoop,
                             unsigned Level,
                             FullDependence &Result,
                             Constraint &NewConstraint) const;
@@ -741,7 +821,8 @@ namespace llvm {
     bool weakZeroDstSIVtest(const SCEV *SrcCoeff,
                             const SCEV *SrcConst,
                             const SCEV *DstConst,
-                            const Loop *CurrentLoop,
+                            const Loop *CurrentSrcLoop,
+                            const Loop *CurrentDstLoop,
                             unsigned Level,
                             FullDependence &Result,
                             Constraint &NewConstraint) const;
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index dc0ed22dbcc0b..311d31de98b48 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -104,6 +104,7 @@ STATISTIC(GCDindependence, "GCD independence");
 STATISTIC(BanerjeeApplications, "Banerjee applications");
 STATISTIC(BanerjeeIndependence, "Banerjee independence");
 STATISTIC(BanerjeeSuccesses, "Banerjee successes");
+STATISTIC(SeparateLoopsConsidered, "Separate loops considered");
 
 static cl::opt<bool>
     Delinearize("da-delinearize", cl::init(true), cl::Hidden,
@@ -255,7 +256,7 @@ bool Dependence::isAnti() const {
 // if no subscript in the source or destination mention the induction
 // variable associated with the loop at this level.
 // Leave this out of line, so it will serve as a virtual method anchor
-bool Dependence::isScalar(unsigned level) const {
+bool Dependence::isScalar(unsigned level, bool Separate) const {
   return false;
 }
 
@@ -332,50 +333,50 @@ bool FullDependence::normalize(ScalarEvolution *SE) {
 // The rest are simple getters that hide the implementation.
 
 // getDirection - Returns the direction associated with a particular level.
-unsigned FullDependence::getDirection(unsigned Level) const {
-  assert(0 < Level && Level <= Levels && "Level out of range");
-  return DV[Level - 1].Direction;
+// If Separate is set to true, information about a separate level is provided.
+unsigned FullDependence::getDirection(unsigned Level, bool Separate) const {
+  return getDVEntry(Level, Separate).Direction;
 }
 
-
-// Returns the distance (or NULL) associated with a particular level.
-const SCEV *FullDependence::getDistance(unsigned Level) const {
-  assert(0 < Level && Level <= Levels && "Level out of range");
-  return DV[Level - 1].Distance;
+// Returns the distance (or NULL) associated with a particular level. If
+// Separate is set to true, information about a separate level is provided.
+const SCEV *FullDependence::getDistance(unsigned Level, bool Separate) const {
+  return getDVEntry(Level, Separate).Distance;
 }
 
-
 // Returns true if a particular level is scalar; that is,
 // if no subscript in the source or destination mention the induction
-// variable associated with the loop at this level.
-bool FullDependence::isScalar(unsigned Level) const {
-  assert(0 < Level && Level <= Levels && "Level out of range");
-  return DV[Level - 1].Scalar;
+// variable associated with the loop at this level. If Separate is set
+// to true, information about a separate level is provided.
+bool FullDependence::isScalar(unsigned Level, bool Separate) const {
+  return getDVEntry(Level, Separate).Scalar;
 }
 
-
 // Returns true if peeling the first iteration from this loop
-// will break this dependence.
-bool FullDependence::isPeelFirst(unsigned Level) const {
-  assert(0 < Level && Level <= Levels && "Level out of range");
-  return DV[Level - 1].PeelFirst;
+// will break this dependence. If Separate is set to true, information
+// about a separate level is provided.
+bool FullDependence::isPeelFirst(unsigned Level, bool Separate) const {
+  return getDVEntry(Level, Separate).PeelFirst;
 }
 
-
 // Returns true if peeling the last iteration from this loop
-// will break this dependence.
-bool FullDependence::isPeelLast(unsigned Level) const {
-  assert(0 < Level && Level <= Levels && "Level out of range");
-  return DV[Level - 1].PeelLast;
+// will break this dependence. If Separate is set to true, information
+// about a separate level is provided.
+bool FullDependence::isPeelLast(unsigned Level, bool Separate) const {
+  return getDVEntry(Level, Separate).PeelLast;
 }
 
-
-// Returns true if splitting this loop will break the dependence.
-bool FullDependence::isSplitable(unsigned Level) const {
-  assert(0 < Level && Level <= Levels && "Level out of range");
-  return DV[Level - 1].Splitable;
+// Returns true if splitting this loop will break the dependence. If
+// Separate is set to true, information about a separate level is provided.
+bool FullDependence::isSplitable(unsigned Level, bool Separate) const {
+  return getDVEntry(Level, Separate).Splitable;
 }
 
+// Returns true if Level lies past the common levels, i.e. in separate loops.
+bool FullDependence::inSeparateLoops(unsigned Level) const {
+  assert(0 < Level && Level <= Levels + SeparateLevels && "Level out of range");
+  return Levels < Level;
+}
 
 //===----------------------------------------------------------------------===//
 // DependenceInfo::Constraint methods
@@ -430,38 +431,50 @@ const SCEV *DependenceInfo::Constraint::getD() const {
   return SE->getNegativeSCEV(C);
 }
 
+// Returns the source loop recorded for a Distance, Line, or Point constraint.
+const Loop *DependenceInfo::Constraint::getAssociatedSrcLoop() const {
+  assert((Kind == Point || Kind == Line || Kind == Distance) &&
+         "Kind should be Distance, Line, or Point");
+  return AssociatedSrcLoop;
+}
 
-// Returns the loop associated with this constraint.
-const Loop *DependenceInfo::Constraint::getAssociatedLoop() const {
+// Returns the destination loop associated with this constraint.
+const Loop *DependenceInfo::Constraint::getAssociatedDstLoop() const {
   assert((Kind == Distance || Kind == Line || Kind == Point) &&
          "Kind should be Distance, Line, or Point");
-  return AssociatedLoop;
+  return AssociatedDstLoop;
 }
 
 void DependenceInfo::Constraint::setPoint(const SCEV *X, const SCEV *Y,
-                                          const Loop *CurLoop) {
+                                          const Loop *CurSrcLoop,
+                                          const Loop *CurDstLoop) {
   Kind = Point;
   A = X;
   B = Y;
-  AssociatedLoop = CurLoop;
+  AssociatedSrcLoop = CurSrcLoop;
+  AssociatedDstLoop = CurDstLoop;
 }
 
 void DependenceInfo::Constraint::setLine(const SCEV *AA, const SCEV *BB,
-                                         const SCEV *CC, const Loop *CurLoop) {
+                                         const SCEV *CC, const Loop *CurSrcLoop,
+                                         const Loop *CurDstLoop) {
   Kind = Line;
   A = AA;
   B = BB;
   C = CC;
-  AssociatedLoop = CurLoop;
+  AssociatedSrcLoop = CurSrcLoop;
+  AssociatedDstLoop = CurDstLoop;
 }
 
 void DependenceInfo::Constraint::setDistance(const SCEV *D,
-                                             const Loop *CurLoop) {
+                                             const Loop *CurSrcLoop,
+                                             const Loop *CurDstLoop) {
   Kind = Distance;
   A = SE->getOne(D->getType());
   B = SE->getNegativeSCEV(A);
   C = SE->getNegativeSCEV(D);
-  AssociatedLoop = CurLoop;
+  AssociatedSrcLoop = CurSrcLoop;
+  AssociatedDstLoop = CurDstLoop;
 }
 
 void DependenceInfo::Constraint::setEmpty() { Kind = Empty; }
@@ -608,8 +621,8 @@ bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) {
         ++DeltaSuccesses;
         return true;
       }
-      if (const SCEVConstant *CUB =
-          collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) {
+      if (const SCEVConstant *CUB = collectConstantUpperBound(
+              X->getAssociatedSrcLoop(), Prod1->getType())) {
         const APInt &UpperBound = CUB->getAPInt();
         LLVM_DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n");
         if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) {
@@ -618,9 +631,8 @@ bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) {
           return true;
         }
       }
-      X->setPoint(SE->getConstant(Xq),
-                  SE->getConstant(Yq),
-                  X->getAssociatedLoop());
+      X->setPoint(SE->getConstant(Xq), SE->getConstant(Yq),
+                  X->getAssociatedSrcLoop(), X->getAssociatedDstLoop());
       ++DeltaSuccesses;
       return true;
     }
@@ -656,6 +668,7 @@ bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) {
 // For debugging purposes. Dumps a dependence to OS.
 void Dependence::dump(raw_ostream &OS) const {
   bool Splitable = false;
+  bool SeparatesStarted = false;
   if (isConfused())
     OS << "confused";
   else {
@@ -670,19 +683,24 @@ void Dependence::dump(raw_ostream &OS) const {
     else if (isInput())
       OS << "input";
     unsigned Levels = getLevels();
+    unsigned SeparateLevels = getSeparateLevels();
     OS << " [";
-    for (unsigned II = 1; II <= Levels; ++II) {
-      if (isSplitable(II))
+    for (unsigned II = 1; II <= Levels + SeparateLevels; ++II) {
+      if (!SeparatesStarted && inSeparateLoops(II)) {
+        SeparatesStarted = true;
+        OS << "/ ";
+      }
+      if (isSplitable(II, SeparatesStarted))
         Splitable = true;
-      if (isPeelFirst(II))
+      if (isPeelFirst(II, SeparatesStarted))
         OS << 'p';
-      const SCEV *Distance = getDistance(II);
+      const SCEV *Distance = getDistance(II, SeparatesStarted);
       if (Distance)
         OS << *Distance;
-      else if (isScalar(II))
+      else if (isScalar(II, SeparatesStarted))
         OS << "S";
       else {
-        unsigned Direction = getDirection(II);
+        unsigned Direction = getDirection(II, SeparatesStarted);
         if (Direction == DVEntry::ALL)
           OS << "*";
         else {
@@ -694,9 +712,9 @@ void Dependence::dump(raw_ostream &OS) const {
             OS << ">";
         }
       }
-      if (isPeelLast(II))
+      if (isPeelLast(II, SeparatesStarted))
         OS << 'p';
-      if (II < Levels)
+      if (II < Levels + SeparateLevels)
         OS << " ";
     }
     if (isLoopIndependent())
@@ -758,6 +776,34 @@ bool isLoadOrStore(const Instruction *I) {
   return false;
 }
 
+// Returns true if the two loops are the same loop, or if they have the same
+// nesting depth and provably equal loop-invariant backedge-taken counts.
+bool DependenceInfo::areLoopsSimilar(const Loop *SrcLoop,
+                                     const Loop *DstLoop) const {
+  if (SrcLoop == DstLoop)
+    return true;
+
+  // Reject null loops before dereferencing either pointer.
+  if (!SrcLoop || !DstLoop)
+    return false;
+
+  if (SrcLoop->getLoopDepth() != DstLoop->getLoopDepth())
+    return false;
+
+  if (!SrcLoop->getLoopLatch() || !DstLoop->getLoopLatch())
+    return false;
+
+  // Without a loop-invariant backedge-taken count on both sides there is
+  // nothing to compare, so the loops cannot be shown to be similar.
+  if (!SE->hasLoopInvariantBackedgeTakenCount(SrcLoop) ||
+      !SE->hasLoopInvariantBackedgeTakenCount(DstLoop))
+    return false;
+
+  const SCEV *SrcUB = SE->getBackedgeTakenCount(SrcLoop);
+  const SCEV *DstUB = SE->getBackedgeTakenCount(DstLoop);
+
+  return SE->isKnownPredicate(ICmpInst::ICMP_EQ, SrcUB, DstUB);
+}
 
 // Examines the loop nesting of the Src and Dst
 // instructions and establishes their shared loops. Sets the variables
@@ -809,6 +855,11 @@ bool isLoadOrStore(const Instruction *I) {
 //     e - 5
 //     f - 6
 //     g - 7 = MaxLevels
+// SeparateLevels counts the number of levels after common levels that are
+// not common but are similar, meaning that they have the same trip count
+// and depth. Assume that in this code fragment, levels c and e are
+// similar. In this case only the loop nests at the next level after
+// common levels are similar, and SeparateLevels is set to 1.
 void DependenceInfo::establishNestingLevels(const Instruction *Src,
                                             const Instruction *Dst) {
   const BasicBlock *SrcBlock = Src->getParent();
@@ -819,6 +870,7 @@ void DependenceInfo::establishNestingLevels(const Instruction *Src,
   const Loop *DstLoop = LI->getLoopFor(DstBlock);
   SrcLevels = SrcLevel;
   MaxLevels = SrcLevel + DstLevel;
+  SeparateLevels = 0;
   while (SrcLevel > DstLevel) {
     SrcLoop = SrcLoop->getParentLoop();
     SrcLevel--;
@@ -827,16 +879,23 @@ void DependenceInfo::establishNestingLevels(const Instruction *Src,
     DstLoop = DstLoop->getParentLoop();
     DstLevel--;
   }
+  // find the first separate similar level
+  while (!areLoopsSimilar(SrcLoop, DstLoop)) {
+    SrcLoop = SrcLoop->getParentLoop();
+    DstLoop = DstLoop->getParentLoop();
+    SrcLevel--;
+  }
+  // continue to find the first common level
   while (SrcLoop != DstLoop) {
     SrcLoop = SrcLoop->getParentLoop();
     DstLoop = DstLoop->getParentLoop();
     SrcLevel--;
+    SeparateLevels++;
   }
   CommonLevels = SrcLevel;
   MaxLevels -= CommonLevels;
 }
 
-
 // Given one of the loops containing the source, return
 // its level index in our numbering scheme.
 unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const {
@@ -1223,8 +1282,9 @@ bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst,
 //
 // Return true if dependence disproved.
 bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
-                                   const SCEV *DstConst, const Loop *CurLoop,
-                                   unsigned Level, FullDependence &Result,
+                                   const SCEV *DstConst, const Loop *CurSrcLoop,
+                                   const Loop *CurDstLoop, unsigned Level,
+                                   FullDependence &Result,
                                    Constraint &NewConstraint) const {
   LLVM_DEBUG(dbgs() << "\tStrong SIV test\n");
   LLVM_DEBUG(dbgs() << "\t    Coeff = " << *Coeff);
@@ -1242,7 +1302,8 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
   LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n");
 
   // check that |Delta| < iteration count
-  if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
+  if (const SCEV *UpperBound =
+          collectUpperBound(CurSrcLoop, Delta->getType())) {
     LLVM_DEBUG(dbgs() << "\t    UpperBound = " << *UpperBound);
     LLVM_DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n");
     const SCEV *AbsDelta =
@@ -1275,7 +1336,8 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
       return true;
     }
     Result.DV[Level].Distance = SE->getConstant(Distance);
-    NewConstraint.setDistance(SE->getConstant(Distance), CurLoop);
+    NewConstraint.setDistance(SE->getConstant(Distance), CurSrcLoop,
+                              CurDstLoop);
     if (Distance.sgt(0))
       Result.DV[Level].Direction &= Dependence::DVEntry::LT;
     else if (Distance.slt(0))
@@ -1287,7 +1349,7 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
   else if (Delta->isZero()) {
     // since 0/X == 0
     Result.DV[Level].Distance = Delta;
-    NewConstraint.setDistance(Delta, CurLoop);
+    NewConstraint.setDistance(Delta, CurSrcLoop, CurDstLoop);
     Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
     ++StrongSIVsuccesses;
   }
@@ -1295,13 +1357,12 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
     if (Coeff->isOne()) {
       LLVM_DEBUG(dbgs() << "\t    Distance = " << *Delta << "\n");
       Result.DV[Level].Distance = Delta; // since X/1 == X
-      NewConstraint.setDistance(Delta, CurLoop);
+      NewConstraint.setDistance(Delta, CurSrcLoop, CurDstLoop);
     }
     else {
       Result.Consistent = false;
-      NewConstraint.setLine(Coeff,
-                            SE->getNegativeSCEV(Coeff),
-                            SE->getNegativeSCEV(Delta), CurLoop);
+      NewConstraint.setLine(Coeff, SE->getNegativeSCEV(Coeff),
+                            SE->getNegativeSCEV(Delta), CurSrcLoop, CurDstLoop);
     }
 
     // maybe we can get a useful direction
@@ -1329,7 +1390,6 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
   return false;
 }
 
-
 // weakCrossingSIVtest -
 // From the paper, Practical Dependence Testing, Section 4.2.2
 //
@@ -1360,8 +1420,9 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
 // Return true if dependence disproved.
 bool DependenceInfo::weakCrossingSIVtest(
     const SCEV *Coeff, const SCEV *SrcConst, const SCEV *DstConst,
-    const Loop *CurLoop, unsigned Level, FullDependence &Result,
-    Constraint &NewConstraint, const SCEV *&SplitIter) const {
+    const Loop *CurSrcLoop, const Loop *CurDstLoop, unsigned Level,
+    FullDependence &Result, Constraint &NewConstraint,
+    const SCEV *&SplitIter) const {
   LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n");
   LLVM_DEBUG(dbgs() << "\t    Coeff = " << *Coeff << "\n");
   LLVM_DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
@@ -1372,7 +1433,7 @@ bool DependenceInfo::weakCrossingSIVtest(
   Result.Consistent = false;
   const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
   LLVM_DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
-  NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop);
+  NewConstraint.setLine(Coeff, Coeff, Delta, CurSrcLoop, CurDstLoop);
   if (Delta->isZero()) {
     Result.DV[Level].Direction &= ~Dependence::DVEntry::LT;
     Result.DV[Level].Direction &= ~Dependence::DVEntry::GT;
@@ -1420,7 +1481,8 @@ bool DependenceInfo::weakCrossingSIVtest(
 
   // We're certain that Delta > 0 and ConstCoeff > 0.
   // Check Delta/(2*ConstCoeff) against upper loop bound
-  if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
+  if (const SCEV *UpperBound =
+          collectUpperBound(CurSrcLoop, Delta->getType())) {
     LLVM_DEBUG(dbgs() << "\t    UpperBound = " << *UpperBound << "\n");
     const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2);
     const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound),
@@ -1474,7 +1536,6 @@ bool DependenceInfo::weakCrossingSIVtest(
   return false;
 }
 
-
 // Kirch's algorithm, from
 //
 //        Optimizing Supercompilers for Supercomputers
@@ -1560,7 +1621,8 @@ static APInt ceilingOfQuotient(const APInt &A, const APInt &B) {
 // returns all the dependencies that exist between Dst and Src.
 bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
                                   const SCEV *SrcConst, const SCEV *DstConst,
-                                  const Loop *CurLoop, unsigned Level,
+                                  const Loop *CurSrcLoop,
+                                  const Loop *CurDstLoop, unsigned Level,
                                   FullDependence &Result,
                                   Constraint &NewConstraint) const {
   LLVM_DEBUG(dbgs() << "\tExact SIV test\n");
@@ -1575,7 +1637,7 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
   const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
   LLVM_DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
   NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), Delta,
-                        CurLoop);
+                        CurSrcLoop, CurDstLoop);
   const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
   const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
   const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
@@ -1602,7 +1664,7 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
   bool UMValid = false;
   // UM is perhaps unavailable, let's check
   if (const SCEVConstant *CUB =
-          collectConstantUpperBound(CurLoop, Delta->getType())) {
+          collectConstantUpperBound(CurSrcLoop, Delta->getType())) {
     UM = CUB->getAPInt();
     LLVM_DEBUG(dbgs() << "\t    UM = " << UM << "\n");
     UMValid = true;
@@ -1709,7 +1771,6 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
   return Result.DV[Level].Direction == Dependence::DVEntry::NONE;
 }
 
-
 // Return true if the divisor evenly divides the dividend.
 static
 bool isRemainderZero(const SCEVConstant *Dividend,
@@ -1751,12 +1812,10 @@ bool isRemainderZero(const SCEVConstant *Dividend,
 // (see also weakZeroDstSIVtest)
 //
 // Return true if dependence disproved.
-bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff,
-                                        const SCEV *SrcConst,
-                                        const SCEV *DstConst,
-                                        const Loop *CurLoop, unsigned Level,
-                                        FullDependence &Result,
-                                        Constraint &NewConstraint) const {
+bool DependenceInfo::weakZeroSrcSIVtest(
+    const SCEV *DstCoeff, const SCEV *SrcConst, const SCEV *DstConst,
+    const Loop *CurSrcLoop, const Loop *CurDstLoop, unsigned Level,
+    FullDependence &Result, Constraint &NewConstraint) const {
   // For the WeakSIV test, it's possible the loop isn't common to
   // the Src and Dst loops. If it isn't, then there's no need to
   // record a direction.
@@ -1770,7 +1829,7 @@ bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff,
   Result.Consistent = false;
   const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
   NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta,
-                        CurLoop);
+                        CurSrcLoop, CurDstLoop);
   LLVM_DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
   if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) {
     if (Level < CommonLevels) {
@@ -1791,7 +1850,8 @@ bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff,
 
   // check that Delta/SrcCoeff < iteration count
   // really check NewDelta < count*AbsCoeff
-  if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
+  if (const SCEV *UpperBound =
+          collectUpperBound(CurSrcLoop, Delta->getType())) {
     LLVM_DEBUG(dbgs() << "\t    UpperBound = " << *UpperBound << "\n");
     const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
     if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {
@@ -1829,7 +1889,6 @@ bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff,
   return false;
 }
 
-
 // weakZeroDstSIVtest -
 // From the paper, Practical Dependence Testing, Section 4.2.2
 //
@@ -1861,12 +1920,10 @@ bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff,
 // (see also weakZeroSrcSIVtest)
 //
 // Return true if dependence disproved.
-bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff,
-                                        const SCEV *SrcConst,
-                                        const SCEV *DstConst,
-                                        const Loop *CurLoop, unsigned Level,
-                                        FullDependence &Result,
-                                        Constraint &NewConstraint) const {
+bool DependenceInfo::weakZeroDstSIVtest(
+    const SCEV *SrcCoeff, const SCEV *SrcConst, const SCEV *DstConst,
+    const Loop *CurSrcLoop, const Loop *CurDstLoop, unsigned Level,
+    FullDependence &Result, Constraint &NewConstraint) const {
   // For the WeakSIV test, it's possible the loop isn't common to the
   // Src and Dst loops. If it isn't, then there's no need to record a direction.
   LLVM_DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n");
@@ -1879,7 +1936,7 @@ bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff,
   Result.Consistent = false;
   const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
   NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta,
-                        CurLoop);
+                        CurSrcLoop, CurDstLoop);
   LLVM_DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
   if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) {
     if (Level < CommonLevels) {
@@ -1900,7 +1957,8 @@ bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff,
 
   // check that Delta/SrcCoeff < iteration count
   // really check NewDelta < count*AbsCoeff
-  if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
+  if (const SCEV *UpperBound =
+          collectUpperBound(CurSrcLoop, Delta->getType())) {
     LLVM_DEBUG(dbgs() << "\t    UpperBound = " << *UpperBound << "\n");
     const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
     if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {
@@ -1938,7 +1996,6 @@ bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff,
   return false;
 }
 
-
 // exactRDIVtest - Tests the RDIV subscript pair for dependence.
 // Things of the form [c1 + a*i] and [c2 + b*j],
 // where i and j are induction variable, c1 and c2 are loop invariant,
@@ -2230,43 +2287,47 @@ bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level,
     const SCEV *DstConst = DstAddRec->getStart();
     const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
     const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);
-    const Loop *CurLoop = SrcAddRec->getLoop();
-    assert(CurLoop == DstAddRec->getLoop() &&
-           "both loops in SIV should be same");
-    Level = mapSrcLoop(CurLoop);
+    const Loop *CurSrcLoop = SrcAddRec->getLoop();
+    const Loop *CurDstLoop = DstAddRec->getLoop();
+    assert(areLoopsSimilar(CurSrcLoop, CurDstLoop) &&
+           "both loops in SIV should be the same or have the same tripcount "
+           "and depth");
+    Level = mapSrcLoop(CurSrcLoop);
     bool disproven;
     if (SrcCoeff == DstCoeff)
-      disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
-                                Level, Result, NewConstraint);
+      disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurSrcLoop,
+                                CurDstLoop, Level, Result, NewConstraint);
     else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff))
-      disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
-                                      Level, Result, NewConstraint, SplitIter);
+      disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurSrcLoop,
+                                      CurDstLoop, Level, Result, NewConstraint,
+                                      SplitIter);
     else
-      disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop,
-                               Level, Result, NewConstraint);
-    return disproven ||
-      gcdMIVtest(Src, Dst, Result) ||
-      symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop);
+      disproven =
+          exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurSrcLoop,
+                       CurDstLoop, Level, Result, NewConstraint);
+    return disproven || gcdMIVtest(Src, Dst, Result) ||
+           symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurSrcLoop,
+                            CurDstLoop);
   }
   if (SrcAddRec) {
     const SCEV *SrcConst = SrcAddRec->getStart();
     const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
     const SCEV *DstConst = Dst;
-    const Loop *CurLoop = SrcAddRec->getLoop();
-    Level = mapSrcLoop(CurLoop);
-    return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
-                              Level, Result, NewConstraint) ||
-      gcdMIVtest(Src, Dst, Result);
+    const Loop *CurSrcLoop = SrcAddRec->getLoop();
+    Level = mapSrcLoop(CurSrcLoop);
+    return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurSrcLoop,
+                              CurSrcLoop, Level, Result, NewConstraint) ||
+           gcdMIVtest(Src, Dst, Result);
   }
   if (DstAddRec) {
     const SCEV *DstConst = DstAddRec->getStart();
     const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);
     const SCEV *SrcConst = Src;
-    const Loop *CurLoop = DstAddRec->getLoop();
-    Level = mapDstLoop(CurLoop);
-    return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst,
-                              CurLoop, Level, Result, NewConstraint) ||
-      gcdMIVtest(Src, Dst, Result);
+    const Loop *CurDstLoop = DstAddRec->getLoop();
+    Level = mapDstLoop(CurDstLoop);
+    return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, CurDstLoop,
+                              CurDstLoop, Level, Result, NewConstraint) ||
+           gcdMIVtest(Src, Dst, Result);
   }
   llvm_unreachable("SIV test expected at least one AddRec");
   return false;
@@ -3171,19 +3232,20 @@ bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst,
 bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst,
                                        Constraint &CurConstraint,
                                        bool &Consistent) {
-  const Loop *CurLoop = CurConstraint.getAssociatedLoop();
+  const Loop *CurSrcLoop = CurConstraint.getAssociatedSrcLoop();
+  const Loop *CurDstLoop = CurConstraint.getAssociatedDstLoop();
   LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
-  const SCEV *A_K = findCoefficient(Src, CurLoop);
+  const SCEV *A_K = findCoefficient(Src, CurSrcLoop);
   if (A_K->isZero())
     return false;
   const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD());
   Src = SE->getMinusSCEV(Src, DA_K);
-  Src = zeroCoefficient(Src, CurLoop);
+  Src = zeroCoefficient(Src, CurSrcLoop);
   LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
   LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
-  Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K));
+  Dst = addToCoefficient(Dst, CurDstLoop, SE->getNegativeSCEV(A_K));
   LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
-  if (!findCoefficient(Dst, CurLoop)->isZero())
+  if (!findCoefficient(Dst, CurDstLoop)->isZero())
     Consistent = false;
   return true;
 }
@@ -3197,7 +3259,8 @@ bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst,
 bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst,
                                    Constraint &CurConstraint,
                                    bool &Consistent) {
-  const Loop *CurLoop = CurConstraint.getAssociatedLoop();
+  const Loop *CurSrcLoop = CurConstraint.getAssociatedSrcLoop();
+  const Loop *CurDstLoop = CurConstraint.getAssociatedDstLoop();
   const SCEV *A = CurConstraint.getA();
   const SCEV *B = CurConstraint.getB();
   const SCEV *C = CurConstraint.getC();
@@ -3213,11 +3276,11 @@ bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst,
     APInt Charlie = Cconst->getAPInt();
     APInt CdivB = Charlie.sdiv(Beta);
     assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B");
-    const SCEV *AP_K = findCoefficient(Dst, CurLoop);
+    const SCEV *AP_K = findCoefficient(Dst, CurDstLoop);
     //    Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));
     Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));
-    Dst = zeroCoefficient(Dst, CurLoop);
-    if (!findCoefficient(Src, CurLoop)->isZero())
+    Dst = zeroCoefficient(Dst, CurDstLoop);
+    if (!findCoefficient(Src, CurSrcLoop)->isZero())
       Consistent = false;
   }
   else if (B->isZero()) {
@@ -3228,10 +3291,10 @@ bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst,
     APInt Charlie = Cconst->getAPInt();
     APInt CdivA = Charlie.sdiv(Alpha);
     assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
-    const SCEV *A_K = findCoefficient(Src, CurLoop);
+    const SCEV *A_K = findCoefficient(Src, CurSrcLoop);
     Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));
-    Src = zeroCoefficient(Src, CurLoop);
-    if (!findCoefficient(Dst, CurLoop)->isZero())
+    Src = zeroCoefficient(Src, CurSrcLoop);
+    if (!findCoefficient(Dst, CurDstLoop)->isZero())
       Consistent = false;
   }
   else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) {
@@ -3242,22 +3305,22 @@ bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst,
     APInt Charlie = Cconst->getAPInt();
     APInt CdivA = Charlie.sdiv(Alpha);
     assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
-    const SCEV *A_K = findCoefficient(Src, CurLoop);
+    const SCEV *A_K = findCoefficient(Src, CurSrcLoop);
     Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));
-    Src = zeroCoefficient(Src, CurLoop);
-    Dst = addToCoefficient(Dst, CurLoop, A_K);
-    if (!findCoefficient(Dst, CurLoop)->isZero())
+    Src = zeroCoefficient(Src, CurSrcLoop);
+    Dst = addToCoefficient(Dst, CurDstLoop, A_K);
+    if (!findCoefficient(Dst, CurDstLoop)->isZero())
       Consistent = false;
   }
   else {
     // paper is incorrect here, or perhaps just misleading
-    const SCEV *A_K = findCoefficient(Src, CurLoop);
+    const SCEV *A_K = findCoefficient(Src, CurSrcLoop);
     Src = SE->getMulExpr(Src, A);
     Dst = SE->getMulExpr(Dst, A);
     Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C));
-    Src = zeroCoefficient(Src, CurLoop);
-    Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B));
-    if (!findCoefficient(Dst, CurLoop)->isZero())
+    Src = zeroCoefficient(Src, CurSrcLoop);
+    Dst = addToCoefficient(Dst, CurDstLoop, SE->getMulExpr(A_K, B));
+    if (!findCoefficient(Dst, CurDstLoop)->isZero())
       Consistent = false;
   }
   LLVM_DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n");
@@ -3271,17 +3334,18 @@ bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst,
 // Return true if some simplification occurs.
 bool DependenceInfo::propagatePoint(const SCEV *&Src, const SCEV *&Dst,
                                     Constraint &CurConstraint) {
-  const Loop *CurLoop = CurConstraint.getAssociatedLoop();
-  const SCEV *A_K = findCoefficient(Src, CurLoop);
-  const SCEV *AP_K = findCoefficient(Dst, CurLoop);
+  const Loop *CurSrcLoop = CurConstraint.getAssociatedSrcLoop();
+  const Loop *CurDstLoop = CurConstraint.getAssociatedDstLoop();
+  const SCEV *A_K = findCoefficient(Src, CurSrcLoop);
+  const SCEV *AP_K = findCoefficient(Dst, CurDstLoop);
   const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX());
   const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY());
   LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
   Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K));
-  Src = zeroCoefficient(Src, CurLoop);
+  Src = zeroCoefficient(Src, CurSrcLoop);
   LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
   LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
-  Dst = zeroCoefficient(Dst, CurLoop);
+  Dst = zeroCoefficient(Dst, CurDstLoop);
   LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
   return true;
 }
@@ -3617,14 +3681,6 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst) {
     break; // The underlying objects alias; test accesses for dependence.
   }
 
-  // establish loop nesting levels
-  establishNestingLevels(Src, Dst);
-  LLVM_DEBUG(dbgs() << "    common nesting levels = " << CommonLevels << "\n");
-  LLVM_DEBUG(dbgs() << "    maximum nesting levels = " << MaxLevels << "\n");
-
-  FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels);
-  ++TotalArrayPairs;
-
   unsigned Pairs = 1;
   SmallVector<Subscript, 2> Pair(Pairs);
   const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
@@ -3651,6 +3707,47 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst) {
     }
   }
 
+  // Establish loop nesting levels considering separate but similar loops as
+  // common
+  establishNestingLevels(Src, Dst);
+
+  LLVM_DEBUG(dbgs() << "    common nesting levels = " << CommonLevels << "\n");
+  LLVM_DEBUG(dbgs() << "    maximum nesting levels = " << MaxLevels << "\n");
+  LLVM_DEBUG(dbgs() << "    separate nesting levels = " << SeparateLevels
+                    << "\n");
+
+  // Modify common levels to consider the similar separate levels in the tests
+  CommonLevels += SeparateLevels;
+  MaxLevels -= SeparateLevels;
+  if (SeparateLevels > 0) {
+    // Not all tests can handle separate loops yet. Stop treating them as
+    // common if any pair needs a test other than ZIV, SIV or RDIV.
+    for (unsigned P = 0; P < Pairs; ++P) {
+      Pair[P].Loops.resize(MaxLevels + 1);
+      Subscript::ClassificationKind TestClass = classifyPair(
+          Pair[P].Src, LI->getLoopFor(Src->getParent()), Pair[P].Dst,
+          LI->getLoopFor(Dst->getParent()), Pair[P].Loops);
+
+      if (TestClass != Subscript::ZIV &&
+          TestClass != Subscript::SIV &&
+          TestClass != Subscript::RDIV) {
+        // Revert the levels to not consider the separate levels
+        CommonLevels -= SeparateLevels;
+        MaxLevels += SeparateLevels;
+        SeparateLevels = 0;
+        break;
+      }
+    }
+  }
+
+  FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels);
+  ++TotalArrayPairs;
+
+  if (SeparateLevels > 0) {
+    Result.Consistent = false;
+    SeparateLoopsConsidered++;
+  }
+
   for (unsigned P = 0; P < Pairs; ++P) {
     Pair[P].Loops.resize(MaxLevels + 1);
     Pair[P].GroupLoops.resize(MaxLevels + 1);
@@ -3940,6 +4037,25 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst) {
     if (CompleteLoops[II])
       Result.DV[II - 1].Scalar = false;
 
+  if (SeparateLevels > 0) {
+    // Extracting separate levels from the common levels
+    // Reverting CommonLevels and MaxLevels to their original values
+    assert(CommonLevels >= SeparateLevels);
+    CommonLevels -= SeparateLevels;
+    MaxLevels += SeparateLevels;
+    std::unique_ptr<FullDependence::DVEntry[]> DV, DVSeparate;
+    DV = std::make_unique<FullDependence::DVEntry[]>(CommonLevels);
+    DVSeparate = std::make_unique<FullDependence::DVEntry[]>(SeparateLevels);
+    for (unsigned level = 0; level < CommonLevels; ++level)
+      DV[level] = Result.DV[level];
+    for (unsigned level = 0; level < SeparateLevels; ++level)
+      DVSeparate[level] = Result.DV[CommonLevels + level];
+    Result.DV = std::move(DV);
+    Result.DVSeparate = std::move(DVSeparate);
+    Result.Levels = CommonLevels;
+    Result.SeparateLevels = SeparateLevels;
+  }
+
   if (PossiblyLoopIndependent) {
     // Make sure the LoopIndependent flag is set correctly.
     // All directions must include equal, otherwise no
diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
index eaf89b23c26f7..4646adeffca08 100644
--- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -100,6 +100,8 @@ STATISTIC(OnlySecondCandidateIsGuarded,
           "The second candidate is guarded while the first one is not");
 STATISTIC(NumHoistedInsts, "Number of hoisted preheader instructions.");
 STATISTIC(NumSunkInsts, "Number of hoisted preheader instructions.");
+STATISTIC(NumDepSafeFused, "Number of fused loops with dependencies "
+                           "proven safe based on the dependence direction");
 
 enum FusionDependenceAnalysisChoice {
   FUSION_DEPENDENCE_ANALYSIS_SCEV,
@@ -1348,7 +1350,39 @@ struct LoopFuser {
                           << "\n");
       }
 #endif
+      unsigned Levels = DepResult->getLevels();
+      unsigned SeparateLevels = DepResult->getSeparateLevels();
+      unsigned CurLoopLevel = FC0.L->getLoopDepth();
+      bool IsForwardDep = false;
+
+      for (unsigned II = 1; 
+           II <= std::min(CurLoopLevel, Levels + SeparateLevels);
+           ++II) {
+        unsigned Direction = DepResult->getDirection(II, II > Levels);
+        if (!(Direction & Dependence::DVEntry::EQ) &&
+            II < CurLoopLevel) {
+          // No dependency at the current level, because the accesses already
+          // differ at an outer level
+          break;
+        }
+        if (!(Direction & Dependence::DVEntry::GT) &&
+            !(Direction & Dependence::DVEntry::EQ)) {
+          // Backward dependency or no dependency on current level
+          break;
+        }
+        if (Direction & Dependence::DVEntry::GT) {
+          // Forward dependency
+          IsForwardDep = true;
+          break;
+        }
+      }
 
+      if (!IsForwardDep) {
+        LLVM_DEBUG(dbgs() << "Safe to fuse with backward loop-carried "
+                             "dependency or no dependency\n");
+        NumDepSafeFused++;
+        return true;
+      }
       if (DepResult->getNextPredecessor() || DepResult->getNextSuccessor())
         LLVM_DEBUG(
             dbgs() << "TODO: Implement pred/succ dependence handling!\n");
diff --git a/llvm/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll b/llvm/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll
index 404018707c0a5..0811a56b495c4 100644
--- a/llvm/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll
@@ -20,7 +20,7 @@ define void @p2(i64 %n, ptr %A, ptr %B) nounwind uwtable ssp {
 ; CHECK-NEXT:  Src: store i64 %i.011, ptr %arrayidx8, align 8 --> Dst: store i64 %i.011, ptr %arrayidx8, align 8
 ; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: store i64 %i.011, ptr %arrayidx8, align 8 --> Dst: %0 = load i64, ptr %arrayidx17, align 8
-; CHECK-NEXT:    da analyze - flow [-3 -2]!
+; CHECK-NEXT:    da analyze - flow [-3 -2 / -1]!
 ; CHECK-NEXT:  Src: store i64 %i.011, ptr %arrayidx8, align 8 --> Dst: store i64 %0, ptr %B.addr.24, align 8
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i64, ptr %arrayidx17, align 8 --> Dst: %0 = load i64, ptr %arrayidx17, align 8
diff --git a/llvm/test/Analysis/DependenceAnalysis/SIVSeparateLoops.ll b/llvm/test/Analysis/DependenceAnalysis/SIVSeparateLoops.ll
new file mode 100644
index 0000000000000..1e6249133f155
--- /dev/null
+++ b/llvm/test/Analysis/DependenceAnalysis/SIVSeparateLoops.ll
@@ -0,0 +1,145 @@
+; RUN: opt < %s -disable-output "-passes=print<da>" -aa-pipeline=basic-aa 2>&1 \
+; RUN:   -da-disable-delinearization-checks | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.6.0"
+
+;;  for (long int i = 0; i < n; i++) {
+;;    for (long int j = 0; j < n; j++) {
+;;      for (long int k = 0; k < n; k++) {
+;;        for (long int l = 0; l < n; l++)
+;;          A[i][j][k][l] = i;
+;;      }
+;;      for (long int k = 1; k < n+1; k++) {
+;;        for (long int l = 0; l < n; l++)
+;;          *B++ = A[i + 4][j + 3][k + 2][l + 1];
+
+define void @SIVSeparate(i64 %n, ptr %A, ptr %B) nounwind uwtable ssp {
+entry:
+  %cmp10 = icmp sgt i64 %n, 0
+  br i1 %cmp10, label %for.cond1.preheader.preheader, label %for.end35
+  
+; CHECK-LABEL: SIVSeparate
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [-4 -3 / -3 -1]!   
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [* * * *]!
+  
+for.cond1.preheader.preheader:                    ; preds = %entry
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %for.cond1.preheader.preheader, %for.inc33
+  %B.addr.014 = phi ptr [ %B.addr.1.lcssa, %for.inc33 ], [ %B, %for.cond1.preheader.preheader ]
+  %i.013 = phi i64 [ %inc34, %for.inc33 ], [ 0, %for.cond1.preheader.preheader ]
+  %cmp28 = icmp sgt i64 %n, 0
+  br i1 %cmp28, label %for.cond4.preheader.preheader, label %for.inc33
+
+for.cond4.preheader.preheader:                    ; preds = %for.cond1.preheader
+  br label %for.cond4.preheader
+
+for.cond4.preheader:                              ; preds = %for.cond4.preheader.preheader, %for.inc30
+  %B.addr.110 = phi ptr [ %B.addr.2.lcssa, %for.inc30 ], [ %B.addr.014, %for.cond4.preheader.preheader ]
+  %j.09 = phi i64 [ %inc31, %for.inc30 ], [ 0, %for.cond4.preheader.preheader ]
+  %cmp53 = icmp sgt i64 %n, 0
+  br i1 %cmp53, label %for.cond7.preheader.preheader, label %for.cond15.loopexit
+
+for.cond7.preheader.preheader:                    ; preds = %for.cond4.preheader
+  br label %for.cond7.preheader
+
+for.cond7.preheader:                              ; preds = %for.cond7.preheader.preheader, %for.inc12
+  %k.07 = phi i64 [ %inc13, %for.inc12 ], [ 0, %for.cond7.preheader.preheader ]
+  %cmp81 = icmp sgt i64 %n, 0
+  br i1 %cmp81, label %for.body9.preheader, label %for.inc12
+
+for.body9.preheader:                              ; preds = %for.cond7.preheader
+  br label %for.body9
+
+for.body9:                                        ; preds = %for.body9.preheader, %for.body9
+  %l.02 = phi i64 [ %inc11, %for.body9 ], [ 0, %for.body9.preheader ]
+  %arrayidx12 = getelementptr inbounds [100 x [100 x [100 x i64]]], ptr %A, i64 %i.013, i64 %j.09, i64 %k.07, i64 %l.02
+  store i64 %i.013, ptr %arrayidx12, align 8
+  %inc11 = add nsw i64 %l.02, 1
+  %exitcond15 = icmp ne i64 %inc11, %n
+  br i1 %exitcond15, label %for.body9, label %for.inc12.loopexit
+
+for.inc12.loopexit:                               ; preds = %for.body9
+  br label %for.inc12
+
+for.inc12:                                        ; preds = %for.inc12.loopexit, %for.cond7.preheader
+  %inc13 = add nsw i64 %k.07, 1
+  %exitcond16 = icmp ne i64 %inc13, %n
+  br i1 %exitcond16, label %for.cond7.preheader, label %for.cond15.loopexit.loopexit
+
+for.cond15.loopexit.loopexit:                     ; preds = %for.inc12
+  br label %for.cond15.loopexit
+
+for.cond15.loopexit:                              ; preds = %for.cond15.loopexit.loopexit, %for.cond4.preheader
+  %cmp163 = icmp sgt i64 %n, 0
+  br i1 %cmp163, label %for.cond18.preheader.preheader, label %for.inc30
+
+for.cond18.preheader.preheader:                   ; preds = %for.cond15.loopexit
+  br label %for.cond18.preheader
+
+for.cond18.preheader:                             ; preds = %for.cond18.preheader.preheader, %for.inc27
+  %k14.06 = phi i64 [ %inc28, %for.inc27 ], [ 1, %for.cond18.preheader.preheader ]
+  %B.addr.25 = phi ptr [ %B.addr.3.lcssa, %for.inc27 ], [ %B.addr.110, %for.cond18.preheader.preheader ]
+  %cmp191 = icmp sgt i64 %n, 0
+  br i1 %cmp191, label %for.body20.preheader, label %for.inc27
+
+for.body20.preheader:                             ; preds = %for.cond18.preheader
+  br label %for.body20
+
+for.body20:                                       ; preds = %for.body20.preheader, %for.body20
+  %l17.04 = phi i64 [ %inc25, %for.body20 ], [ 0, %for.body20.preheader ]
+  %B.addr.34 = phi ptr [ %incdec.ptr, %for.body20 ], [ %B.addr.25, %for.body20.preheader ]
+  %add = add nsw i64 %l17.04, 1
+  %add21 = add nsw i64 %k14.06, 2
+  %add22 = add nsw i64 %j.09, 3
+  %add23 = add nsw i64 %i.013, 4
+  %arrayidx24 = getelementptr inbounds [100 x [100 x [100 x i64]]], ptr %A, i64 %add23, i64 %add22, i64 %add21, i64 %add
+  %0 = load i64, ptr %arrayidx24, align 8
+  %incdec.ptr = getelementptr inbounds i64, ptr %B.addr.34, i64 1
+  store i64 %0, ptr %B.addr.34, align 8
+  %inc25 = add nsw i64 %l17.04, 1
+  %exitcond = icmp ne i64 %inc25, %n
+  br i1 %exitcond, label %for.body20, label %for.inc27.loopexit
+
+for.inc27.loopexit:                               ; preds = %for.body20
+  %scevgep = getelementptr i64, ptr %B.addr.25, i64 %n
+  br label %for.inc27
+
+for.inc27:                                        ; preds = %for.inc27.loopexit, %for.cond18.preheader
+  %B.addr.3.lcssa = phi ptr [ %B.addr.25, %for.cond18.preheader ], [ %scevgep, %for.inc27.loopexit ]
+  %inc28 = add nsw i64 %k14.06, 1
+  %inc29 = add nsw i64 %n, 1
+  %exitcond17 = icmp ne i64 %inc28, %inc29
+  br i1 %exitcond17, label %for.cond18.preheader, label %for.inc30.loopexit
+
+for.inc30.loopexit:                               ; preds = %for.inc27
+  %B.addr.3.lcssa.lcssa = phi ptr [ %B.addr.3.lcssa, %for.inc27 ]
+  br label %for.inc30
+
+for.inc30:                                        ; preds = %for.inc30.loopexit, %for.cond15.loopexit
+  %B.addr.2.lcssa = phi ptr [ %B.addr.110, %for.cond15.loopexit ], [ %B.addr.3.lcssa.lcssa, %for.inc30.loopexit ]
+  %inc31 = add nsw i64 %j.09, 1
+  %exitcond18 = icmp ne i64 %inc31, %n
+  br i1 %exitcond18, label %for.cond4.preheader, label %for.inc33.loopexit
+
+for.inc33.loopexit:                               ; preds = %for.inc30
+  %B.addr.2.lcssa.lcssa = phi ptr [ %B.addr.2.lcssa, %for.inc30 ]
+  br label %for.inc33
+
+for.inc33:                                        ; preds = %for.inc33.loopexit, %for.cond1.preheader
+  %B.addr.1.lcssa = phi ptr [ %B.addr.014, %for.cond1.preheader ], [ %B.addr.2.lcssa.lcssa, %for.inc33.loopexit ]
+  %inc34 = add nsw i64 %i.013, 1
+  %exitcond19 = icmp ne i64 %inc34, %n
+  br i1 %exitcond19, label %for.cond1.preheader, label %for.end35.loopexit
+
+for.end35.loopexit:                               ; preds = %for.inc33
+  br label %for.end35
+
+for.end35:                                        ; preds = %for.end35.loopexit, %entry
+  ret void
+}
\ No newline at end of file
diff --git a/llvm/test/Transforms/LoopFusion/backward_loop_carried.ll b/llvm/test/Transforms/LoopFusion/backward_loop_carried.ll
new file mode 100644
index 0000000000000..30c683d147d4e
--- /dev/null
+++ b/llvm/test/Transforms/LoopFusion/backward_loop_carried.ll
@@ -0,0 +1,185 @@
+; RUN: opt -S -passes=loop-fusion -da-disable-delinearization-checks < %s | FileCheck %s
+
+; The two inner loops have no dependency on each other and can be fused: the
+; store and the load use different outer-loop subscripts (i vs. i + 3, j vs. j + 2).
+
+; C Code
+;
+;;  for (long int i = 0; i < n; i++) {
+;;    for (long int j = 0; j < n; j++) {
+;;      for (long int k = 0; k < n; k++) {
+;;        A[i][j][k] = i;
+;;      }
+;;      for (long int k = 0; k < n; k++) {
+;;        temp = A[i + 3][j + 2][k + 1];
+
+define void @backward_dep0(i64 %n, ptr %A) nounwind uwtable ssp {
+entry:
+  %cmp10 = icmp sgt i64 %n, 0                     ; guard: skip the whole nest unless n > 0
+  br i1 %cmp10, label %for.cond1.preheader.preheader, label %for.end26
+; Expect exactly one for.body<N> block after fusion; the label below names this exact function.
+; CHECK-LABEL: @backward_dep0(
+; CHECK-COUNT-1: for.body{{[0-9]+}}:
+; CHECK-NOT:     for.body{{[0-9]+}}:
+
+for.cond1.preheader.preheader:                    ; preds = %entry
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %for.cond1.preheader.preheader, %for.inc24
+  %i.011 = phi i64 [ %inc25, %for.inc24 ], [ 0, %for.cond1.preheader.preheader ] ; i
+  %cmp26 = icmp sgt i64 %n, 0
+  br i1 %cmp26, label %for.cond4.preheader.preheader, label %for.inc24
+
+for.cond4.preheader.preheader:                    ; preds = %for.cond1.preheader
+  br label %for.cond4.preheader
+
+for.cond4.preheader:                              ; preds = %for.cond4.preheader.preheader, %for.inc21
+  %j.07 = phi i64 [ %inc22, %for.inc21 ], [ 0, %for.cond4.preheader.preheader ] ; j
+  %cmp51 = icmp sgt i64 %n, 0
+  br i1 %cmp51, label %for.body6.preheader, label %for.cond10.loopexit
+
+for.body6.preheader:                              ; preds = %for.cond4.preheader
+  br label %for.body6
+
+for.body6:                                        ; preds = %for.body6.preheader, %for.body6
+  %k.02 = phi i64 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ] ; k of the first inner loop
+  %arrayidx8 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %i.011, i64 %j.07, i64 %k.02
+  store i64 %i.011, ptr %arrayidx8, align 8       ; A[i][j][k] = i
+  %inc = add nsw i64 %k.02, 1
+  %exitcond13 = icmp ne i64 %inc, %n
+  br i1 %exitcond13, label %for.body6, label %for.cond10.loopexit.loopexit
+
+for.cond10.loopexit.loopexit:                     ; preds = %for.body6
+  br label %for.cond10.loopexit
+
+for.cond10.loopexit:                              ; preds = %for.cond10.loopexit.loopexit, %for.cond4.preheader
+  %cmp113 = icmp sgt i64 %n, 0
+  br i1 %cmp113, label %for.body12.preheader, label %for.inc21
+
+for.body12.preheader:                             ; preds = %for.cond10.loopexit
+  br label %for.body12
+
+for.body12:                                       ; preds = %for.body12.preheader, %for.body12
+  %k9.05 = phi i64 [ %inc19, %for.body12 ], [ 0, %for.body12.preheader ] ; k of the second inner loop
+  %add = add nsw i64 %k9.05, 1                    ; k + 1
+  %add13 = add nsw i64 %j.07, 2                   ; j + 2
+  %add14 = add nsw i64 %i.011, 3                  ; i + 3
+  %arrayidx17 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %add14, i64 %add13, i64 %add
+  %0 = load i64, ptr %arrayidx17, align 8         ; temp = A[i + 3][j + 2][k + 1]
+  %inc19 = add nsw i64 %k9.05, 1
+  %exitcond = icmp ne i64 %inc19, %n
+  br i1 %exitcond, label %for.body12, label %for.inc21.loopexit
+
+for.inc21.loopexit:                               ; preds = %for.body12
+  br label %for.inc21
+
+for.inc21:                                        ; preds = %for.inc21.loopexit, %for.cond10.loopexit
+  %inc22 = add nsw i64 %j.07, 1                   ; ++j
+  %exitcond14 = icmp ne i64 %inc22, %n
+  br i1 %exitcond14, label %for.cond4.preheader, label %for.inc24.loopexit
+
+for.inc24.loopexit:                               ; preds = %for.inc21
+  br label %for.inc24
+
+for.inc24:                                        ; preds = %for.inc24.loopexit, %for.cond1.preheader
+  %inc25 = add nsw i64 %i.011, 1                  ; ++i
+  %exitcond15 = icmp ne i64 %inc25, %n
+  br i1 %exitcond15, label %for.cond1.preheader, label %for.end26.loopexit
+
+for.end26.loopexit:                               ; preds = %for.inc24
+  br label %for.end26
+
+for.end26:                                        ; preds = %for.end26.loopexit, %entry
+  ret void
+}
+
+; The two inner loops have a backward loop-carried dependency (the load reads
+; the element stored one k-iteration earlier), which still allows them to be fused.
+
+; C Code
+;
+;;  for (long int i = 0; i < n; i++) {
+;;    for (long int j = 0; j < n; j++) {
+;;      for (long int k = 0; k < n; k++) {
+;;        A[i][j][k] = i;
+;;      }
+;;      for (long int k = 0; k < n; k++) {
+;;        temp = A[i][j][k - 1];
+
+define void @backward_dep1(i64 %n, ptr %A) nounwind uwtable ssp {
+entry:
+  %cmp10 = icmp sgt i64 %n, 0                     ; guard: skip the whole nest unless n > 0
+  br i1 %cmp10, label %for.cond1.preheader.preheader, label %for.end26
+; Expect exactly one for.body<N> block after fusion; the label below names this exact function.
+; CHECK-LABEL: @backward_dep1(
+; CHECK-COUNT-1: for.body{{[0-9]+}}:
+; CHECK-NOT:     for.body{{[0-9]+}}:
+
+for.cond1.preheader.preheader:                    ; preds = %entry
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %for.cond1.preheader.preheader, %for.inc24
+  %i.011 = phi i64 [ %inc25, %for.inc24 ], [ 0, %for.cond1.preheader.preheader ] ; i
+  %cmp26 = icmp sgt i64 %n, 0
+  br i1 %cmp26, label %for.cond4.preheader.preheader, label %for.inc24
+
+for.cond4.preheader.preheader:                    ; preds = %for.cond1.preheader
+  br label %for.cond4.preheader
+
+for.cond4.preheader:                              ; preds = %for.cond4.preheader.preheader, %for.inc21
+  %j.07 = phi i64 [ %inc22, %for.inc21 ], [ 0, %for.cond4.preheader.preheader ] ; j
+  %cmp51 = icmp sgt i64 %n, 0
+  br i1 %cmp51, label %for.body6.preheader, label %for.cond10.loopexit
+
+for.body6.preheader:                              ; preds = %for.cond4.preheader
+  br label %for.body6
+
+for.body6:                                        ; preds = %for.body6.preheader, %for.body6
+  %k.02 = phi i64 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ] ; k of the first inner loop
+  %arrayidx8 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %i.011, i64 %j.07, i64 %k.02
+  store i64 %i.011, ptr %arrayidx8, align 8       ; A[i][j][k] = i
+  %inc = add nsw i64 %k.02, 1
+  %exitcond13 = icmp ne i64 %inc, %n
+  br i1 %exitcond13, label %for.body6, label %for.cond10.loopexit.loopexit
+
+for.cond10.loopexit.loopexit:                     ; preds = %for.body6
+  br label %for.cond10.loopexit
+
+for.cond10.loopexit:                              ; preds = %for.cond10.loopexit.loopexit, %for.cond4.preheader
+  %cmp113 = icmp sgt i64 %n, 0
+  br i1 %cmp113, label %for.body12.preheader, label %for.inc21
+
+for.body12.preheader:                             ; preds = %for.cond10.loopexit
+  br label %for.body12
+
+for.body12:                                       ; preds = %for.body12.preheader, %for.body12
+  %k9.05 = phi i64 [ %inc19, %for.body12 ], [ 0, %for.body12.preheader ] ; k of the second inner loop
+  %add = add nsw i64 %k9.05, -1                   ; k - 1
+  %arrayidx17 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %i.011, i64 %j.07, i64 %add
+  %0 = load i64, ptr %arrayidx17, align 8         ; temp = A[i][j][k - 1]
+  %inc19 = add nsw i64 %k9.05, 1
+  %exitcond = icmp ne i64 %inc19, %n
+  br i1 %exitcond, label %for.body12, label %for.inc21.loopexit
+
+for.inc21.loopexit:                               ; preds = %for.body12
+  br label %for.inc21
+
+for.inc21:                                        ; preds = %for.inc21.loopexit, %for.cond10.loopexit
+  %inc22 = add nsw i64 %j.07, 1                   ; ++j
+  %exitcond14 = icmp ne i64 %inc22, %n
+  br i1 %exitcond14, label %for.cond4.preheader, label %for.inc24.loopexit
+
+for.inc24.loopexit:                               ; preds = %for.inc21
+  br label %for.inc24
+
+for.inc24:                                        ; preds = %for.inc24.loopexit, %for.cond1.preheader
+  %inc25 = add nsw i64 %i.011, 1                  ; ++i
+  %exitcond15 = icmp ne i64 %inc25, %n
+  br i1 %exitcond15, label %for.cond1.preheader, label %for.end26.loopexit
+
+for.end26.loopexit:                               ; preds = %for.inc24
+  br label %for.end26
+
+for.end26:                                        ; preds = %for.end26.loopexit, %entry
+  ret void
+}
\ No newline at end of file
diff --git a/llvm/test/Transforms/LoopFusion/simple.ll b/llvm/test/Transforms/LoopFusion/simple.ll
index d63890df14461..dfb3d13b56f04 100644
--- a/llvm/test/Transforms/LoopFusion/simple.ll
+++ b/llvm/test/Transforms/LoopFusion/simple.ll
@@ -300,40 +300,36 @@ bb23:                                             ; preds = %bb17, %bb
 
 define void @forward_dep(ptr noalias %arg) {
 ; CHECK-LABEL: @forward_dep(
-; CHECK-NEXT:  bb:
-; CHECK-NEXT:    br label [[BB7:%.*]]
+; CHECK-NEXT:  [[BB:.*]]:
+; CHECK-NEXT:    br label %[[BB7:.*]]
 ; CHECK:       bb7:
-; CHECK-NEXT:    [[DOT013:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[TMP15:%.*]], [[BB14:%.*]] ]
-; CHECK-NEXT:    [[INDVARS_IV22:%.*]] = phi i64 [ 0, [[BB]] ], [ [[INDVARS_IV_NEXT3:%.*]], [[BB14]] ]
+; CHECK-NEXT:    [[DOT013:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[TMP15:%.*]], %[[BB25:.*]] ]
+; CHECK-NEXT:    [[INDVARS_IV22:%.*]] = phi i64 [ 0, %[[BB]] ], [ [[INDVARS_IV_NEXT3:%.*]], %[[BB25]] ]
+; CHECK-NEXT:    [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[BB25]] ], [ 0, %[[BB]] ]
 ; CHECK-NEXT:    [[TMP:%.*]] = add nsw i32 [[DOT013]], -3
 ; CHECK-NEXT:    [[TMP8:%.*]] = add nuw nsw i64 [[INDVARS_IV22]], 3
 ; CHECK-NEXT:    [[TMP9:%.*]] = trunc i64 [[TMP8]] to i32
 ; CHECK-NEXT:    [[TMP10:%.*]] = mul nsw i32 [[TMP]], [[TMP9]]
 ; CHECK-NEXT:    [[TMP11:%.*]] = trunc i64 [[INDVARS_IV22]] to i32
 ; CHECK-NEXT:    [[TMP12:%.*]] = srem i32 [[TMP10]], [[TMP11]]
-; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[ARG:%.*]], i64 [[INDVARS_IV22]]
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[ARG:%.*]], i64 [[INDVARS_IV22]]
 ; CHECK-NEXT:    store i32 [[TMP12]], ptr [[TMP13]], align 4
-; CHECK-NEXT:    br label [[BB14]]
+; CHECK-NEXT:    br label %[[BB14:.*]]
 ; CHECK:       bb14:
-; CHECK-NEXT:    [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV22]], 1
-; CHECK-NEXT:    [[TMP15]] = add nuw nsw i32 [[DOT013]], 1
-; CHECK-NEXT:    [[EXITCOND4:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], 100
-; CHECK-NEXT:    br i1 [[EXITCOND4]], label [[BB7]], label [[BB19_PREHEADER:%.*]]
-; CHECK:       bb19.preheader:
-; CHECK-NEXT:    br label [[BB19:%.*]]
-; CHECK:       bb19:
-; CHECK-NEXT:    [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BB25:%.*]] ], [ 0, [[BB19_PREHEADER]] ]
 ; CHECK-NEXT:    [[TMP20:%.*]] = add nsw i64 [[INDVARS_IV1]], -3
 ; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[ARG]], i64 [[TMP20]]
 ; CHECK-NEXT:    [[TMP22:%.*]] = load i32, ptr [[TMP21]], align 4
 ; CHECK-NEXT:    [[TMP23:%.*]] = mul nsw i32 [[TMP22]], 3
 ; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[ARG]], i64 [[INDVARS_IV1]]
 ; CHECK-NEXT:    store i32 [[TMP23]], ptr [[TMP24]], align 4
-; CHECK-NEXT:    br label [[BB25]]
+; CHECK-NEXT:    br label %[[BB25]]
 ; CHECK:       bb25:
+; CHECK-NEXT:    [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV22]], 1
+; CHECK-NEXT:    [[TMP15]] = add nuw nsw i32 [[DOT013]], 1
+; CHECK-NEXT:    [[EXITCOND4:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT3]], 100
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV1]], 1
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 100
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[BB19]], label [[BB26:%.*]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[BB7]], label %[[BB26:.*]]
 ; CHECK:       bb26:
 ; CHECK-NEXT:    ret void
 ;



More information about the llvm-commits mailing list