[llvm] [DependenceAnalysis] Extending SIV to handle separate loops (PR #128782)

Alireza Torabian via llvm-commits llvm-commits at lists.llvm.org
Thu May 29 10:42:11 PDT 2025


https://github.com/1997alireza updated https://github.com/llvm/llvm-project/pull/128782

>From 0b6baecfb0a4824b387904127d1046992c331bf6 Mon Sep 17 00:00:00 2001
From: Alireza Torabian <alireza.torabian at huawei.com>
Date: Mon, 24 Feb 2025 11:53:53 -0500
Subject: [PATCH] [DependenceAnalysis] Extending SIV to handle separate loops

When there is a dependency between two memory instructions in separate
loops, SIV will be able to test them and compute the direction and
the distance of the dependency.
---
 .../llvm/Analysis/DependenceAnalysis.h        | 202 +++++---
 llvm/lib/Analysis/DependenceAnalysis.cpp      | 477 +++++++++++-------
 .../DependenceAnalysis/SIVSeparateLoops.ll    | 145 ++++++
 3 files changed, 576 insertions(+), 248 deletions(-)
 create mode 100644 llvm/test/Analysis/DependenceAnalysis/SIVSeparateLoops.ll

diff --git a/llvm/include/llvm/Analysis/DependenceAnalysis.h b/llvm/include/llvm/Analysis/DependenceAnalysis.h
index 426ac757b4b0d..85e5fe1ed1f18 100644
--- a/llvm/include/llvm/Analysis/DependenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/DependenceAnalysis.h
@@ -81,6 +81,17 @@ namespace llvm {
     /// Dependence::DVEntry - Each level in the distance/direction vector
     /// has a direction (or perhaps a union of several directions), and
     /// perhaps a distance.
+    /// The dependency information could be across a single loop level or across
+    /// two separate levels that are similar. Two levels are considered similar
+    /// if they can be interpreted as a single fused loop, i.e., have the same
+    /// trip count and the same nesting depth.
+    /// For example, loops b and c are similar and considered as separate loops:
+    ///    for (a = ...) {
+    ///      for (b = 0; b < 10; b++) {
+    ///      }
+    ///      for (c = 0; c < 10; c++) {
+    ///      }
+    ///    }
     struct DVEntry {
       enum : unsigned char {
         NONE = 0,
@@ -152,13 +163,26 @@ namespace llvm {
     /// source and destination of the dependence.
     virtual unsigned getLevels() const { return 0; }
 
+    /// getSeparateLevels - Returns the number of separate loops surrounding
+    /// the source and destination of the dependence.
+    virtual unsigned getSeparateLevels() const { return 0; }
+
+    /// getDVEntry - Returns the DV entry associated with a regular or a
+    /// separate level
+    DVEntry getDVEntry(unsigned Level, bool Separate) const;
+
     /// getDirection - Returns the direction associated with a particular
-    /// level.
-    virtual unsigned getDirection(unsigned Level) const { return DVEntry::ALL; }
+    /// common or separate level.
+    virtual unsigned getDirection(unsigned Level, bool Separate = false) const {
+      return DVEntry::ALL;
+    }
 
     /// getDistance - Returns the distance (or NULL) associated with a
-    /// particular level.
-    virtual const SCEV *getDistance(unsigned Level) const { return nullptr; }
+    /// particular common or separate level.
+    virtual const SCEV *getDistance(unsigned Level,
+                                    bool Separate = false) const {
+      return nullptr;
+    }
 
     /// Check if the direction vector is negative. A negative direction
     /// vector means Src and Dst are reversed in the actual program.
@@ -171,21 +195,31 @@ namespace llvm {
     virtual bool normalize(ScalarEvolution *SE) { return false; }
 
     /// isPeelFirst - Returns true if peeling the first iteration from
-    /// this loop will break this dependence.
-    virtual bool isPeelFirst(unsigned Level) const { return false; }
+    /// this regular or separate loop level will break this dependence.
+    virtual bool isPeelFirst(unsigned Level, bool Separate = false) const {
+      return false;
+    }
 
     /// isPeelLast - Returns true if peeling the last iteration from
-    /// this loop will break this dependence.
-    virtual bool isPeelLast(unsigned Level) const { return false; }
+    /// this regular or separate loop level will break this dependence.
+    virtual bool isPeelLast(unsigned Level, bool Separate = false) const {
+      return false;
+    }
 
-    /// isSplitable - Returns true if splitting this loop will break
+    /// isSplitable - Returns true if splitting the loop will break
     /// the dependence.
-    virtual bool isSplitable(unsigned Level) const { return false; }
+    virtual bool isSplitable(unsigned Level, bool Separate = false) const {
+      return false;
+    }
+
+    /// inSeparateLoops - Returns true if this level is a separate level, i.e.,
+    /// performed across two separate loop nests.
+    virtual bool inSeparateLoops(unsigned Level) const { return false; }
 
-    /// isScalar - Returns true if a particular level is scalar; that is,
-    /// if no subscript in the source or destination mention the induction
-    /// variable associated with the loop at this level.
-    virtual bool isScalar(unsigned Level) const;
+    /// isScalar - Returns true if a particular regular or separate level is
+    /// scalar; that is, if no subscript in the source or destination mention
+    /// the induction variable associated with the loop at this level.
+    virtual bool isScalar(unsigned Level, bool Separate = false) const;
 
     /// getNextPredecessor - Returns the value of the NextPredecessor
     /// field.
@@ -207,6 +241,10 @@ namespace llvm {
     ///
     void dump(raw_ostream &OS) const;
 
+    /// dumpImp - For debugging purposes. Dumps a dependence to OS with or
+    /// without considering the separate levels.
+    void dumpImp(raw_ostream &OS, bool Separate = false) const;
+
   protected:
     Instruction *Src, *Dst;
 
@@ -245,13 +283,31 @@ namespace llvm {
     /// source and destination of the dependence.
     unsigned getLevels() const override { return Levels; }
 
+    /// getSeparateLevels - Returns the number of separate loops surrounding
+    /// the source and destination of the dependence.
+    unsigned getSeparateLevels() const override { return SeparateLevels; }
+
+    /// getDVEntry - Returns the DV entry for Level. Regular levels are
+    /// numbered 1..Levels; separate levels are numbered
+    /// Levels+1..Levels+SeparateLevels and live in DVSeparate.
+    DVEntry getDVEntry(unsigned Level, bool Separate) const {
+      if (Separate) {
+        assert(Levels < Level && Level <= Levels + SeparateLevels &&
+               "Separate level out of range");
+        return DVSeparate[Level - Levels - 1];
+      }
+      assert(0 < Level && Level <= Levels && "Level out of range");
+      return DV[Level - 1];
+    }
+
     /// getDirection - Returns the direction associated with a particular
-    /// level.
-    unsigned getDirection(unsigned Level) const override;
+    /// common or separate level.
+    unsigned getDirection(unsigned Level, bool Separate = false) const override;
 
     /// getDistance - Returns the distance (or NULL) associated with a
-    /// particular level.
-    const SCEV *getDistance(unsigned Level) const override;
+    /// particular common or separate level.
+    const SCEV *getDistance(unsigned Level,
+                            bool Separate = false) const override;
 
     /// Check if the direction vector is negative. A negative direction
     /// vector means Src and Dst are reversed in the actual program.
@@ -264,27 +320,33 @@ namespace llvm {
     bool normalize(ScalarEvolution *SE) override;
 
     /// isPeelFirst - Returns true if peeling the first iteration from
-    /// this loop will break this dependence.
-    bool isPeelFirst(unsigned Level) const override;
+    /// this regular or separate loop level will break this dependence.
+    bool isPeelFirst(unsigned Level, bool Separate = false) const override;
 
     /// isPeelLast - Returns true if peeling the last iteration from
-    /// this loop will break this dependence.
-    bool isPeelLast(unsigned Level) const override;
+    /// this regular or separate loop level will break this dependence.
+    bool isPeelLast(unsigned Level, bool Separate = false) const override;
 
     /// isSplitable - Returns true if splitting the loop will break
     /// the dependence.
-    bool isSplitable(unsigned Level) const override;
+    bool isSplitable(unsigned Level, bool Separate = false) const override;
 
-    /// isScalar - Returns true if a particular level is scalar; that is,
-    /// if no subscript in the source or destination mention the induction
-    /// variable associated with the loop at this level.
-    bool isScalar(unsigned Level) const override;
+    /// inSeparateLoops - Returns true if this level is a separate level, i.e.,
+    /// performed across two separate loop nests.
+    bool inSeparateLoops(unsigned Level) const override;
+
+    /// isScalar - Returns true if a particular regular or separate level is
+    /// scalar; that is, if no subscript in the source or destination mention
+    /// the induction variable associated with the loop at this level.
+    bool isScalar(unsigned Level, bool Separate = false) const override;
 
   private:
     unsigned short Levels;
+    unsigned short SeparateLevels;
     bool LoopIndependent;
     bool Consistent; // Init to true, then refine.
     std::unique_ptr<DVEntry[]> DV;
+    std::unique_ptr<DVEntry[]> DVSeparate;
     friend class DependenceInfo;
   };
 
@@ -405,7 +467,8 @@ namespace llvm {
       const SCEV *A;
       const SCEV *B;
       const SCEV *C;
-      const Loop *AssociatedLoop;
+      const Loop *AssociatedSrcLoop;
+      const Loop *AssociatedDstLoop;
 
     public:
       /// isEmpty - Return true if the constraint is of kind Empty.
@@ -449,18 +512,25 @@ namespace llvm {
       /// Otherwise assert.
       const SCEV *getD() const;
 
-      /// getAssociatedLoop - Returns the loop associated with this constraint.
-      const Loop *getAssociatedLoop() const;
+      /// getAssociatedSrcLoop - Returns the source loop associated with this
+      /// constraint.
+      const Loop *getAssociatedSrcLoop() const;
+
+      /// getAssociatedDstLoop - Returns the destination loop associated with
+      /// this constraint.
+      const Loop *getAssociatedDstLoop() const;
 
       /// setPoint - Change a constraint to Point.
-      void setPoint(const SCEV *X, const SCEV *Y, const Loop *CurrentLoop);
+      void setPoint(const SCEV *X, const SCEV *Y, const Loop *CurrentSrcLoop,
+                    const Loop *CurrentDstLoop);
 
       /// setLine - Change a constraint to Line.
-      void setLine(const SCEV *A, const SCEV *B,
-                   const SCEV *C, const Loop *CurrentLoop);
+      void setLine(const SCEV *A, const SCEV *B, const SCEV *C,
+                   const Loop *CurrentSrcLoop, const Loop *CurrentDstLoop);
 
       /// setDistance - Change a constraint to Distance.
-      void setDistance(const SCEV *D, const Loop *CurrentLoop);
+      void setDistance(const SCEV *D, const Loop *CurrentSrcLoop,
+                       const Loop *CurrentDstLoop);
 
       /// setEmpty - Change a constraint to Empty.
       void setEmpty();
@@ -473,6 +543,10 @@ namespace llvm {
       void dump(raw_ostream &OS) const;
     };
 
+    /// Returns true if the two loops are the same, or if they have the same
+    /// trip count and nesting depth.
+    bool areLoopsSimilar(const Loop *SrcLoop, const Loop *DstLoop) const;
+
     /// establishNestingLevels - Examines the loop nesting of the Src and Dst
     /// instructions and establishes their shared loops. Sets the variables
     /// CommonLevels, SrcLevels, and MaxLevels.
@@ -523,10 +597,15 @@ namespace llvm {
     ///     e - 5
     ///     f - 6
     ///     g - 7 = MaxLevels
-    void establishNestingLevels(const Instruction *Src,
-                                const Instruction *Dst);
+    /// SeparateLevels counts the number of loop levels after the common levels
+    /// that are not identical but are considered similar. Two levels are
+    /// considered similar if they have the same trip count and the same
+    /// nesting depth.
+    /// For example, if loops `c` and `e` are similar, then they contribute to
+    /// the SeparateLevels count and SeparateLevels is set to 1.
+    void establishNestingLevels(const Instruction *Src, const Instruction *Dst);
 
-    unsigned CommonLevels, SrcLevels, MaxLevels;
+    unsigned CommonLevels, SrcLevels, MaxLevels, SeparateLevels;
 
     /// mapSrcLoop - Given one of the loops containing the source, return
     /// its level index in our numbering scheme.
@@ -665,13 +744,10 @@ namespace llvm {
     /// Returns true if any possible dependence is disproved.
     /// If there might be a dependence, returns false.
     /// Sets appropriate direction and distance.
-    bool strongSIVtest(const SCEV *Coeff,
-                       const SCEV *SrcConst,
-                       const SCEV *DstConst,
-                       const Loop *CurrentLoop,
-                       unsigned Level,
-                       FullDependence &Result,
-                       Constraint &NewConstraint) const;
+    bool strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
+                       const SCEV *DstConst, const Loop *CurrentSrcLoop,
+                       const Loop *CurrentDstLoop, unsigned Level,
+                       FullDependence &Result, Constraint &NewConstraint) const;
 
     /// weakCrossingSIVtest - Tests the weak-crossing SIV subscript pair
     /// (Src and Dst) for dependence.
@@ -683,13 +759,10 @@ namespace llvm {
     /// Sets appropriate direction entry.
     /// Set consistent to false.
     /// Marks the dependence as splitable.
-    bool weakCrossingSIVtest(const SCEV *SrcCoeff,
-                             const SCEV *SrcConst,
-                             const SCEV *DstConst,
-                             const Loop *CurrentLoop,
-                             unsigned Level,
-                             FullDependence &Result,
-                             Constraint &NewConstraint,
+    bool weakCrossingSIVtest(const SCEV *SrcCoeff, const SCEV *SrcConst,
+                             const SCEV *DstConst, const Loop *CurrentSrcLoop,
+                             const Loop *CurrentDstLoop, unsigned Level,
+                             FullDependence &Result, Constraint &NewConstraint,
                              const SCEV *&SplitIter) const;
 
     /// ExactSIVtest - Tests the SIV subscript pair
@@ -701,13 +774,10 @@ namespace llvm {
     /// If there might be a dependence, returns false.
     /// Sets appropriate direction entry.
     /// Set consistent to false.
-    bool exactSIVtest(const SCEV *SrcCoeff,
-                      const SCEV *DstCoeff,
-                      const SCEV *SrcConst,
-                      const SCEV *DstConst,
-                      const Loop *CurrentLoop,
-                      unsigned Level,
-                      FullDependence &Result,
+    bool exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
+                      const SCEV *SrcConst, const SCEV *DstConst,
+                      const Loop *CurrentSrcLoop, const Loop *CurrentDstLoop,
+                      unsigned Level, FullDependence &Result,
                       Constraint &NewConstraint) const;
 
     /// weakZeroSrcSIVtest - Tests the weak-zero SIV subscript pair
@@ -720,11 +790,9 @@ namespace llvm {
     /// Sets appropriate direction entry.
     /// Set consistent to false.
     /// If loop peeling will break the dependence, mark appropriately.
-    bool weakZeroSrcSIVtest(const SCEV *DstCoeff,
-                            const SCEV *SrcConst,
-                            const SCEV *DstConst,
-                            const Loop *CurrentLoop,
-                            unsigned Level,
+    bool weakZeroSrcSIVtest(const SCEV *DstCoeff, const SCEV *SrcConst,
+                            const SCEV *DstConst, const Loop *CurrentSrcLoop,
+                            const Loop *CurrentDstLoop, unsigned Level,
                             FullDependence &Result,
                             Constraint &NewConstraint) const;
 
@@ -738,11 +806,9 @@ namespace llvm {
     /// Sets appropriate direction entry.
     /// Set consistent to false.
     /// If loop peeling will break the dependence, mark appropriately.
-    bool weakZeroDstSIVtest(const SCEV *SrcCoeff,
-                            const SCEV *SrcConst,
-                            const SCEV *DstConst,
-                            const Loop *CurrentLoop,
-                            unsigned Level,
+    bool weakZeroDstSIVtest(const SCEV *SrcCoeff, const SCEV *SrcConst,
+                            const SCEV *DstConst, const Loop *CurrentSrcLoop,
+                            const Loop *CurrentDstLoop, unsigned Level,
                             FullDependence &Result,
                             Constraint &NewConstraint) const;
 
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index 580cf43be3cb9..1d7b74ba2f790 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -104,6 +104,7 @@ STATISTIC(GCDindependence, "GCD independence");
 STATISTIC(BanerjeeApplications, "Banerjee applications");
 STATISTIC(BanerjeeIndependence, "Banerjee independence");
 STATISTIC(BanerjeeSuccesses, "Banerjee successes");
+STATISTIC(SeparateLoopsConsidered, "Separate loops considered");
 
 static cl::opt<bool>
     Delinearize("da-delinearize", cl::init(true), cl::Hidden,
@@ -253,10 +254,7 @@ bool Dependence::isAnti() const {
 // if no subscript in the source or destination mention the induction
 // variable associated with the loop at this level.
 // Leave this out of line, so it will serve as a virtual method anchor
-bool Dependence::isScalar(unsigned level) const {
-  return false;
-}
-
+bool Dependence::isScalar(unsigned level, bool Separate) const { return false; }
 
 //===----------------------------------------------------------------------===//
 // FullDependence methods
@@ -329,51 +327,48 @@ bool FullDependence::normalize(ScalarEvolution *SE) {
 
 // The rest are simple getters that hide the implementation.
 
-// getDirection - Returns the direction associated with a particular level.
-unsigned FullDependence::getDirection(unsigned Level) const {
-  assert(0 < Level && Level <= Levels && "Level out of range");
-  return DV[Level - 1].Direction;
+// getDirection - Returns the direction associated with a particular common or
+// separate level.
+unsigned FullDependence::getDirection(unsigned Level, bool Separate) const {
+  return getDVEntry(Level, Separate).Direction;
 }
 
-
-// Returns the distance (or NULL) associated with a particular level.
-const SCEV *FullDependence::getDistance(unsigned Level) const {
-  assert(0 < Level && Level <= Levels && "Level out of range");
-  return DV[Level - 1].Distance;
+// Returns the distance (or NULL) associated with a particular common or
+// separate level.
+const SCEV *FullDependence::getDistance(unsigned Level, bool Separate) const {
+  return getDVEntry(Level, Separate).Distance;
 }
 
-
-// Returns true if a particular level is scalar; that is,
-// if no subscript in the source or destination mention the induction
-// variable associated with the loop at this level.
-bool FullDependence::isScalar(unsigned Level) const {
-  assert(0 < Level && Level <= Levels && "Level out of range");
-  return DV[Level - 1].Scalar;
+// Returns true if a particular regular or separate level is scalar; that is,
+// if no subscript in the source or destination mention the induction variable
+// associated with the loop at this level.
+bool FullDependence::isScalar(unsigned Level, bool Separate) const {
+  return getDVEntry(Level, Separate).Scalar;
 }
 
-
-// Returns true if peeling the first iteration from this loop
-// will break this dependence.
-bool FullDependence::isPeelFirst(unsigned Level) const {
-  assert(0 < Level && Level <= Levels && "Level out of range");
-  return DV[Level - 1].PeelFirst;
+// Returns true if peeling the first iteration from this regular or separate
+// loop level will break this dependence.
+bool FullDependence::isPeelFirst(unsigned Level, bool Separate) const {
+  return getDVEntry(Level, Separate).PeelFirst;
 }
 
-
-// Returns true if peeling the last iteration from this loop
-// will break this dependence.
-bool FullDependence::isPeelLast(unsigned Level) const {
-  assert(0 < Level && Level <= Levels && "Level out of range");
-  return DV[Level - 1].PeelLast;
+// Returns true if peeling the last iteration from this regular or separate
+// loop level will break this dependence.
+bool FullDependence::isPeelLast(unsigned Level, bool Separate) const {
+  return getDVEntry(Level, Separate).PeelLast;
 }
 
-
-// Returns true if splitting this loop will break the dependence.
-bool FullDependence::isSplitable(unsigned Level) const {
-  assert(0 < Level && Level <= Levels && "Level out of range");
-  return DV[Level - 1].Splitable;
+// Returns true if splitting the loop will break the dependence.
+bool FullDependence::isSplitable(unsigned Level, bool Separate) const {
+  return getDVEntry(Level, Separate).Splitable;
 }
 
+// inSeparateLoops - Returns true if Level is a separate level, i.e., it is
+// numbered after the common levels (Levels < Level <= Levels + SeparateLevels).
+bool FullDependence::inSeparateLoops(unsigned Level) const {
+  assert(0 < Level && Level <= Levels + SeparateLevels && "Level out of range");
+  return Level > Levels;
+}
 
 //===----------------------------------------------------------------------===//
 // DependenceInfo::Constraint methods
@@ -428,38 +423,50 @@ const SCEV *DependenceInfo::Constraint::getD() const {
   return SE->getNegativeSCEV(C);
 }
 
+// Returns the source loop associated with this constraint.
+const Loop *DependenceInfo::Constraint::getAssociatedSrcLoop() const {
+  assert((Kind == Distance || Kind == Line || Kind == Point) &&
+         "Kind should be Distance, Line, or Point");
+  return AssociatedSrcLoop;
+}
 
-// Returns the loop associated with this constraint.
-const Loop *DependenceInfo::Constraint::getAssociatedLoop() const {
+// Returns the destination loop associated with this constraint.
+const Loop *DependenceInfo::Constraint::getAssociatedDstLoop() const {
   assert((Kind == Distance || Kind == Line || Kind == Point) &&
          "Kind should be Distance, Line, or Point");
-  return AssociatedLoop;
+  return AssociatedDstLoop;
 }
 
 void DependenceInfo::Constraint::setPoint(const SCEV *X, const SCEV *Y,
-                                          const Loop *CurLoop) {
+                                          const Loop *CurSrcLoop,
+                                          const Loop *CurDstLoop) {
   Kind = Point;
   A = X;
   B = Y;
-  AssociatedLoop = CurLoop;
+  AssociatedSrcLoop = CurSrcLoop;
+  AssociatedDstLoop = CurDstLoop;
 }
 
 void DependenceInfo::Constraint::setLine(const SCEV *AA, const SCEV *BB,
-                                         const SCEV *CC, const Loop *CurLoop) {
+                                         const SCEV *CC, const Loop *CurSrcLoop,
+                                         const Loop *CurDstLoop) {
   Kind = Line;
   A = AA;
   B = BB;
   C = CC;
-  AssociatedLoop = CurLoop;
+  AssociatedSrcLoop = CurSrcLoop;
+  AssociatedDstLoop = CurDstLoop;
 }
 
 void DependenceInfo::Constraint::setDistance(const SCEV *D,
-                                             const Loop *CurLoop) {
+                                             const Loop *CurSrcLoop,
+                                             const Loop *CurDstLoop) {
   Kind = Distance;
   A = SE->getOne(D->getType());
   B = SE->getNegativeSCEV(A);
   C = SE->getNegativeSCEV(D);
-  AssociatedLoop = CurLoop;
+  AssociatedSrcLoop = CurSrcLoop;
+  AssociatedDstLoop = CurDstLoop;
 }
 
 void DependenceInfo::Constraint::setEmpty() { Kind = Empty; }
@@ -606,8 +613,8 @@ bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) {
         ++DeltaSuccesses;
         return true;
       }
-      if (const SCEVConstant *CUB =
-          collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) {
+      if (const SCEVConstant *CUB = collectConstantUpperBound(
+              X->getAssociatedSrcLoop(), Prod1->getType())) {
         const APInt &UpperBound = CUB->getAPInt();
         LLVM_DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n");
         if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) {
@@ -616,9 +623,8 @@ bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) {
           return true;
         }
       }
-      X->setPoint(SE->getConstant(Xq),
-                  SE->getConstant(Yq),
-                  X->getAssociatedLoop());
+      X->setPoint(SE->getConstant(Xq), SE->getConstant(Yq),
+                  X->getAssociatedSrcLoop(), X->getAssociatedDstLoop());
       ++DeltaSuccesses;
       return true;
     }
@@ -653,7 +659,6 @@ bool DependenceInfo::intersectConstraints(Constraint *X, const Constraint *Y) {
 
 // For debugging purposes. Dumps a dependence to OS.
 void Dependence::dump(raw_ostream &OS) const {
-  bool Splitable = false;
   if (isConfused())
     OS << "confused";
   else {
@@ -667,43 +672,61 @@ void Dependence::dump(raw_ostream &OS) const {
       OS << "anti";
     else if (isInput())
       OS << "input";
-    unsigned Levels = getLevels();
-    OS << " [";
-    for (unsigned II = 1; II <= Levels; ++II) {
-      if (isSplitable(II))
-        Splitable = true;
-      if (isPeelFirst(II))
-        OS << 'p';
-      const SCEV *Distance = getDistance(II);
-      if (Distance)
-        OS << *Distance;
-      else if (isScalar(II))
-        OS << "S";
+    dumpImp(OS);
+    unsigned SeparateLevels = getSeparateLevels();
+    if (SeparateLevels > 0) {
+      OS << "! / assuming " << SeparateLevels << " fused loop(s): ";
+      dumpImp(OS, true);
+    }
+  }
+  OS << "!\n";
+}
+
+// For debugging purposes. Dumps a dependence to OS with or without considering
+// the separate levels.
+void Dependence::dumpImp(raw_ostream &OS, bool Separate) const {
+  bool Splitable = false;
+  unsigned Levels = getLevels();
+  unsigned SeparateLevels = getSeparateLevels();
+  bool OnSeparates = false;
+  unsigned LevelNum = Levels;
+  if (Separate) LevelNum += SeparateLevels;
+  OS << " [";
+  for (unsigned II = 1; II <= LevelNum; ++II) {
+    if (!OnSeparates && inSeparateLoops(II))
+      OnSeparates = true;
+    if (isSplitable(II, OnSeparates))
+      Splitable = true;
+    if (isPeelFirst(II, OnSeparates))
+      OS << 'p';
+    const SCEV *Distance = getDistance(II, OnSeparates);
+    if (Distance)
+      OS << *Distance;
+    else if (isScalar(II, OnSeparates))
+      OS << "S";
+    else {
+      unsigned Direction = getDirection(II, OnSeparates);
+      if (Direction == DVEntry::ALL)
+        OS << "*";
       else {
-        unsigned Direction = getDirection(II);
-        if (Direction == DVEntry::ALL)
-          OS << "*";
-        else {
-          if (Direction & DVEntry::LT)
-            OS << "<";
-          if (Direction & DVEntry::EQ)
-            OS << "=";
-          if (Direction & DVEntry::GT)
-            OS << ">";
-        }
+        if (Direction & DVEntry::LT)
+          OS << "<";
+        if (Direction & DVEntry::EQ)
+          OS << "=";
+        if (Direction & DVEntry::GT)
+          OS << ">";
       }
-      if (isPeelLast(II))
-        OS << 'p';
-      if (II < Levels)
-        OS << " ";
     }
-    if (isLoopIndependent())
-      OS << "|<";
-    OS << "]";
-    if (Splitable)
-      OS << " splitable";
+    if (isPeelLast(II, OnSeparates))
+      OS << 'p';
+    if (II < LevelNum)
+      OS << " ";
   }
-  OS << "!\n";
+  if (isLoopIndependent())
+    OS << "|<";
+  OS << "]";
+  if (Splitable)
+    OS << " splitable";
 }
 
 // Returns NoAlias/MayAliass/MustAlias for two memory locations based upon their
@@ -756,6 +779,34 @@ bool isLoadOrStore(const Instruction *I) {
   return false;
 }
 
+// Returns true if the two loops are the same loop, or if they have the same
+// nesting depth, each has a loop latch, and their loop-invariant
+// backedge-taken counts are provably equal.
+bool DependenceInfo::areLoopsSimilar(const Loop *SrcLoop,
+                                     const Loop *DstLoop) const {
+  if (SrcLoop == DstLoop)
+    return true;
+
+  // Reject null loops before dereferencing either pointer.
+  if (!SrcLoop || !DstLoop)
+    return false;
+
+  if (SrcLoop->getLoopDepth() != DstLoop->getLoopDepth())
+    return false;
+
+  if (!SrcLoop->getLoopLatch() || !DstLoop->getLoopLatch())
+    return false;
+
+  // Without a loop-invariant backedge-taken count on both sides the trip
+  // counts cannot be compared, so the loops are not considered similar.
+  if (!SE->hasLoopInvariantBackedgeTakenCount(SrcLoop) ||
+      !SE->hasLoopInvariantBackedgeTakenCount(DstLoop))
+    return false;
+
+  const SCEV *SrcUB = SE->getBackedgeTakenCount(SrcLoop);
+  const SCEV *DstUB = SE->getBackedgeTakenCount(DstLoop);
+  return SE->isKnownPredicate(ICmpInst::ICMP_EQ, SrcUB, DstUB);
+}
 
 // Examines the loop nesting of the Src and Dst
 // instructions and establishes their shared loops. Sets the variables
@@ -807,6 +858,11 @@ bool isLoadOrStore(const Instruction *I) {
 //     e - 5
 //     f - 6
 //     g - 7 = MaxLevels
+// SeparateLevels counts the number of levels after the common levels that
+// are not common but are similar, meaning that they have the same trip count
+// and depth. Assume that in this code fragment, levels c and e are
+// similar. In this case only the loop nests at the next level after the
+// common levels are similar, and SeparateLevels is set to 1.
 void DependenceInfo::establishNestingLevels(const Instruction *Src,
                                             const Instruction *Dst) {
   const BasicBlock *SrcBlock = Src->getParent();
@@ -817,6 +873,7 @@ void DependenceInfo::establishNestingLevels(const Instruction *Src,
   const Loop *DstLoop = LI->getLoopFor(DstBlock);
   SrcLevels = SrcLevel;
   MaxLevels = SrcLevel + DstLevel;
+  SeparateLevels = 0;
   while (SrcLevel > DstLevel) {
     SrcLoop = SrcLoop->getParentLoop();
     SrcLevel--;
@@ -825,16 +882,20 @@ void DependenceInfo::establishNestingLevels(const Instruction *Src,
     DstLoop = DstLoop->getParentLoop();
     DstLevel--;
   }
+
+  // find the first common level and count the separate levels leading to it
   while (SrcLoop != DstLoop) {
+    SeparateLevels++;
+    if (!areLoopsSimilar(SrcLoop, DstLoop))
+      SeparateLevels = 0;
+    SrcLevel--;
     SrcLoop = SrcLoop->getParentLoop();
     DstLoop = DstLoop->getParentLoop();
-    SrcLevel--;
   }
   CommonLevels = SrcLevel;
   MaxLevels -= CommonLevels;
 }
 
-
 // Given one of the loops containing the source, return
 // its level index in our numbering scheme.
 unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const {
@@ -1221,8 +1282,9 @@ bool DependenceInfo::testZIV(const SCEV *Src, const SCEV *Dst,
 //
 // Return true if dependence disproved.
 bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
-                                   const SCEV *DstConst, const Loop *CurLoop,
-                                   unsigned Level, FullDependence &Result,
+                                   const SCEV *DstConst, const Loop *CurSrcLoop,
+                                   const Loop *CurDstLoop, unsigned Level,
+                                   FullDependence &Result,
                                    Constraint &NewConstraint) const {
   LLVM_DEBUG(dbgs() << "\tStrong SIV test\n");
   LLVM_DEBUG(dbgs() << "\t    Coeff = " << *Coeff);
@@ -1240,7 +1302,8 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
   LLVM_DEBUG(dbgs() << ", " << *Delta->getType() << "\n");
 
   // check that |Delta| < iteration count
-  if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
+  if (const SCEV *UpperBound =
+          collectUpperBound(CurSrcLoop, Delta->getType())) {
     LLVM_DEBUG(dbgs() << "\t    UpperBound = " << *UpperBound);
     LLVM_DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n");
     const SCEV *AbsDelta =
@@ -1273,7 +1336,8 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
       return true;
     }
     Result.DV[Level].Distance = SE->getConstant(Distance);
-    NewConstraint.setDistance(SE->getConstant(Distance), CurLoop);
+    NewConstraint.setDistance(SE->getConstant(Distance), CurSrcLoop,
+                              CurDstLoop);
     if (Distance.sgt(0))
       Result.DV[Level].Direction &= Dependence::DVEntry::LT;
     else if (Distance.slt(0))
@@ -1285,7 +1349,7 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
   else if (Delta->isZero()) {
     // since 0/X == 0
     Result.DV[Level].Distance = Delta;
-    NewConstraint.setDistance(Delta, CurLoop);
+    NewConstraint.setDistance(Delta, CurSrcLoop, CurDstLoop);
     Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
     ++StrongSIVsuccesses;
   }
@@ -1293,13 +1357,12 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
     if (Coeff->isOne()) {
       LLVM_DEBUG(dbgs() << "\t    Distance = " << *Delta << "\n");
       Result.DV[Level].Distance = Delta; // since X/1 == X
-      NewConstraint.setDistance(Delta, CurLoop);
+      NewConstraint.setDistance(Delta, CurSrcLoop, CurDstLoop);
     }
     else {
       Result.Consistent = false;
-      NewConstraint.setLine(Coeff,
-                            SE->getNegativeSCEV(Coeff),
-                            SE->getNegativeSCEV(Delta), CurLoop);
+      NewConstraint.setLine(Coeff, SE->getNegativeSCEV(Coeff),
+                            SE->getNegativeSCEV(Delta), CurSrcLoop, CurDstLoop);
     }
 
     // maybe we can get a useful direction
@@ -1327,7 +1390,6 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
   return false;
 }
 
-
 // weakCrossingSIVtest -
 // From the paper, Practical Dependence Testing, Section 4.2.2
 //
@@ -1358,8 +1420,9 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
 // Return true if dependence disproved.
 bool DependenceInfo::weakCrossingSIVtest(
     const SCEV *Coeff, const SCEV *SrcConst, const SCEV *DstConst,
-    const Loop *CurLoop, unsigned Level, FullDependence &Result,
-    Constraint &NewConstraint, const SCEV *&SplitIter) const {
+    const Loop *CurSrcLoop, const Loop *CurDstLoop, unsigned Level,
+    FullDependence &Result, Constraint &NewConstraint,
+    const SCEV *&SplitIter) const {
   LLVM_DEBUG(dbgs() << "\tWeak-Crossing SIV test\n");
   LLVM_DEBUG(dbgs() << "\t    Coeff = " << *Coeff << "\n");
   LLVM_DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
@@ -1370,7 +1433,7 @@ bool DependenceInfo::weakCrossingSIVtest(
   Result.Consistent = false;
   const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
   LLVM_DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
-  NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop);
+  NewConstraint.setLine(Coeff, Coeff, Delta, CurSrcLoop, CurDstLoop);
   if (Delta->isZero()) {
     Result.DV[Level].Direction &= ~Dependence::DVEntry::LT;
     Result.DV[Level].Direction &= ~Dependence::DVEntry::GT;
@@ -1418,7 +1481,8 @@ bool DependenceInfo::weakCrossingSIVtest(
 
   // We're certain that Delta > 0 and ConstCoeff > 0.
   // Check Delta/(2*ConstCoeff) against upper loop bound
-  if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
+  if (const SCEV *UpperBound =
+          collectUpperBound(CurSrcLoop, Delta->getType())) {
     LLVM_DEBUG(dbgs() << "\t    UpperBound = " << *UpperBound << "\n");
     const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2);
     const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound),
@@ -1472,7 +1536,6 @@ bool DependenceInfo::weakCrossingSIVtest(
   return false;
 }
 
-
 // Kirch's algorithm, from
 //
 //        Optimizing Supercompilers for Supercomputers
@@ -1558,7 +1621,8 @@ static APInt ceilingOfQuotient(const APInt &A, const APInt &B) {
 // returns all the dependencies that exist between Dst and Src.
 bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
                                   const SCEV *SrcConst, const SCEV *DstConst,
-                                  const Loop *CurLoop, unsigned Level,
+                                  const Loop *CurSrcLoop,
+                                  const Loop *CurDstLoop, unsigned Level,
                                   FullDependence &Result,
                                   Constraint &NewConstraint) const {
   LLVM_DEBUG(dbgs() << "\tExact SIV test\n");
@@ -1573,7 +1637,7 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
   const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
   LLVM_DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
   NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff), Delta,
-                        CurLoop);
+                        CurSrcLoop, CurDstLoop);
   const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
   const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
   const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
@@ -1600,7 +1664,7 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
   bool UMValid = false;
   // UM is perhaps unavailable, let's check
   if (const SCEVConstant *CUB =
-          collectConstantUpperBound(CurLoop, Delta->getType())) {
+          collectConstantUpperBound(CurSrcLoop, Delta->getType())) {
     UM = CUB->getAPInt();
     LLVM_DEBUG(dbgs() << "\t    UM = " << UM << "\n");
     UMValid = true;
@@ -1707,7 +1771,6 @@ bool DependenceInfo::exactSIVtest(const SCEV *SrcCoeff, const SCEV *DstCoeff,
   return Result.DV[Level].Direction == Dependence::DVEntry::NONE;
 }
 
-
 // Return true if the divisor evenly divides the dividend.
 static
 bool isRemainderZero(const SCEVConstant *Dividend,
@@ -1749,12 +1812,10 @@ bool isRemainderZero(const SCEVConstant *Dividend,
 // (see also weakZeroDstSIVtest)
 //
 // Return true if dependence disproved.
-bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff,
-                                        const SCEV *SrcConst,
-                                        const SCEV *DstConst,
-                                        const Loop *CurLoop, unsigned Level,
-                                        FullDependence &Result,
-                                        Constraint &NewConstraint) const {
+bool DependenceInfo::weakZeroSrcSIVtest(
+    const SCEV *DstCoeff, const SCEV *SrcConst, const SCEV *DstConst,
+    const Loop *CurSrcLoop, const Loop *CurDstLoop, unsigned Level,
+    FullDependence &Result, Constraint &NewConstraint) const {
   // For the WeakSIV test, it's possible the loop isn't common to
   // the Src and Dst loops. If it isn't, then there's no need to
   // record a direction.
@@ -1768,7 +1829,7 @@ bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff,
   Result.Consistent = false;
   const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
   NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta,
-                        CurLoop);
+                        CurSrcLoop, CurDstLoop);
   LLVM_DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
   if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) {
     if (Level < CommonLevels) {
@@ -1789,7 +1850,8 @@ bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff,
 
   // check that Delta/SrcCoeff < iteration count
   // really check NewDelta < count*AbsCoeff
-  if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
+  if (const SCEV *UpperBound =
+          collectUpperBound(CurSrcLoop, Delta->getType())) {
     LLVM_DEBUG(dbgs() << "\t    UpperBound = " << *UpperBound << "\n");
     const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
     if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {
@@ -1827,7 +1889,6 @@ bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff,
   return false;
 }
 
-
 // weakZeroDstSIVtest -
 // From the paper, Practical Dependence Testing, Section 4.2.2
 //
@@ -1859,12 +1920,10 @@ bool DependenceInfo::weakZeroSrcSIVtest(const SCEV *DstCoeff,
 // (see also weakZeroSrcSIVtest)
 //
 // Return true if dependence disproved.
-bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff,
-                                        const SCEV *SrcConst,
-                                        const SCEV *DstConst,
-                                        const Loop *CurLoop, unsigned Level,
-                                        FullDependence &Result,
-                                        Constraint &NewConstraint) const {
+bool DependenceInfo::weakZeroDstSIVtest(
+    const SCEV *SrcCoeff, const SCEV *SrcConst, const SCEV *DstConst,
+    const Loop *CurSrcLoop, const Loop *CurDstLoop, unsigned Level,
+    FullDependence &Result, Constraint &NewConstraint) const {
   // For the WeakSIV test, it's possible the loop isn't common to the
   // Src and Dst loops. If it isn't, then there's no need to record a direction.
   LLVM_DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n");
@@ -1877,7 +1936,7 @@ bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff,
   Result.Consistent = false;
   const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
   NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta,
-                        CurLoop);
+                        CurSrcLoop, CurDstLoop);
   LLVM_DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
   if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) {
     if (Level < CommonLevels) {
@@ -1898,7 +1957,8 @@ bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff,
 
   // check that Delta/SrcCoeff < iteration count
   // really check NewDelta < count*AbsCoeff
-  if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
+  if (const SCEV *UpperBound =
+          collectUpperBound(CurSrcLoop, Delta->getType())) {
     LLVM_DEBUG(dbgs() << "\t    UpperBound = " << *UpperBound << "\n");
     const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
     if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {
@@ -1936,7 +1996,6 @@ bool DependenceInfo::weakZeroDstSIVtest(const SCEV *SrcCoeff,
   return false;
 }
 
-
 // exactRDIVtest - Tests the RDIV subscript pair for dependence.
 // Things of the form [c1 + a*i] and [c2 + b*j],
 // where i and j are induction variable, c1 and c2 are loop invariant,
@@ -2228,43 +2287,47 @@ bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level,
     const SCEV *DstConst = DstAddRec->getStart();
     const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
     const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);
-    const Loop *CurLoop = SrcAddRec->getLoop();
-    assert(CurLoop == DstAddRec->getLoop() &&
-           "both loops in SIV should be same");
-    Level = mapSrcLoop(CurLoop);
+    const Loop *CurSrcLoop = SrcAddRec->getLoop();
+    const Loop *CurDstLoop = DstAddRec->getLoop();
+    assert(areLoopsSimilar(CurSrcLoop, CurDstLoop) &&
+           "both loops in SIV should be the same or have the same tripcount "
+           "and depth");
+    Level = mapSrcLoop(CurSrcLoop);
     bool disproven;
     if (SrcCoeff == DstCoeff)
-      disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
-                                Level, Result, NewConstraint);
+      disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurSrcLoop,
+                                CurDstLoop, Level, Result, NewConstraint);
     else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff))
-      disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
-                                      Level, Result, NewConstraint, SplitIter);
+      disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurSrcLoop,
+                                      CurDstLoop, Level, Result, NewConstraint,
+                                      SplitIter);
     else
-      disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop,
-                               Level, Result, NewConstraint);
-    return disproven ||
-      gcdMIVtest(Src, Dst, Result) ||
-      symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop);
+      disproven =
+          exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurSrcLoop,
+                       CurDstLoop, Level, Result, NewConstraint);
+    return disproven || gcdMIVtest(Src, Dst, Result) ||
+           symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurSrcLoop,
+                            CurDstLoop);
   }
   if (SrcAddRec) {
     const SCEV *SrcConst = SrcAddRec->getStart();
     const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
     const SCEV *DstConst = Dst;
-    const Loop *CurLoop = SrcAddRec->getLoop();
-    Level = mapSrcLoop(CurLoop);
-    return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
-                              Level, Result, NewConstraint) ||
-      gcdMIVtest(Src, Dst, Result);
+    const Loop *CurSrcLoop = SrcAddRec->getLoop();
+    Level = mapSrcLoop(CurSrcLoop);
+    return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurSrcLoop,
+                              CurSrcLoop, Level, Result, NewConstraint) ||
+           gcdMIVtest(Src, Dst, Result);
   }
   if (DstAddRec) {
     const SCEV *DstConst = DstAddRec->getStart();
     const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);
     const SCEV *SrcConst = Src;
-    const Loop *CurLoop = DstAddRec->getLoop();
-    Level = mapDstLoop(CurLoop);
-    return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst,
-                              CurLoop, Level, Result, NewConstraint) ||
-      gcdMIVtest(Src, Dst, Result);
+    const Loop *CurDstLoop = DstAddRec->getLoop();
+    Level = mapDstLoop(CurDstLoop);
+    return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst, CurDstLoop,
+                              CurDstLoop, Level, Result, NewConstraint) ||
+           gcdMIVtest(Src, Dst, Result);
   }
   llvm_unreachable("SIV test expected at least one AddRec");
   return false;
@@ -3169,19 +3232,20 @@ bool DependenceInfo::propagate(const SCEV *&Src, const SCEV *&Dst,
 bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst,
                                        Constraint &CurConstraint,
                                        bool &Consistent) {
-  const Loop *CurLoop = CurConstraint.getAssociatedLoop();
+  const Loop *CurSrcLoop = CurConstraint.getAssociatedSrcLoop();
+  const Loop *CurDstLoop = CurConstraint.getAssociatedDstLoop();
   LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
-  const SCEV *A_K = findCoefficient(Src, CurLoop);
+  const SCEV *A_K = findCoefficient(Src, CurSrcLoop);
   if (A_K->isZero())
     return false;
   const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD());
   Src = SE->getMinusSCEV(Src, DA_K);
-  Src = zeroCoefficient(Src, CurLoop);
+  Src = zeroCoefficient(Src, CurSrcLoop);
   LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
   LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
-  Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K));
+  Dst = addToCoefficient(Dst, CurDstLoop, SE->getNegativeSCEV(A_K));
   LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
-  if (!findCoefficient(Dst, CurLoop)->isZero())
+  if (!findCoefficient(Dst, CurDstLoop)->isZero())
     Consistent = false;
   return true;
 }
@@ -3195,7 +3259,8 @@ bool DependenceInfo::propagateDistance(const SCEV *&Src, const SCEV *&Dst,
 bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst,
                                    Constraint &CurConstraint,
                                    bool &Consistent) {
-  const Loop *CurLoop = CurConstraint.getAssociatedLoop();
+  const Loop *CurSrcLoop = CurConstraint.getAssociatedSrcLoop();
+  const Loop *CurDstLoop = CurConstraint.getAssociatedDstLoop();
   const SCEV *A = CurConstraint.getA();
   const SCEV *B = CurConstraint.getB();
   const SCEV *C = CurConstraint.getC();
@@ -3211,11 +3276,11 @@ bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst,
     APInt Charlie = Cconst->getAPInt();
     APInt CdivB = Charlie.sdiv(Beta);
     assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B");
-    const SCEV *AP_K = findCoefficient(Dst, CurLoop);
+    const SCEV *AP_K = findCoefficient(Dst, CurDstLoop);
     //    Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));
     Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));
-    Dst = zeroCoefficient(Dst, CurLoop);
-    if (!findCoefficient(Src, CurLoop)->isZero())
+    Dst = zeroCoefficient(Dst, CurDstLoop);
+    if (!findCoefficient(Src, CurSrcLoop)->isZero())
       Consistent = false;
   }
   else if (B->isZero()) {
@@ -3226,10 +3291,10 @@ bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst,
     APInt Charlie = Cconst->getAPInt();
     APInt CdivA = Charlie.sdiv(Alpha);
     assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
-    const SCEV *A_K = findCoefficient(Src, CurLoop);
+    const SCEV *A_K = findCoefficient(Src, CurSrcLoop);
     Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));
-    Src = zeroCoefficient(Src, CurLoop);
-    if (!findCoefficient(Dst, CurLoop)->isZero())
+    Src = zeroCoefficient(Src, CurSrcLoop);
+    if (!findCoefficient(Dst, CurDstLoop)->isZero())
       Consistent = false;
   }
   else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) {
@@ -3240,22 +3305,22 @@ bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst,
     APInt Charlie = Cconst->getAPInt();
     APInt CdivA = Charlie.sdiv(Alpha);
     assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
-    const SCEV *A_K = findCoefficient(Src, CurLoop);
+    const SCEV *A_K = findCoefficient(Src, CurSrcLoop);
     Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));
-    Src = zeroCoefficient(Src, CurLoop);
-    Dst = addToCoefficient(Dst, CurLoop, A_K);
-    if (!findCoefficient(Dst, CurLoop)->isZero())
+    Src = zeroCoefficient(Src, CurSrcLoop);
+    Dst = addToCoefficient(Dst, CurDstLoop, A_K);
+    if (!findCoefficient(Dst, CurDstLoop)->isZero())
       Consistent = false;
   }
   else {
     // paper is incorrect here, or perhaps just misleading
-    const SCEV *A_K = findCoefficient(Src, CurLoop);
+    const SCEV *A_K = findCoefficient(Src, CurSrcLoop);
     Src = SE->getMulExpr(Src, A);
     Dst = SE->getMulExpr(Dst, A);
     Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C));
-    Src = zeroCoefficient(Src, CurLoop);
-    Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B));
-    if (!findCoefficient(Dst, CurLoop)->isZero())
+    Src = zeroCoefficient(Src, CurSrcLoop);
+    Dst = addToCoefficient(Dst, CurDstLoop, SE->getMulExpr(A_K, B));
+    if (!findCoefficient(Dst, CurDstLoop)->isZero())
       Consistent = false;
   }
   LLVM_DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n");
@@ -3269,17 +3334,18 @@ bool DependenceInfo::propagateLine(const SCEV *&Src, const SCEV *&Dst,
 // Return true if some simplification occurs.
 bool DependenceInfo::propagatePoint(const SCEV *&Src, const SCEV *&Dst,
                                     Constraint &CurConstraint) {
-  const Loop *CurLoop = CurConstraint.getAssociatedLoop();
-  const SCEV *A_K = findCoefficient(Src, CurLoop);
-  const SCEV *AP_K = findCoefficient(Dst, CurLoop);
+  const Loop *CurSrcLoop = CurConstraint.getAssociatedSrcLoop();
+  const Loop *CurDstLoop = CurConstraint.getAssociatedDstLoop();
+  const SCEV *A_K = findCoefficient(Src, CurSrcLoop);
+  const SCEV *AP_K = findCoefficient(Dst, CurDstLoop);
   const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX());
   const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY());
   LLVM_DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
   Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K));
-  Src = zeroCoefficient(Src, CurLoop);
+  Src = zeroCoefficient(Src, CurSrcLoop);
   LLVM_DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
   LLVM_DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
-  Dst = zeroCoefficient(Dst, CurLoop);
+  Dst = zeroCoefficient(Dst, CurDstLoop);
   LLVM_DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
   return true;
 }
@@ -3615,14 +3681,6 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst) {
     break; // The underlying objects alias; test accesses for dependence.
   }
 
-  // establish loop nesting levels
-  establishNestingLevels(Src, Dst);
-  LLVM_DEBUG(dbgs() << "    common nesting levels = " << CommonLevels << "\n");
-  LLVM_DEBUG(dbgs() << "    maximum nesting levels = " << MaxLevels << "\n");
-
-  FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels);
-  ++TotalArrayPairs;
-
   unsigned Pairs = 1;
   SmallVector<Subscript, 2> Pair(Pairs);
   const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
@@ -3649,6 +3707,46 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst) {
     }
   }
 
+  // Establish loop nesting levels considering separate but similar loops as
+  // common
+  establishNestingLevels(Src, Dst);
+
+  LLVM_DEBUG(dbgs() << "    common nesting levels = " << CommonLevels << "\n");
+  LLVM_DEBUG(dbgs() << "    maximum nesting levels = " << MaxLevels << "\n");
+  LLVM_DEBUG(dbgs() << "    separate nesting levels = " << SeparateLevels
+                    << "\n");
+
+  // Modify common levels to consider the similar separate levels in the tests
+  CommonLevels += SeparateLevels;
+  MaxLevels -= SeparateLevels;
+  if (SeparateLevels > 0) {
+    // Not all tests can handle separate loops yet.
+    // Give up on separate levels if any pair is not ZIV, SIV or RDIV.
+    for (unsigned P = 0; P < Pairs; ++P) {
+      Pair[P].Loops.resize(MaxLevels + 1);
+      Subscript::ClassificationKind TestClass = classifyPair(
+          Pair[P].Src, LI->getLoopFor(Src->getParent()), Pair[P].Dst,
+          LI->getLoopFor(Dst->getParent()), Pair[P].Loops);
+
+      if (TestClass != Subscript::ZIV && TestClass != Subscript::SIV &&
+          TestClass != Subscript::RDIV) {
+        // Revert the levels to not consider the separate levels
+        CommonLevels -= SeparateLevels;
+        MaxLevels += SeparateLevels;
+        SeparateLevels = 0;
+        break;
+      }
+    }
+  }
+
+  FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels);
+  ++TotalArrayPairs;
+
+  if (SeparateLevels > 0) {
+    Result.Consistent = false;
+    SeparateLoopsConsidered++;
+  }
+
   for (unsigned P = 0; P < Pairs; ++P) {
     Pair[P].Loops.resize(MaxLevels + 1);
     Pair[P].GroupLoops.resize(MaxLevels + 1);
@@ -3938,6 +4036,25 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst) {
     if (CompleteLoops[II])
       Result.DV[II - 1].Scalar = false;
 
+  if (SeparateLevels > 0) {
+    // Split the separate levels back out of the common levels and restore
+    // CommonLevels and MaxLevels to their original values.
+    assert(CommonLevels >= SeparateLevels);
+    CommonLevels -= SeparateLevels;
+    MaxLevels += SeparateLevels;
+    std::unique_ptr<FullDependence::DVEntry[]> DV, DVSeparate;
+    DV = std::make_unique<FullDependence::DVEntry[]>(CommonLevels);
+    DVSeparate = std::make_unique<FullDependence::DVEntry[]>(SeparateLevels);
+    for (unsigned level = 0; level < CommonLevels; ++level)
+      DV[level] = Result.DV[level];
+    for (unsigned level = 0; level < SeparateLevels; ++level)
+      DVSeparate[level] = Result.DV[CommonLevels + level];
+    Result.DV = std::move(DV);
+    Result.DVSeparate = std::move(DVSeparate);
+    Result.Levels = CommonLevels;
+    Result.SeparateLevels = SeparateLevels;
+  }
+
   if (PossiblyLoopIndependent) {
     // Make sure the LoopIndependent flag is set correctly.
     // All directions must include equal, otherwise no
diff --git a/llvm/test/Analysis/DependenceAnalysis/SIVSeparateLoops.ll b/llvm/test/Analysis/DependenceAnalysis/SIVSeparateLoops.ll
new file mode 100644
index 0000000000000..3d7fd12e735d3
--- /dev/null
+++ b/llvm/test/Analysis/DependenceAnalysis/SIVSeparateLoops.ll
@@ -0,0 +1,145 @@
+; RUN: opt < %s -disable-output "-passes=print<da>" -aa-pipeline=basic-aa 2>&1 \
+; RUN:   -da-disable-delinearization-checks | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.6.0"
+
+;;  for (long int i = 0; i < n; i++) {
+;;    for (long int j = 0; j < n; j++) {
+;;      for (long int k = 0; k < n; k++) {
+;;        for (long int l = 0; l < n; l++)
+;;          A[i][j][k][l] = i;
+;;      }
+;;      for (long int k = 1; k < n+1; k++) {
+;;        for (long int l = 0; l < n; l++)
+;;          *B++ = A[i + 4][j + 3][k + 2][l + 1];
+;;  } } }
+define void @SIVSeparate(i64 %n, ptr %A, ptr %B) nounwind uwtable ssp {
+entry:
+  %cmp10 = icmp sgt i64 %n, 0
+  br i1 %cmp10, label %for.cond1.preheader.preheader, label %for.end35
+  
+; CHECK-LABEL: SIVSeparate
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [-4 -3]! / assuming 2 fused loop(s): [-4 -3 -3 -1]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [* * * *]!
+  
+for.cond1.preheader.preheader:                    ; preds = %entry
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %for.cond1.preheader.preheader, %for.inc33
+  %B.addr.014 = phi ptr [ %B.addr.1.lcssa, %for.inc33 ], [ %B, %for.cond1.preheader.preheader ]
+  %i.013 = phi i64 [ %inc34, %for.inc33 ], [ 0, %for.cond1.preheader.preheader ]
+  %cmp28 = icmp sgt i64 %n, 0
+  br i1 %cmp28, label %for.cond4.preheader.preheader, label %for.inc33
+
+for.cond4.preheader.preheader:                    ; preds = %for.cond1.preheader
+  br label %for.cond4.preheader
+
+for.cond4.preheader:                              ; preds = %for.cond4.preheader.preheader, %for.inc30
+  %B.addr.110 = phi ptr [ %B.addr.2.lcssa, %for.inc30 ], [ %B.addr.014, %for.cond4.preheader.preheader ]
+  %j.09 = phi i64 [ %inc31, %for.inc30 ], [ 0, %for.cond4.preheader.preheader ]
+  %cmp53 = icmp sgt i64 %n, 0
+  br i1 %cmp53, label %for.cond7.preheader.preheader, label %for.cond15.loopexit
+
+for.cond7.preheader.preheader:                    ; preds = %for.cond4.preheader
+  br label %for.cond7.preheader
+
+for.cond7.preheader:                              ; preds = %for.cond7.preheader.preheader, %for.inc12
+  %k.07 = phi i64 [ %inc13, %for.inc12 ], [ 0, %for.cond7.preheader.preheader ]
+  %cmp81 = icmp sgt i64 %n, 0
+  br i1 %cmp81, label %for.body9.preheader, label %for.inc12
+
+for.body9.preheader:                              ; preds = %for.cond7.preheader
+  br label %for.body9
+
+for.body9:                                        ; preds = %for.body9.preheader, %for.body9
+  %l.02 = phi i64 [ %inc11, %for.body9 ], [ 0, %for.body9.preheader ]
+  %arrayidx12 = getelementptr inbounds [100 x [100 x [100 x i64]]], ptr %A, i64 %i.013, i64 %j.09, i64 %k.07, i64 %l.02
+  store i64 %i.013, ptr %arrayidx12, align 8
+  %inc11 = add nsw i64 %l.02, 1
+  %exitcond15 = icmp ne i64 %inc11, %n
+  br i1 %exitcond15, label %for.body9, label %for.inc12.loopexit
+
+for.inc12.loopexit:                               ; preds = %for.body9
+  br label %for.inc12
+
+for.inc12:                                        ; preds = %for.inc12.loopexit, %for.cond7.preheader
+  %inc13 = add nsw i64 %k.07, 1
+  %exitcond16 = icmp ne i64 %inc13, %n
+  br i1 %exitcond16, label %for.cond7.preheader, label %for.cond15.loopexit.loopexit
+
+for.cond15.loopexit.loopexit:                     ; preds = %for.inc12
+  br label %for.cond15.loopexit
+
+for.cond15.loopexit:                              ; preds = %for.cond15.loopexit.loopexit, %for.cond4.preheader
+  %cmp163 = icmp sgt i64 %n, 0
+  br i1 %cmp163, label %for.cond18.preheader.preheader, label %for.inc30
+
+for.cond18.preheader.preheader:                   ; preds = %for.cond15.loopexit
+  br label %for.cond18.preheader
+
+for.cond18.preheader:                             ; preds = %for.cond18.preheader.preheader, %for.inc27
+  %k14.06 = phi i64 [ %inc28, %for.inc27 ], [ 1, %for.cond18.preheader.preheader ]
+  %B.addr.25 = phi ptr [ %B.addr.3.lcssa, %for.inc27 ], [ %B.addr.110, %for.cond18.preheader.preheader ]
+  %cmp191 = icmp sgt i64 %n, 0
+  br i1 %cmp191, label %for.body20.preheader, label %for.inc27
+
+for.body20.preheader:                             ; preds = %for.cond18.preheader
+  br label %for.body20
+
+for.body20:                                       ; preds = %for.body20.preheader, %for.body20
+  %l17.04 = phi i64 [ %inc25, %for.body20 ], [ 0, %for.body20.preheader ]
+  %B.addr.34 = phi ptr [ %incdec.ptr, %for.body20 ], [ %B.addr.25, %for.body20.preheader ]
+  %add = add nsw i64 %l17.04, 1
+  %add21 = add nsw i64 %k14.06, 2
+  %add22 = add nsw i64 %j.09, 3
+  %add23 = add nsw i64 %i.013, 4
+  %arrayidx24 = getelementptr inbounds [100 x [100 x [100 x i64]]], ptr %A, i64 %add23, i64 %add22, i64 %add21, i64 %add
+  %0 = load i64, ptr %arrayidx24, align 8
+  %incdec.ptr = getelementptr inbounds i64, ptr %B.addr.34, i64 1
+  store i64 %0, ptr %B.addr.34, align 8
+  %inc25 = add nsw i64 %l17.04, 1
+  %exitcond = icmp ne i64 %inc25, %n
+  br i1 %exitcond, label %for.body20, label %for.inc27.loopexit
+
+for.inc27.loopexit:                               ; preds = %for.body20
+  %scevgep = getelementptr i64, ptr %B.addr.25, i64 %n
+  br label %for.inc27
+
+for.inc27:                                        ; preds = %for.inc27.loopexit, %for.cond18.preheader
+  %B.addr.3.lcssa = phi ptr [ %B.addr.25, %for.cond18.preheader ], [ %scevgep, %for.inc27.loopexit ]
+  %inc28 = add nsw i64 %k14.06, 1
+  %inc29 = add nsw i64 %n, 1
+  %exitcond17 = icmp ne i64 %inc28, %inc29
+  br i1 %exitcond17, label %for.cond18.preheader, label %for.inc30.loopexit
+
+for.inc30.loopexit:                               ; preds = %for.inc27
+  %B.addr.3.lcssa.lcssa = phi ptr [ %B.addr.3.lcssa, %for.inc27 ]
+  br label %for.inc30
+
+for.inc30:                                        ; preds = %for.inc30.loopexit, %for.cond15.loopexit
+  %B.addr.2.lcssa = phi ptr [ %B.addr.110, %for.cond15.loopexit ], [ %B.addr.3.lcssa.lcssa, %for.inc30.loopexit ]
+  %inc31 = add nsw i64 %j.09, 1
+  %exitcond18 = icmp ne i64 %inc31, %n
+  br i1 %exitcond18, label %for.cond4.preheader, label %for.inc33.loopexit
+
+for.inc33.loopexit:                               ; preds = %for.inc30
+  %B.addr.2.lcssa.lcssa = phi ptr [ %B.addr.2.lcssa, %for.inc30 ]
+  br label %for.inc33
+
+for.inc33:                                        ; preds = %for.inc33.loopexit, %for.cond1.preheader
+  %B.addr.1.lcssa = phi ptr [ %B.addr.014, %for.cond1.preheader ], [ %B.addr.2.lcssa.lcssa, %for.inc33.loopexit ]
+  %inc34 = add nsw i64 %i.013, 1
+  %exitcond19 = icmp ne i64 %inc34, %n
+  br i1 %exitcond19, label %for.cond1.preheader, label %for.end35.loopexit
+
+for.end35.loopexit:                               ; preds = %for.inc33
+  br label %for.end35
+
+for.end35:                                        ; preds = %for.end35.loopexit, %entry
+  ret void
+}



More information about the llvm-commits mailing list