[llvm] 1b811ff - [DA] Delinearization of fixed-size multi-dimensional arrays

Thu Feb 27 07:34:36 PST 2020

Author: Bardia Mahjour
Date: 2020-02-27T10:29:01-05:00
New Revision: 1b811ff8a935fafb279ad9bad36d36058a129b42

URL: https://github.com/llvm/llvm-project/commit/1b811ff8a935fafb279ad9bad36d36058a129b42
DIFF: https://github.com/llvm/llvm-project/commit/1b811ff8a935fafb279ad9bad36d36058a129b42.diff

LOG: [DA] Delinearization of fixed-size multi-dimensional arrays

Summary:
Currently the dependence analysis in LLVM is unable to compute accurate
dependence vectors for multi-dimensional fixed size arrays.
This is mainly because the delinearization algorithm in scalar evolution
relies on parametric terms to be present in the access functions. In the
case of fixed size arrays such parametric terms are not present, but we
can use the indexes from GEP instructions to recover the subscripts for
each dimension of the arrays. This patch adds this ability under the
existing option `-da-disable-delinearization-checks`.

Authored By: bmahjour

Reviewer: Meinersbur, sebpop, fhahn, dmgreen, grosser, etiotto, bollu

Reviewed By: Meinersbur

Subscribers: hiraditya, arphaman, Whitney, ppc-slack, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D72178

Added: 
    llvm/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll
    llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheckFixedSize.ll

Modified: 
    llvm/include/llvm/Analysis/DependenceAnalysis.h
    llvm/lib/Analysis/DependenceAnalysis.cpp
    llvm/test/Transforms/LoopInterchange/currentLimitation.ll
    llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll
    llvm/test/Transforms/LoopInterchange/profitability.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/DependenceAnalysis.h b/llvm/include/llvm/Analysis/DependenceAnalysis.h
index 0c4002c3c3ba..ba1153dbba28 100644

--- a/llvm/include/llvm/Analysis/DependenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/DependenceAnalysis.h
@@ -924,10 +924,28 @@ template <typename T> class ArrayRef;
     void updateDirection(Dependence::DVEntry &Level,
                          const Constraint &CurConstraint) const;
 
+    /// Given a linear access function, tries to recover subscripts
+    /// for each dimension of the array element access.
     bool tryDelinearize(Instruction *Src, Instruction *Dst,
                         SmallVectorImpl<Subscript> &Pair);
 
-  private:
+    /// Tries to delinearize access function for a fixed size multi-dimensional
+    /// array, by deriving subscripts from GEP instructions. Returns true upon
+    /// success and false otherwise.
+    bool tryDelinearizeFixedSize(Instruction *Src, Instruction *Dst,
+                                 const SCEV *SrcAccessFn,
+                                 const SCEV *DstAccessFn,
+                                 SmallVectorImpl<const SCEV *> &SrcSubscripts,
+                                 SmallVectorImpl<const SCEV *> &DstSubscripts);
+
+    /// Tries to delinearize access function for a multi-dimensional array with
+    /// symbolic runtime sizes.
+    /// Returns true upon success and false otherwise.
+    bool tryDelinearizeParametricSize(
+        Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
+        const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
+        SmallVectorImpl<const SCEV *> &DstSubscripts);
+
     /// checkSubscript - Helper function for checkSrcSubscript and
     /// checkDstSubscript to avoid duplicate code
     bool checkSubscript(const SCEV *Expr, const Loop *LoopNest,

diff  --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index 9b38053c196b..96c7bc7365a2 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -3264,23 +3264,134 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
   assert(isLoadOrStore(Dst) && "instruction is not load or store");
   Value *SrcPtr = getLoadStorePointerOperand(Src);
   Value *DstPtr = getLoadStorePointerOperand(Dst);
-
   Loop *SrcLoop = LI->getLoopFor(Src->getParent());
   Loop *DstLoop = LI->getLoopFor(Dst->getParent());
+  const SCEV *SrcAccessFn = SE->getSCEVAtScope(SrcPtr, SrcLoop);
+  const SCEV *DstAccessFn = SE->getSCEVAtScope(DstPtr, DstLoop);
+  const SCEVUnknown *SrcBase =
+      dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
+  const SCEVUnknown *DstBase =
+      dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
+
+  if (!SrcBase || !DstBase || SrcBase != DstBase)
+    return false;
 
-  // Below code mimics the code in Delinearization.cpp
-  const SCEV *SrcAccessFn =
-    SE->getSCEVAtScope(SrcPtr, SrcLoop);
-  const SCEV *DstAccessFn =
-    SE->getSCEVAtScope(DstPtr, DstLoop);
+  SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts;
+
+  if (!tryDelinearizeFixedSize(Src, Dst, SrcAccessFn, DstAccessFn,
+                               SrcSubscripts, DstSubscripts) &&
+      !tryDelinearizeParametricSize(Src, Dst, SrcAccessFn, DstAccessFn,
+                                    SrcSubscripts, DstSubscripts))
+    return false;
+
+  int Size = SrcSubscripts.size();
+  LLVM_DEBUG({
+    dbgs() << "\nSrcSubscripts: ";
+    for (int I = 0; I < Size; I++)
+      dbgs() << *SrcSubscripts[I];
+    dbgs() << "\nDstSubscripts: ";
+    for (int I = 0; I < Size; I++)
+      dbgs() << *DstSubscripts[I];
+  });
 
+  // The delinearization transforms a single-subscript MIV dependence test into
+  // a multi-subscript SIV dependence test that is easier to compute. So we
+  // resize Pair to contain as many pairs of subscripts as the delinearization
+  // has found, and then initialize the pairs following the delinearization.
+  Pair.resize(Size);
+  for (int I = 0; I < Size; ++I) {
+    Pair[I].Src = SrcSubscripts[I];
+    Pair[I].Dst = DstSubscripts[I];
+    unifySubscriptType(&Pair[I]);
+  }
+
+  return true;
+}
+
+bool DependenceInfo::tryDelinearizeFixedSize(
+    Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
+    const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
+    SmallVectorImpl<const SCEV *> &DstSubscripts) {
+
+  // In general we cannot safely assume that the subscripts recovered from GEPs
+  // are in the range of values defined for their corresponding array
+  // dimensions. For example some C language usage/interpretation make it
+  // impossible to verify this at compile-time. As such we give up here unless
+  // we can assume that the subscripts do not overlap into neighboring
+  // dimensions and that the number of dimensions matches the number of
+  // subscripts being recovered.
+  if (!DisableDelinearizationChecks)
+    return false;
+
+  Value *SrcPtr = getLoadStorePointerOperand(Src);
+  Value *DstPtr = getLoadStorePointerOperand(Dst);
   const SCEVUnknown *SrcBase =
       dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
   const SCEVUnknown *DstBase =
       dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
+  assert(SrcBase && DstBase && SrcBase == DstBase &&
+         "expected src and dst scev unknowns to be equal");
 
-  if (!SrcBase || !DstBase || SrcBase != DstBase)
+  // Check the simple case where the array dimensions are fixed size.
+  auto *SrcGEP = dyn_cast<GetElementPtrInst>(SrcPtr);
+  auto *DstGEP = dyn_cast<GetElementPtrInst>(DstPtr);
+  if (!SrcGEP || !DstGEP)
+    return false;
+
+  SmallVector<int, 4> SrcSizes, DstSizes;
+  SE->getIndexExpressionsFromGEP(SrcGEP, SrcSubscripts, SrcSizes);
+  SE->getIndexExpressionsFromGEP(DstGEP, DstSubscripts, DstSizes);
+
+  // Check that the two size arrays are non-empty and equal in length and
+  // value.
+  if (SrcSizes.empty() || SrcSubscripts.size() <= 1 ||
+      SrcSizes.size() != DstSizes.size() ||
+      !std::equal(SrcSizes.begin(), SrcSizes.end(), DstSizes.begin())) {
+    SrcSubscripts.clear();
+    DstSubscripts.clear();
     return false;
+  }
+
+  Value *SrcBasePtr = SrcGEP->getOperand(0);
+  Value *DstBasePtr = DstGEP->getOperand(0);
+  while (auto *PCast = dyn_cast<BitCastInst>(SrcBasePtr))
+    SrcBasePtr = PCast->getOperand(0);
+  while (auto *PCast = dyn_cast<BitCastInst>(DstBasePtr))
+    DstBasePtr = PCast->getOperand(0);
+
+  // Check that for identical base pointers we do not miss index offsets
+  // that have been added before this GEP is applied.
+  if (SrcBasePtr == SrcBase->getValue() && DstBasePtr == DstBase->getValue()) {
+    assert(SrcSubscripts.size() == DstSubscripts.size() &&
+           SrcSubscripts.size() == SrcSizes.size() + 1 &&
+           "Expected equal number of entries in the list of sizes and "
+           "subscripts.");
+    LLVM_DEBUG({
+      dbgs() << "Delinearized subscripts of fixed-size array\n"
+             << "SrcGEP:" << *SrcGEP << "\n"
+             << "DstGEP:" << *DstGEP << "\n";
+    });
+    return true;
+  }
+
+  SrcSubscripts.clear();
+  DstSubscripts.clear();
+  return false;
+}
+
+bool DependenceInfo::tryDelinearizeParametricSize(
+    Instruction *Src, Instruction *Dst, const SCEV *SrcAccessFn,
+    const SCEV *DstAccessFn, SmallVectorImpl<const SCEV *> &SrcSubscripts,
+    SmallVectorImpl<const SCEV *> &DstSubscripts) {
+
+  Value *SrcPtr = getLoadStorePointerOperand(Src);
+  Value *DstPtr = getLoadStorePointerOperand(Dst);
+  const SCEVUnknown *SrcBase =
+      dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));
+  const SCEVUnknown *DstBase =
+      dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));
+  assert(SrcBase && DstBase && SrcBase == DstBase &&
+         "expected src and dst scev unknowns to be equal");
 
   const SCEV *ElementSize = SE->getElementSize(Src);
   if (ElementSize != SE->getElementSize(Dst))
@@ -3304,7 +3415,6 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
   SE->findArrayDimensions(Terms, Sizes, ElementSize);
 
   // Third step: compute the access functions for each subscript.
-  SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts;
   SE->computeAccessFunctions(SrcAR, SrcSubscripts, Sizes);
   SE->computeAccessFunctions(DstAR, DstSubscripts, Sizes);
 
@@ -3313,7 +3423,7 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
       SrcSubscripts.size() != DstSubscripts.size())
     return false;
 
-  int size = SrcSubscripts.size();
+  size_t Size = SrcSubscripts.size();
 
   // Statically check that the array bounds are in-range. The first subscript we
   // don't have a size for and it cannot overflow into another subscript, so is
@@ -3322,40 +3432,20 @@ bool DependenceInfo::tryDelinearize(Instruction *Src, Instruction *Dst,
   // FIXME: It may be better to record these sizes and add them as constraints
   // to the dependency checks.
   if (!DisableDelinearizationChecks)
-    for (int i = 1; i < size; ++i) {
-      if (!isKnownNonNegative(SrcSubscripts[i], SrcPtr))
+    for (size_t I = 1; I < Size; ++I) {
+      if (!isKnownNonNegative(SrcSubscripts[I], SrcPtr))
         return false;
 
-      if (!isKnownLessThan(SrcSubscripts[i], Sizes[i - 1]))
+      if (!isKnownLessThan(SrcSubscripts[I], Sizes[I - 1]))
         return false;
 
-      if (!isKnownNonNegative(DstSubscripts[i], DstPtr))
+      if (!isKnownNonNegative(DstSubscripts[I], DstPtr))
         return false;
 
-      if (!isKnownLessThan(DstSubscripts[i], Sizes[i - 1]))
+      if (!isKnownLessThan(DstSubscripts[I], Sizes[I - 1]))
         return false;
     }
 
-  LLVM_DEBUG({
-    dbgs() << "\nSrcSubscripts: ";
-    for (int i = 0; i < size; i++)
-      dbgs() << *SrcSubscripts[i];
-    dbgs() << "\nDstSubscripts: ";
-    for (int i = 0; i < size; i++)
-      dbgs() << *DstSubscripts[i];
-  });
-
-  // The delinearization transforms a single-subscript MIV dependence test into
-  // a multi-subscript SIV dependence test that is easier to compute. So we
-  // resize Pair to contain as many pairs of subscripts as the delinearization
-  // has found, and then initialize the pairs following the delinearization.
-  Pair.resize(size);
-  for (int i = 0; i < size; ++i) {
-    Pair[i].Src = SrcSubscripts[i];
-    Pair[i].Dst = DstSubscripts[i];
-    unifySubscriptType(&Pair[i]);
-  }
-
   return true;
 }
 

diff  --git a/llvm/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll b/llvm/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll
new file mode 100644
index 000000000000..21bc37d17047
--- /dev/null
+++ b/llvm/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll
@@ -0,0 +1,106 @@
+; RUN: opt < %s -disable-output "-passes=print<da>" -aa-pipeline=basic-aa 2>&1 \
+; RUN:   -da-disable-delinearization-checks | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.6.0"
+
+;;  for (long int i = 0; i < n; i++) {
+;;    for (long int j = 0; j < n; j++) {
+;;      for (long int k = 0; k < n; k++) {
+;;        A[i][j][k] = i;
+;;      }
+;;      for (long int k = 0; k < n; k++) {
+;;        *B++ = A[i + 3][j + 2][k + 1];
+
+define void @p2(i64 %n, [100 x [100 x i64]]* %A, i64* %B) nounwind uwtable ssp {
+entry:
+  %cmp10 = icmp sgt i64 %n, 0
+  br i1 %cmp10, label %for.cond1.preheader.preheader, label %for.end26
+
+; CHECK-LABEL: p2
+; CHECK: da analyze - none!
+; CHECK: da analyze - flow [-3 -2]!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - output [* * *]!
+
+for.cond1.preheader.preheader:                    ; preds = %entry
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %for.cond1.preheader.preheader, %for.inc24
+  %B.addr.012 = phi i64* [ %B.addr.1.lcssa, %for.inc24 ], [ %B, %for.cond1.preheader.preheader ]
+  %i.011 = phi i64 [ %inc25, %for.inc24 ], [ 0, %for.cond1.preheader.preheader ]
+  %cmp26 = icmp sgt i64 %n, 0
+  br i1 %cmp26, label %for.cond4.preheader.preheader, label %for.inc24
+
+for.cond4.preheader.preheader:                    ; preds = %for.cond1.preheader
+  br label %for.cond4.preheader
+
+for.cond4.preheader:                              ; preds = %for.cond4.preheader.preheader, %for.inc21
+  %B.addr.18 = phi i64* [ %B.addr.2.lcssa, %for.inc21 ], [ %B.addr.012, %for.cond4.preheader.preheader ]
+  %j.07 = phi i64 [ %inc22, %for.inc21 ], [ 0, %for.cond4.preheader.preheader ]
+  %cmp51 = icmp sgt i64 %n, 0
+  br i1 %cmp51, label %for.body6.preheader, label %for.cond10.loopexit
+
+for.body6.preheader:                              ; preds = %for.cond4.preheader
+  br label %for.body6
+
+for.body6:                                        ; preds = %for.body6.preheader, %for.body6
+  %k.02 = phi i64 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
+  %arrayidx8 = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %A, i64 %i.011, i64 %j.07, i64 %k.02
+  store i64 %i.011, i64* %arrayidx8, align 8
+  %inc = add nsw i64 %k.02, 1
+  %exitcond13 = icmp ne i64 %inc, %n
+  br i1 %exitcond13, label %for.body6, label %for.cond10.loopexit.loopexit
+
+for.cond10.loopexit.loopexit:                     ; preds = %for.body6
+  br label %for.cond10.loopexit
+
+for.cond10.loopexit:                              ; preds = %for.cond10.loopexit.loopexit, %for.cond4.preheader
+  %cmp113 = icmp sgt i64 %n, 0
+  br i1 %cmp113, label %for.body12.preheader, label %for.inc21
+
+for.body12.preheader:                             ; preds = %for.cond10.loopexit
+  br label %for.body12
+
+for.body12:                                       ; preds = %for.body12.preheader, %for.body12
+  %k9.05 = phi i64 [ %inc19, %for.body12 ], [ 0, %for.body12.preheader ]
+  %B.addr.24 = phi i64* [ %incdec.ptr, %for.body12 ], [ %B.addr.18, %for.body12.preheader ]
+  %add = add nsw i64 %k9.05, 1
+  %add13 = add nsw i64 %j.07, 2
+  %add14 = add nsw i64 %i.011, 3
+  %arrayidx17 = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* %A, i64 %add14, i64 %add13, i64 %add
+  %0 = load i64, i64* %arrayidx17, align 8
+  %incdec.ptr = getelementptr inbounds i64, i64* %B.addr.24, i64 1
+  store i64 %0, i64* %B.addr.24, align 8
+  %inc19 = add nsw i64 %k9.05, 1
+  %exitcond = icmp ne i64 %inc19, %n
+  br i1 %exitcond, label %for.body12, label %for.inc21.loopexit
+
+for.inc21.loopexit:                               ; preds = %for.body12
+  %scevgep = getelementptr i64, i64* %B.addr.18, i64 %n
+  br label %for.inc21
+
+for.inc21:                                        ; preds = %for.inc21.loopexit, %for.cond10.loopexit
+  %B.addr.2.lcssa = phi i64* [ %B.addr.18, %for.cond10.loopexit ], [ %scevgep, %for.inc21.loopexit ]
+  %inc22 = add nsw i64 %j.07, 1
+  %exitcond14 = icmp ne i64 %inc22, %n
+  br i1 %exitcond14, label %for.cond4.preheader, label %for.inc24.loopexit
+
+for.inc24.loopexit:                               ; preds = %for.inc21
+  %B.addr.2.lcssa.lcssa = phi i64* [ %B.addr.2.lcssa, %for.inc21 ]
+  br label %for.inc24
+
+for.inc24:                                        ; preds = %for.inc24.loopexit, %for.cond1.preheader
+  %B.addr.1.lcssa = phi i64* [ %B.addr.012, %for.cond1.preheader ], [ %B.addr.2.lcssa.lcssa, %for.inc24.loopexit ]
+  %inc25 = add nsw i64 %i.011, 1
+  %exitcond15 = icmp ne i64 %inc25, %n
+  br i1 %exitcond15, label %for.cond1.preheader, label %for.end26.loopexit
+
+for.end26.loopexit:                               ; preds = %for.inc24
+  br label %for.end26
+
+for.end26:                                        ; preds = %for.end26.loopexit, %entry
+  ret void
+}

diff  --git a/llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheckFixedSize.ll b/llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheckFixedSize.ll
new file mode 100644
index 000000000000..d783d2ec163f
--- /dev/null
+++ b/llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheckFixedSize.ll
@@ -0,0 +1,120 @@
+; RUN: opt < %s -disable-output -passes="print<da>"                            \
+; RUN: -da-disable-delinearization-checks 2>&1 | FileCheck %s
+; RUN: opt < %s -da -analyze -da-disable-delinearization-checks | FileCheck %s
+
+; CHECK-LABEL: t1
+; CHECK: da analyze - none!
+; CHECK: da analyze - consistent anti [1 -2]!
+; CHECK: da analyze - none!
+
+;; #define N 1024
+;; #define M 2048
+;; void t1(int a[N][M]) {
+;;   for (int i = 0; i < N-1; ++i)
+;;     for (int j = 2; j < M; ++j)
+;;       a[i][j] = a[i+1][j-2];
+;; }
+
+define void @t1([2048 x i32]* %a) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.inc11
+  %indvars.iv4 = phi i64 [ 0, %entry ], [ %indvars.iv.next5, %for.inc11 ]
+  br label %for.body4
+
+for.body4:                                        ; preds = %for.body, %for.body4
+  %indvars.iv = phi i64 [ 2, %for.body ], [ %indvars.iv.next, %for.body4 ]
+  %0 = add nuw nsw i64 %indvars.iv4, 1
+  %1 = add nsw i64 %indvars.iv, -2
+  %arrayidx6 = getelementptr inbounds [2048 x i32], [2048 x i32]* %a, i64 %0, i64 %1
+  %2 = load i32, i32* %arrayidx6, align 4
+  %arrayidx10 = getelementptr inbounds [2048 x i32], [2048 x i32]* %a, i64 %indvars.iv4, i64 %indvars.iv
+  store i32 %2, i32* %arrayidx10, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp ne i64 %indvars.iv.next, 2048
+  br i1 %exitcond, label %for.body4, label %for.inc11
+
+for.inc11:                                        ; preds = %for.body4
+  %indvars.iv.next5 = add nuw nsw i64 %indvars.iv4, 1
+  %exitcond7 = icmp ne i64 %indvars.iv.next5, 1023
+  br i1 %exitcond7, label %for.body, label %for.end13
+
+for.end13:                                        ; preds = %for.inc11
+  ret void
+}
+
+
+; CHECK-LABEL: t2
+; CHECK: da analyze - none!
+; CHECK: da analyze - consistent anti [1 -2 0 -3 2]!
+; CHECK: da analyze - none!
+
+;; #define N 1024
+;; #define M 2048
+;; void t2(int a[][N][N][N][M]) {
+;;   for (int i1 = 0; i1 < N-1; ++i1)
+;;     for (int i2 = 2; i2 < N; ++i2)
+;;       for (int i3 = 0; i3 < N; ++i3)
+;;         for (int i4 = 3; i4 < N; ++i4)
+;;           for (int i5 = 0; i5 < M-2; ++i5)
+;;             a[i1][i2][i3][i4][i5] = a[i1+1][i2-2][i3][i4-3][i5+2];
+;; }
+
+define void @t2([1024 x [1024 x [1024 x [2048 x i32]]]]* %a) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.inc46
+  %indvars.iv18 = phi i64 [ 0, %entry ], [ %indvars.iv.next19, %for.inc46 ]
+  br label %for.body4
+
+for.body4:                                        ; preds = %for.body, %for.inc43
+  %indvars.iv14 = phi i64 [ 2, %for.body ], [ %indvars.iv.next15, %for.inc43 ]
+  br label %for.body8
+
+for.body8:                                        ; preds = %for.body4, %for.inc40
+  %indvars.iv11 = phi i64 [ 0, %for.body4 ], [ %indvars.iv.next12, %for.inc40 ]
+  br label %for.body12
+
+for.body12:                                       ; preds = %for.body8, %for.inc37
+  %indvars.iv7 = phi i64 [ 3, %for.body8 ], [ %indvars.iv.next8, %for.inc37 ]
+  br label %for.body16
+
+for.body16:                                       ; preds = %for.body12, %for.body16
+  %indvars.iv = phi i64 [ 0, %for.body12 ], [ %indvars.iv.next, %for.body16 ]
+  %0 = add nuw nsw i64 %indvars.iv18, 1
+  %1 = add nsw i64 %indvars.iv14, -2
+  %2 = add nsw i64 %indvars.iv7, -3
+  %3 = add nuw nsw i64 %indvars.iv, 2
+  %arrayidx26 = getelementptr inbounds [1024 x [1024 x [1024 x [2048 x i32]]]], [1024 x [1024 x [1024 x [2048 x i32]]]]* %a, i64 %0, i64 %1, i64 %indvars.iv11, i64 %2, i64 %3
+  %4 = load i32, i32* %arrayidx26, align 4
+  %arrayidx36 = getelementptr inbounds [1024 x [1024 x [1024 x [2048 x i32]]]], [1024 x [1024 x [1024 x [2048 x i32]]]]* %a, i64 %indvars.iv18, i64 %indvars.iv14, i64 %indvars.iv11, i64 %indvars.iv7, i64 %indvars.iv
+  store i32 %4, i32* %arrayidx36, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp ne i64 %indvars.iv.next, 2046
+  br i1 %exitcond, label %for.body16, label %for.inc37
+
+for.inc37:                                        ; preds = %for.body16
+  %indvars.iv.next8 = add nuw nsw i64 %indvars.iv7, 1
+  %exitcond10 = icmp ne i64 %indvars.iv.next8, 1024
+  br i1 %exitcond10, label %for.body12, label %for.inc40
+
+for.inc40:                                        ; preds = %for.inc37
+  %indvars.iv.next12 = add nuw nsw i64 %indvars.iv11, 1
+  %exitcond13 = icmp ne i64 %indvars.iv.next12, 1024
+  br i1 %exitcond13, label %for.body8, label %for.inc43
+
+for.inc43:                                        ; preds = %for.inc40
+  %indvars.iv.next15 = add nuw nsw i64 %indvars.iv14, 1
+  %exitcond17 = icmp ne i64 %indvars.iv.next15, 1024
+  br i1 %exitcond17, label %for.body4, label %for.inc46
+
+for.inc46:                                        ; preds = %for.inc43
+  %indvars.iv.next19 = add nuw nsw i64 %indvars.iv18, 1
+  %exitcond21 = icmp ne i64 %indvars.iv.next19, 1023
+  br i1 %exitcond21, label %for.body, label %for.end48
+
+for.end48:                                        ; preds = %for.inc46
+  ret void
+}

diff  --git a/llvm/test/Transforms/LoopInterchange/currentLimitation.ll b/llvm/test/Transforms/LoopInterchange/currentLimitation.ll
index 1a9d42dbb9ae..4f20bdc1e2e6 100644
--- a/llvm/test/Transforms/LoopInterchange/currentLimitation.ll
+++ b/llvm/test/Transforms/LoopInterchange/currentLimitation.ll
@@ -2,6 +2,11 @@
 ; RUN:   -pass-remarks-output=%t -verify-loop-info -verify-dom-info -S | FileCheck -check-prefix=IR %s
 ; RUN: FileCheck --input-file=%t %s
 
+; RUN: opt < %s -basicaa -loop-interchange -pass-remarks-missed='loop-interchange' \
+; RUN:   -da-disable-delinearization-checks -pass-remarks-output=%t             \
+; RUN:   -verify-loop-info -verify-dom-info -S | FileCheck -check-prefix=IR %s
+; RUN: FileCheck --check-prefix=DELIN --input-file=%t %s
+
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
  
@@ -16,13 +21,14 @@ target triple = "x86_64-unknown-linux-gnu"
 ;;    for(int j=1;j<N-1;j++)
 ;;      A[j+1][i+1] = A[j+1][i+1] + k;
 
-; FIXME: Currently fails because of DA changes.
 ; IR-LABEL: @interchange_01
 ; IR-NOT: split
 
 ; CHECK:      Name:            Dependence
 ; CHECK-NEXT: Function:        interchange_01
 
+; DELIN:      Name:            UnsupportedInsBetweenInduction
+; DELIN-NEXT: Function:        interchange_01
 define void @interchange_01(i32 %k, i32 %N) {
  entry:
    %sub = add nsw i32 %N, -1

diff  --git a/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll b/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll
index b19a4a4875b9..8aaf7c7e97c4 100644
--- a/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll
+++ b/llvm/test/Transforms/LoopInterchange/loop-interchange-optimization-remarks.ll
@@ -5,6 +5,11 @@
 ; RUN:     -pass-remarks='loop-interchange' -S
 ; RUN: cat %t |  FileCheck %s
 
+; RUN: opt < %s -basicaa -loop-interchange -verify-dom-info -verify-loop-info \
+; RUN:     -pass-remarks-output=%t -pass-remarks-missed='loop-interchange' \
+; RUN:     -pass-remarks='loop-interchange' -S -da-disable-delinearization-checks
+; RUN: cat %t |  FileCheck --check-prefix=DELIN %s
+
 @A = common global [100 x [100 x i32]] zeroinitializer
 @B = common global [100 x [100 x i32]] zeroinitializer
 @C = common global [100 x i32] zeroinitializer
@@ -61,6 +66,18 @@ for.end19:
 ; CHECK-NEXT:   - String:          Cannot interchange loops due to dependences.
 ; CHECK-NEXT: ...
 
+; DELIN: --- !Missed
+; DELIN-NEXT: Pass:            loop-interchange
+; DELIN-NEXT: Name:            InterchangeNotProfitable
+; DELIN-NEXT: Function:        test01
+; DELIN-NEXT: Args:
+; DELIN-NEXT:   - String:          'Interchanging loops is too costly (cost='
+; DELIN-NEXT:   - Cost:            '2'
+; DELIN-NEXT:   - String:          ', threshold='
+; DELIN-NEXT:   - Threshold:       '0'
+; DELIN-NEXT:   - String:          ') and it does not improve parallelism.'
+; DELIN-NEXT: ...
+
 ;;--------------------------------------Test case 02------------------------------------
 ;; [FIXME] This loop though valid is currently not interchanged due to the
 ;; limitation that we cannot split the inner loop latch due to multiple use of inner induction
@@ -113,6 +130,14 @@ define void @test02(i32 %k, i32 %N) {
 ; CHECK-NEXT:   - String:          Cannot interchange loops due to dependences.
 ; CHECK-NEXT: ...
 
+; DELIN: --- !Missed
+; DELIN-NEXT: Pass:            loop-interchange
+; DELIN-NEXT: Name:            UnsupportedInsBetweenInduction
+; DELIN-NEXT: Function:        test02
+; DELIN-NEXT: Args:
+; DELIN-NEXT:   - String:          Found unsupported instruction between induction variable increment and branch.
+; DELIN-NEXT: ...
+
 ;;-----------------------------------Test case 03-------------------------------
 ;; Test to make sure we can handle output dependencies.
 ;;
@@ -161,6 +186,14 @@ for.body4:                                        ; preds = %for.body4, %for.con
 ; CHECK-NEXT:   - String:          Cannot interchange loops due to dependences.
 ; CHECK-NEXT: ...
 
+; DELIN: --- !Passed
+; DELIN-NEXT: Pass:            loop-interchange
+; DELIN-NEXT: Name:            Interchanged
+; DELIN-NEXT: Function:        test03
+; DELIN-NEXT: Args:
+; DELIN-NEXT:  - String:          Loop interchanged with enclosing loop.
+; DELIN-NEXT: ...
+
 ;;--------------------------------------Test case 04-------------------------------------
 ;; Loops not tightly nested are not interchanged
 ;;  for(int j=0;j<N;j++) {
@@ -215,3 +248,11 @@ for.end17:
 ; CHECK-NEXT: Args:
 ; CHECK-NEXT:   - String:          Cannot interchange loops due to dependences.
 ; CHECK-NEXT: ...
+
+; DELIN: --- !Missed
+; DELIN-NEXT: Pass:            loop-interchange
+; DELIN-NEXT: Name:            NotTightlyNested
+; DELIN-NEXT: Function:        test04
+; DELIN-NEXT: Args:
+; DELIN-NEXT:  - String:          Cannot interchange loops because they are not tightly nested.
+; DELIN-NEXT: ...

diff  --git a/llvm/test/Transforms/LoopInterchange/profitability.ll b/llvm/test/Transforms/LoopInterchange/profitability.ll
index 9fd217434c76..996ae61284ff 100644
--- a/llvm/test/Transforms/LoopInterchange/profitability.ll
+++ b/llvm/test/Transforms/LoopInterchange/profitability.ll
@@ -2,6 +2,11 @@
 ; RUN:     -pass-remarks=loop-interchange -pass-remarks-missed=loop-interchange
 ; RUN: FileCheck -input-file %t %s
 
+; RUN: opt < %s -loop-interchange -pass-remarks-output=%t -verify-dom-info -verify-loop-info \
+; RUN:     -pass-remarks=loop-interchange -pass-remarks-missed=loop-interchange \
+; RUN:     -da-disable-delinearization-checks
+; RUN: FileCheck --check-prefix=DELIN -input-file %t %s
+
 ;; We test profitability model in these test cases.
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -15,10 +20,13 @@ target triple = "x86_64-unknown-linux-gnu"
 ;;   for(int i=1;i<100;i++)
 ;;     for(int j=1;j<100;j++)
 ;;       A[j][i] = A[j - 1][i] + B[j][i];
-;; FIXME: DA misses this case after D35430
 
 ; CHECK:      Name:            Dependence
 ; CHECK-NEXT: Function:        interchange_01
+
+; DELIN:      Name:            Interchanged
+; DELIN-NEXT: Function:        interchange_01
+
 define void @interchange_01() {
 entry:
   br label %for2.preheader