[llvm] [DA] Test AddRecs are nsw before strong SIV test (PR #183421)

Ehsan Amiri via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 5 12:59:04 PST 2026


https://github.com/amehsan updated https://github.com/llvm/llvm-project/pull/183421

>From d72524aac1e4d428a0e853bc16bee080e19cf3d1 Mon Sep 17 00:00:00 2001
From: Ehsan Amiri <ehsan.amiri at huawei.com>
Date: Wed, 25 Feb 2026 13:50:59 -0500
Subject: [PATCH 1/6] [DA] Test AddRecs are nsw before strong SIV test

---
 llvm/lib/Analysis/DependenceAnalysis.cpp      |  3 +
 llvm/test/Analysis/DDG/basic-b.ll             | 23 ++-----
 .../Analysis/DependenceAnalysis/Banerjee.ll   |  4 +-
 .../Analysis/DependenceAnalysis/BasePtrBug.ll |  4 +-
 .../Analysis/DependenceAnalysis/DADelin.ll    | 20 ++----
 .../DependenceAnalysis/DifferentOffsets.ll    |  2 +-
 .../DependenceAnalysis/MIVCheckConst.ll       |  5 +-
 .../NonCanonicalizedSubscript.ll              |  2 +-
 .../DependenceAnalysis/Preliminary.ll         | 10 +--
 .../PreliminaryNoValidityCheckFixedSize.ll    |  4 +-
 .../DependenceAnalysis/Propagating.ll         |  8 +--
 .../SimpleSIVNoValidityCheck.ll               | 12 ++--
 .../Analysis/DependenceAnalysis/StrongSIV.ll  | 34 ++++------
 .../DependenceAnalysis/SymbolicRDIV.ll        | 36 +++++-----
 .../DependenceAnalysis/SymbolicSIV.ll         | 66 ++++++++-----------
 .../DependenceAnalysis/WeakCrossingSIV.ll     | 22 +++----
 .../DependenceAnalysis/WeakZeroDstSIV.ll      | 10 ++-
 .../DependenceAnalysis/WeakZeroSrcSIV.ll      | 10 ++-
 .../becount-couldnotcompute.ll                |  4 +-
 .../exact-rdiv-addrec-wrap.ll                 |  2 +-
 .../exact-siv-addrec-wrap.ll                  |  2 +-
 .../strong-siv-addrec-wrap.ll                 | 25 ++++---
 .../weak-crossing-siv-addrec-wrap.ll          | 25 +++----
 .../LoopFusion/da_separate_loops.ll           |  2 +-
 .../LoopInterchange/lcssa-preheader.ll        | 25 ++-----
 25 files changed, 155 insertions(+), 205 deletions(-)

diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index f2d8abf7aeef4..4f747ea1c3155 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -1242,6 +1242,9 @@ bool DependenceInfo::strongSIVtest(const SCEVAddRecExpr *Src,
   if (!isDependenceTestEnabled(DependenceTestType::StrongSIV))
     return false;
 
+  if (!Src->hasNoSignedWrap() || !Dst->hasNoSignedWrap())
+    return false;
+
   const SCEV *Coeff = Src->getStepRecurrence(*SE);
   assert(Coeff == Dst->getStepRecurrence(*SE) &&
          "Expecting same coefficient in Strong SIV test");
diff --git a/llvm/test/Analysis/DDG/basic-b.ll b/llvm/test/Analysis/DDG/basic-b.ll
index 66a6a83972406..239b17e089ea3 100644
--- a/llvm/test/Analysis/DDG/basic-b.ll
+++ b/llvm/test/Analysis/DDG/basic-b.ll
@@ -38,36 +38,27 @@
 ; CHECK-NEXT: Instructions:
 ; CHECK-NEXT:    %sub1 = add i64 %i.02, -1
 ; CHECK-NEXT:    %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %sub1
+; CHECK-NEXT:    %1 = load float, ptr %arrayidx2, align 4
 ; CHECK-NEXT: Edges:
-; CHECK-NEXT:  [def-use] to [[N8]]
+; CHECK-NEXT:  [def-use] to [[N9:0x[0-9a-f]*]]
+; CHECK-NEXT:  [memory] to [[N8]]
 
 ; CHECK: Node Address:[[N4]]:multi-instruction
 ; CHECK-NEXT: Instructions:
 ; CHECK-NEXT:    %arrayidx = getelementptr inbounds float, ptr %b, i64 %i.02
 ; CHECK-NEXT:    %0 = load float, ptr %arrayidx, align 4
 ; CHECK-NEXT: Edges:
-; CHECK-NEXT:  [def-use] to [[N8]]
-
-; CHECK: Node Address:[[N8]]:pi-block
-; CHECK-NEXT: --- start of nodes in pi-block ---
-; CHECK: Node Address:[[N9:0x[0-9a-f]*]]:single-instruction
-; CHECK-NEXT: Instructions:
-; CHECK-NEXT:    %1 = load float, ptr %arrayidx2, align 4
-; CHECK-NEXT: Edges:
-; CHECK-NEXT:  [def-use] to [[N10:0x[0-9a-f]*]]
+; CHECK-NEXT:  [def-use] to [[N9]]
 
-; CHECK: Node Address:[[N10]]:single-instruction
+; CHECK: Node Address:[[N9]]:single-instruction
 ; CHECK-NEXT: Instructions:
 ; CHECK-NEXT:    %add = fadd float %0, %1
 ; CHECK-NEXT: Edges:
-; CHECK-NEXT:  [def-use] to [[N11:0x[0-9a-f]*]]
+; CHECK-NEXT:  [def-use] to [[N8:0x[0-9a-f]*]]
 
-; CHECK: Node Address:[[N11]]:single-instruction
+; CHECK: Node Address:[[N8]]:single-instruction
 ; CHECK-NEXT: Instructions:
 ; CHECK-NEXT:    store float %add, ptr %arrayidx3, align 4
-; CHECK-NEXT: Edges:
-; CHECK-NEXT:  [memory] to [[N9]]
-; CHECK-NEXT:--- end of nodes in pi-block ---
 ; CHECK-NEXT: Edges:none!
 
 
diff --git a/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll b/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll
index 6dde8844c6040..9fe574dd94ef7 100644
--- a/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll
@@ -52,11 +52,11 @@ define void @banerjee0(ptr %A, ptr %B, i64 %m, i64 %n) nounwind uwtable ssp {
 ; DELIN-NEXT:  Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 %0, ptr %B.addr.11, align 8
 ; DELIN-NEXT:    da analyze - confused!
 ; DELIN-NEXT:  Src: %0 = load i64, ptr %arrayidx6, align 8 --> Dst: %0 = load i64, ptr %arrayidx6, align 8
-; DELIN-NEXT:    da analyze - none!
+; DELIN-NEXT:    da analyze - consistent input [0 *]!
 ; DELIN-NEXT:  Src: %0 = load i64, ptr %arrayidx6, align 8 --> Dst: store i64 %0, ptr %B.addr.11, align 8
 ; DELIN-NEXT:    da analyze - confused!
 ; DELIN-NEXT:  Src: store i64 %0, ptr %B.addr.11, align 8 --> Dst: store i64 %0, ptr %B.addr.11, align 8
-; DELIN-NEXT:    da analyze - none!
+; DELIN-NEXT:    da analyze - consistent output [0 *]!
 ;
 entry:
   br label %for.cond1.preheader
diff --git a/llvm/test/Analysis/DependenceAnalysis/BasePtrBug.ll b/llvm/test/Analysis/DependenceAnalysis/BasePtrBug.ll
index 81e461a5e092d..18544005cbb00 100644
--- a/llvm/test/Analysis/DependenceAnalysis/BasePtrBug.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/BasePtrBug.ll
@@ -18,11 +18,11 @@ define void @test1(ptr nocapture %A, ptr nocapture %B, i32 %N) #0 {
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %gep.0, align 4 --> Dst: %0 = load i32, ptr %gep.0, align 4
 ; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %gep.0, align 4 --> Dst: %1 = load i32, ptr %gep.1, align 4
-; CHECK-NEXT:    da analyze - input [*|<]!
+; CHECK-NEXT:    da analyze - consistent input [*|<]!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %gep.0, align 4 --> Dst: store i32 %add, ptr %gep.B, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %1 = load i32, ptr %gep.1, align 4 --> Dst: %1 = load i32, ptr %gep.1, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent input [*]!
 ; CHECK-NEXT:  Src: %1 = load i32, ptr %gep.1, align 4 --> Dst: store i32 %add, ptr %gep.B, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %add, ptr %gep.B, align 4 --> Dst: store i32 %add, ptr %gep.B, align 4
diff --git a/llvm/test/Analysis/DependenceAnalysis/DADelin.ll b/llvm/test/Analysis/DependenceAnalysis/DADelin.ll
index 2f43f8d705cb2..6195ad7c57207 100644
--- a/llvm/test/Analysis/DependenceAnalysis/DADelin.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/DADelin.ll
@@ -648,15 +648,11 @@ exit:
 define void @coeff_may_negative(ptr %a, i32 %k) {
 ; CHECK-LABEL: 'coeff_may_negative'
 ; CHECK-NEXT:  Src: store i8 42, ptr %idx.0, align 1 --> Dst: store i8 42, ptr %idx.0, align 1
-; CHECK-NEXT:    da analyze - consistent output [0]!
-; CHECK-NEXT:    Runtime Assumptions:
-; CHECK-NEXT:    Compare predicate: %k ne) 0
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ; CHECK-NEXT:  Src: store i8 42, ptr %idx.0, align 1 --> Dst: store i8 42, ptr %idx.1, align 1
-; CHECK-NEXT:    da analyze - output [*|<]!
+; CHECK-NEXT:    da analyze - consistent output [*|<]!
 ; CHECK-NEXT:  Src: store i8 42, ptr %idx.1, align 1 --> Dst: store i8 42, ptr %idx.1, align 1
-; CHECK-NEXT:    da analyze - consistent output [0]!
-; CHECK-NEXT:    Runtime Assumptions:
-; CHECK-NEXT:    Compare predicate: %k ne) 0
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ;
 entry:
   br label %loop
@@ -691,15 +687,11 @@ exit:
 define void @coeff_positive(ptr %a, i32 %k) {
 ; CHECK-LABEL: 'coeff_positive'
 ; CHECK-NEXT:  Src: store i8 42, ptr %idx.0, align 1 --> Dst: store i8 42, ptr %idx.0, align 1
-; CHECK-NEXT:    da analyze - consistent output [0]!
-; CHECK-NEXT:    Runtime Assumptions:
-; CHECK-NEXT:    Compare predicate: %k ne) 0
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ; CHECK-NEXT:  Src: store i8 42, ptr %idx.0, align 1 --> Dst: store i8 42, ptr %idx.1, align 1
-; CHECK-NEXT:    da analyze - output [*|<]!
+; CHECK-NEXT:    da analyze - consistent output [*|<]!
 ; CHECK-NEXT:  Src: store i8 42, ptr %idx.1, align 1 --> Dst: store i8 42, ptr %idx.1, align 1
-; CHECK-NEXT:    da analyze - consistent output [0]!
-; CHECK-NEXT:    Runtime Assumptions:
-; CHECK-NEXT:    Compare predicate: %k ne) 0
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ;
 entry:
   br label %loop
diff --git a/llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll b/llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll
index 3360e603eb406..d6e2eb2dcb572 100644
--- a/llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll
@@ -148,7 +148,7 @@ end:
 define void @multidim_accesses(ptr %A) {
 ; CHECK-LABEL: 'multidim_accesses'
 ; CHECK-NEXT:  Src: store i32 1, ptr %idx0, align 4 --> Dst: store i32 1, ptr %idx0, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [0 0 *]!
 ; CHECK-NEXT:  Src: store i32 1, ptr %idx0, align 4 --> Dst: store i32 1, ptr %idx1, align 4
 ; CHECK-NEXT:    da analyze - output [<= * *|<]!
 ; CHECK-NEXT:  Src: store i32 1, ptr %idx1, align 4 --> Dst: store i32 1, ptr %idx1, align 4
diff --git a/llvm/test/Analysis/DependenceAnalysis/MIVCheckConst.ll b/llvm/test/Analysis/DependenceAnalysis/MIVCheckConst.ll
index d91a916d785c2..64f75dcf1522e 100644
--- a/llvm/test/Analysis/DependenceAnalysis/MIVCheckConst.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/MIVCheckConst.ll
@@ -39,18 +39,17 @@ define void @test(ptr %A, ptr %B, i1 %arg, i32 %n, i32 %m) align 2 {
 ; CHECK-NEXT:  Src: %v1 = load i32, ptr %B, align 4 --> Dst: %v32 = load <32 x i32>, ptr %v30, align 128
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %v27 = load <32 x i32>, ptr %v25, align 256 --> Dst: %v27 = load <32 x i32>, ptr %v25, align 256
-; CHECK-NEXT:    da analyze - consistent input [0 S S]!
+; CHECK-NEXT:    da analyze - consistent input [* S S]!
 ; CHECK-NEXT:    Runtime Assumptions:
 ; CHECK-NEXT:    Equal predicate: (zext i7 (4 * (trunc i32 %v1 to i7) * (1 + (trunc i32 %n to i7))) to i32) == 0
 ; CHECK-NEXT:    Equal predicate: (8 * (zext i4 (trunc i32 %v1 to i4) to i32))<nuw><nsw> == 0
-; CHECK-NEXT:    Compare predicate: (8 * %v1) ne) 0
 ; CHECK-NEXT:  Src: %v27 = load <32 x i32>, ptr %v25, align 256 --> Dst: %v32 = load <32 x i32>, ptr %v30, align 128
 ; CHECK-NEXT:    da analyze - input [* S S|<]!
 ; CHECK-NEXT:    Runtime Assumptions:
 ; CHECK-NEXT:    Equal predicate: (zext i7 (4 * (trunc i32 %v1 to i7) * (1 + (trunc i32 %n to i7))) to i32) == 0
 ; CHECK-NEXT:    Equal predicate: (8 * (zext i4 (trunc i32 %v1 to i4) to i32))<nuw><nsw> == 0
 ; CHECK-NEXT:  Src: %v32 = load <32 x i32>, ptr %v30, align 128 --> Dst: %v32 = load <32 x i32>, ptr %v30, align 128
-; CHECK-NEXT:    da analyze - consistent input [0 S S]!
+; CHECK-NEXT:    da analyze - consistent input [* S S]!
 ;
 entry:
   %v1 = load i32, ptr %B, align 4
diff --git a/llvm/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll b/llvm/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll
index 0cee9aa90236a..1db0d4170ee09 100644
--- a/llvm/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll
@@ -61,7 +61,7 @@ define void @coupled_miv_type_mismatch(i32 %n) {
 ; CHECK-NEXT:  Src: %2 = load i32, ptr %arrayidx5, align 4 --> Dst: store i32 %add6, ptr %arrayidx10, align 4
 ; CHECK-NEXT:    da analyze - anti [< >]!
 ; CHECK-NEXT:  Src: store i32 %add6, ptr %arrayidx10, align 4 --> Dst: store i32 %add6, ptr %arrayidx10, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [0 *]!
 ;
 entry:
   br label %for.cond
diff --git a/llvm/test/Analysis/DependenceAnalysis/Preliminary.ll b/llvm/test/Analysis/DependenceAnalysis/Preliminary.ll
index 8cb0e2ac770dc..de525d7c0cd2f 100644
--- a/llvm/test/Analysis/DependenceAnalysis/Preliminary.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/Preliminary.ll
@@ -448,7 +448,7 @@ define void @p4(ptr %A, ptr %B, i64 %n) nounwind uwtable ssp {
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx5, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %0, ptr %B.addr.02, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ;
 entry:
   %cmp1 = icmp sgt i64 %n, 0
@@ -501,7 +501,7 @@ define void @p5(ptr %A, ptr %B, i64 %n) nounwind uwtable ssp {
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx5, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %0, ptr %B.addr.02, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ;
 entry:
   %cmp1 = icmp sgt i64 %n, 0
@@ -727,11 +727,11 @@ entry:
 define void @foo(ptr %s, i32 %size) nounwind uwtable ssp {
 ; CHECK-LABEL: 'foo'
 ; CHECK-NEXT:  Src: %1 = load i32, ptr %0, align 4 --> Dst: %1 = load i32, ptr %0, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent input [*]!
 ; CHECK-NEXT:  Src: %1 = load i32, ptr %0, align 4 --> Dst: store i32 %1, ptr %i.02, align 4
-; CHECK-NEXT:    da analyze - consistent anti [1]!
+; CHECK-NEXT:    da analyze - consistent anti [*|<]!
 ; CHECK-NEXT:  Src: store i32 %1, ptr %i.02, align 4 --> Dst: store i32 %1, ptr %i.02, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ;
 entry:
   %idx.ext = zext i32 %size to i64
diff --git a/llvm/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll b/llvm/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll
index 8cd29e691a7ef..0cf2f8049f2a0 100644
--- a/llvm/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/PreliminaryNoValidityCheckFixedSize.ll
@@ -20,11 +20,11 @@ define void @p2(i64 %n, ptr %A, ptr %B) nounwind uwtable ssp {
 ; CHECK-NEXT:  Src: store i64 %i.011, ptr %arrayidx8, align 8 --> Dst: store i64 %i.011, ptr %arrayidx8, align 8
 ; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: store i64 %i.011, ptr %arrayidx8, align 8 --> Dst: %0 = load i64, ptr %arrayidx17, align 8
-; CHECK-NEXT:    da analyze - flow [-3 -2] / assuming 1 loop level(s) fused: [-3 -2 -1]!
+; CHECK-NEXT:    da analyze - flow [* *|<] / assuming 1 loop level(s) fused: [* * -1|<]!
 ; CHECK-NEXT:  Src: store i64 %i.011, ptr %arrayidx8, align 8 --> Dst: store i64 %0, ptr %B.addr.24, align 8
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i64, ptr %arrayidx17, align 8 --> Dst: %0 = load i64, ptr %arrayidx17, align 8
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent input [* * 0]!
 ; CHECK-NEXT:  Src: %0 = load i64, ptr %arrayidx17, align 8 --> Dst: store i64 %0, ptr %B.addr.24, align 8
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i64 %0, ptr %B.addr.24, align 8 --> Dst: store i64 %0, ptr %B.addr.24, align 8
diff --git a/llvm/test/Analysis/DependenceAnalysis/Propagating.ll b/llvm/test/Analysis/DependenceAnalysis/Propagating.ll
index 17f2de43cc611..d3b4e7d94d1cf 100644
--- a/llvm/test/Analysis/DependenceAnalysis/Propagating.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/Propagating.ll
@@ -435,13 +435,13 @@ for.end14:                                        ; preds = %for.inc12
 define void @prop7(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp {
 ; CHECK-LABEL: 'prop7'
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx7, align 4 --> Dst: store i32 %conv, ptr %arrayidx7, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [0 *]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx7, align 4 --> Dst: %0 = load i32, ptr %arrayidx13, align 4
-; CHECK-NEXT:    da analyze - flow [* -38]!
+; CHECK-NEXT:    da analyze - flow [* *|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx7, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx13, align 4 --> Dst: %0 = load i32, ptr %arrayidx13, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent input [0 *]!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx13, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %0, ptr %B.addr.11, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4
@@ -503,7 +503,7 @@ define void @prop8(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp {
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx4, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx9, align 4 --> Dst: %0 = load i32, ptr %arrayidx9, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent input [0 *]!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx9, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %0, ptr %B.addr.11, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4
diff --git a/llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheck.ll b/llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheck.ll
index 181a4494b036e..b04a206255f90 100644
--- a/llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheck.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheck.ll
@@ -78,11 +78,11 @@ for.end14:                                        ; preds = %entry, %for.inc12
 define void @t2(i32 signext %n, i32 signext %m, ptr %a) {
 ; CHECK-LABEL: 't2'
 ; CHECK-NEXT:  Src: %21 = load i32, ptr %arrayidx28, align 4 --> Dst: %21 = load i32, ptr %arrayidx28, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent input [0 0 0 * 0]!
 ; CHECK-NEXT:  Src: %21 = load i32, ptr %arrayidx28, align 4 --> Dst: store i32 %21, ptr %arrayidx38, align 4
-; CHECK-NEXT:    da analyze - consistent anti [1 -2 0 -3 2]!
+; CHECK-NEXT:    da analyze - consistent anti [1 -2 0 * 2]!
 ; CHECK-NEXT:  Src: store i32 %21, ptr %arrayidx38, align 4 --> Dst: store i32 %21, ptr %arrayidx38, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [0 0 0 * 0]!
 ;
 ; LIN-LABEL: 't2'
 ; LIN-NEXT:  Src: %21 = load i32, ptr %arrayidx28, align 4 --> Dst: %21 = load i32, ptr %arrayidx28, align 4
@@ -208,11 +208,11 @@ for.end50:                                        ; preds = %entry, %for.inc48
 define void @t3(i64 %n, i64 %m, i64 %lb, ptr %a) {
 ; CHECK-LABEL: 't3'
 ; CHECK-NEXT:  Src: %2 = load i32, ptr %arrayidx6, align 4 --> Dst: %2 = load i32, ptr %arrayidx6, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent input [* *]!
 ; CHECK-NEXT:  Src: %2 = load i32, ptr %arrayidx6, align 4 --> Dst: store i32 %2, ptr %arrayidx8, align 4
-; CHECK-NEXT:    da analyze - anti [1 *]!
+; CHECK-NEXT:    da analyze - consistent anti [* *|<]!
 ; CHECK-NEXT:  Src: store i32 %2, ptr %arrayidx8, align 4 --> Dst: store i32 %2, ptr %arrayidx8, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [* *]!
 ;
 ; LIN-LABEL: 't3'
 ; LIN-NEXT:  Src: %2 = load i32, ptr %arrayidx6, align 4 --> Dst: %2 = load i32, ptr %arrayidx6, align 4
diff --git a/llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll b/llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll
index 0d73ada53ed18..269fd8e54bab2 100644
--- a/llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/StrongSIV.ll
@@ -114,17 +114,17 @@ for.end:                                          ; preds = %for.end.loopexit, %
 define void @strong2(ptr %A, ptr %B, i64 %n) nounwind uwtable ssp {
 ; CHECK-LABEL: 'strong2'
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx1, align 4
-; CHECK-NEXT:    da analyze - consistent flow [2]!
+; CHECK-NEXT:    da analyze - consistent flow [*|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx1, align 4 --> Dst: %0 = load i32, ptr %arrayidx1, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent input [*]!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx1, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %0, ptr %B.addr.02, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ;
 entry:
   %cmp1 = icmp eq i64 %n, 0
@@ -385,9 +385,9 @@ for.end:                                          ; preds = %for.body
 define void @strong8(ptr %A, ptr %B, i64 %n) nounwind uwtable ssp {
 ; CHECK-LABEL: 'strong8'
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx1, align 4
-; CHECK-NEXT:    da analyze - flow [*|<]!
+; CHECK-NEXT:    da analyze - consistent flow [*|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx1, align 4 --> Dst: %0 = load i32, ptr %arrayidx1, align 4
@@ -427,17 +427,17 @@ for.end:                                          ; preds = %for.body
 define void @strong9(ptr %A, ptr %B, i64 %n) nounwind uwtable ssp {
 ; CHECK-LABEL: 'strong9'
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx2, align 4
-; CHECK-NEXT:    da analyze - flow [*|<]!
+; CHECK-NEXT:    da analyze - consistent flow [*|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx2, align 4 --> Dst: %0 = load i32, ptr %arrayidx2, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent input [*]!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx2, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %0, ptr %B.addr.02, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ;
 entry:
   %cmp1 = icmp eq i64 %n, 0
@@ -478,19 +478,13 @@ for.end:                                          ; preds = %for.end.loopexit, %
 define void @strong10(ptr %A, ptr %B, i64 %n) nounwind uwtable ssp {
 ; CHECK-LABEL: 'strong10'
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
-; CHECK-NEXT:    da analyze - consistent output [0]!
-; CHECK-NEXT:    Runtime Assumptions:
-; CHECK-NEXT:    Compare predicate: (4 * %n) ne) 0
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx3, align 4
-; CHECK-NEXT:    da analyze - consistent flow [0|<]!
-; CHECK-NEXT:    Runtime Assumptions:
-; CHECK-NEXT:    Compare predicate: (4 * %n) ne) 0
+; CHECK-NEXT:    da analyze - consistent flow [*|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx3, align 4 --> Dst: %0 = load i32, ptr %arrayidx3, align 4
-; CHECK-NEXT:    da analyze - consistent input [0]!
-; CHECK-NEXT:    Runtime Assumptions:
-; CHECK-NEXT:    Compare predicate: (4 * %n) ne) 0
+; CHECK-NEXT:    da analyze - consistent input [*]!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx3, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %0, ptr %B.addr.01, align 4 --> Dst: store i32 %0, ptr %B.addr.01, align 4
@@ -534,7 +528,7 @@ define void @strong11(ptr %A) nounwind uwtable ssp {
 ;
 ; CHECK-STRONG-SIV-LABEL: 'strong11'
 ; CHECK-STRONG-SIV-NEXT:  Src: store i32 0, ptr %arrayidx, align 4 --> Dst: store i32 0, ptr %arrayidx, align 4
-; CHECK-STRONG-SIV-NEXT:    da analyze - consistent output [0 S]!
+; CHECK-STRONG-SIV-NEXT:    da analyze - consistent output [* S]!
 ;
 entry:
   br label %for.cond1.preheader
diff --git a/llvm/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll b/llvm/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll
index e4fe577af1486..4e14f97240cf2 100644
--- a/llvm/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/SymbolicRDIV.ll
@@ -15,17 +15,17 @@ target triple = "x86_64-apple-macosx10.6.0"
 define void @symbolicrdiv0(ptr %A, ptr %B, i64 %n1, i64 %n2) nounwind uwtable ssp {
 ; CHECK-LABEL: 'symbolicrdiv0'
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx8, align 4
 ; CHECK-NEXT:    da analyze - flow [|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx8, align 4 --> Dst: %0 = load i32, ptr %arrayidx8, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent input [*]!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx8, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %0, ptr %B.addr.02, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ;
 entry:
   %cmp4 = icmp eq i64 %n1, 0
@@ -84,17 +84,17 @@ for.end11:                                        ; preds = %for.end11.loopexit,
 define void @symbolicrdiv1(ptr %A, ptr %B, i64 %n1, i64 %n2) nounwind uwtable ssp {
 ; CHECK-LABEL: 'symbolicrdiv1'
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx9, align 4
 ; CHECK-NEXT:    da analyze - flow [|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx9, align 4 --> Dst: %0 = load i32, ptr %arrayidx9, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent input [*]!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx9, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %0, ptr %B.addr.02, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ;
 entry:
   %cmp4 = icmp eq i64 %n1, 0
@@ -155,17 +155,17 @@ for.end12:                                        ; preds = %for.end12.loopexit,
 define void @symbolicrdiv2(ptr %A, ptr %B, i64 %n1, i64 %n2) nounwind uwtable ssp {
 ; CHECK-LABEL: 'symbolicrdiv2'
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx7, align 4
 ; CHECK-NEXT:    da analyze - flow [|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx7, align 4 --> Dst: %0 = load i32, ptr %arrayidx7, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent input [*]!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx7, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %0, ptr %B.addr.02, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ;
 entry:
   %cmp4 = icmp eq i64 %n1, 0
@@ -224,17 +224,17 @@ for.end10:                                        ; preds = %for.end10.loopexit,
 define void @symbolicrdiv3(ptr %A, ptr %B, i64 %n1, i64 %n2) nounwind uwtable ssp {
 ; CHECK-LABEL: 'symbolicrdiv3'
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx6, align 4
 ; CHECK-NEXT:    da analyze - flow [|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx6, align 4 --> Dst: %0 = load i32, ptr %arrayidx6, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent input [*]!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx6, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %0, ptr %B.addr.02, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ;
 entry:
   %cmp4 = icmp eq i64 %n1, 0
@@ -291,17 +291,17 @@ for.end9:                                         ; preds = %for.end9.loopexit,
 define void @symbolicrdiv4(ptr %A, ptr %B, i64 %n1, i64 %n2) nounwind uwtable ssp {
 ; CHECK-LABEL: 'symbolicrdiv4'
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx7, align 4
 ; CHECK-NEXT:    da analyze - flow [|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx7, align 4 --> Dst: %0 = load i32, ptr %arrayidx7, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent input [*]!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx7, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %0, ptr %B.addr.02, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ;
 entry:
   %cmp4 = icmp eq i64 %n1, 0
@@ -359,17 +359,17 @@ for.end10:                                        ; preds = %for.end10.loopexit,
 define void @symbolicrdiv5(ptr %A, ptr %B, i64 %n1, i64 %n2) nounwind uwtable ssp {
 ; CHECK-LABEL: 'symbolicrdiv5'
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx7, align 4
 ; CHECK-NEXT:    da analyze - flow [|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx7, align 4 --> Dst: %0 = load i32, ptr %arrayidx7, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent input [*]!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx7, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %0, ptr %B.addr.02, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ;
 entry:
   %cmp4 = icmp eq i64 %n1, 0
diff --git a/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll b/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
index 60bb065e4eaca..9117673336a0b 100644
--- a/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll
@@ -13,17 +13,17 @@ target triple = "x86_64-apple-macosx10.6.0"
 define void @symbolicsiv0(ptr %A, ptr %B, i64 %n) nounwind uwtable ssp {
 ; CHECK-LABEL: 'symbolicsiv0'
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx4, align 4
 ; CHECK-NEXT:    da analyze - flow [*|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx4, align 4 --> Dst: %0 = load i32, ptr %arrayidx4, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent input [*]!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx4, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %0, ptr %B.addr.02, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ;
 entry:
   %cmp1 = icmp eq i64 %n, 0
@@ -65,17 +65,17 @@ for.end:                                          ; preds = %for.end.loopexit, %
 define void @symbolicsiv1(ptr %A, ptr %B, i64 %n) nounwind uwtable ssp {
 ; CHECK-LABEL: 'symbolicsiv1'
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx5, align 4
 ; CHECK-NEXT:    da analyze - flow [*|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx5, align 4 --> Dst: %0 = load i32, ptr %arrayidx5, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent input [*]!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx5, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %0, ptr %B.addr.02, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ;
 entry:
   %cmp1 = icmp eq i64 %n, 0
@@ -119,17 +119,17 @@ for.end:                                          ; preds = %for.end.loopexit, %
 define void @symbolicsiv2(ptr %A, ptr %B, i64 %n) nounwind uwtable ssp {
 ; CHECK-LABEL: 'symbolicsiv2'
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx3, align 4
 ; CHECK-NEXT:    da analyze - flow [*|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx3, align 4 --> Dst: %0 = load i32, ptr %arrayidx3, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent input [*]!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx3, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %0, ptr %B.addr.02, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ;
 entry:
   %cmp1 = icmp eq i64 %n, 0
@@ -171,17 +171,17 @@ for.end:                                          ; preds = %for.end.loopexit, %
 define void @symbolicsiv3(ptr %A, ptr %B, i64 %n) nounwind uwtable ssp {
 ; CHECK-LABEL: 'symbolicsiv3'
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx3, align 4
 ; CHECK-NEXT:    da analyze - flow [*|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx3, align 4 --> Dst: %0 = load i32, ptr %arrayidx3, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent input [*]!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx3, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %0, ptr %B.addr.02, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ;
 entry:
   %cmp1 = icmp eq i64 %n, 0
@@ -224,17 +224,17 @@ for.end:                                          ; preds = %for.end.loopexit, %
 define void @symbolicsiv4(ptr %A, ptr %B, i64 %n) nounwind uwtable ssp {
 ; CHECK-LABEL: 'symbolicsiv4'
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx3, align 4
 ; CHECK-NEXT:    da analyze - flow [*|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx3, align 4 --> Dst: %0 = load i32, ptr %arrayidx3, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent input [*]!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx3, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %0, ptr %B.addr.02, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ;
 entry:
   %cmp1 = icmp eq i64 %n, 0
@@ -276,17 +276,17 @@ for.end:                                          ; preds = %for.end.loopexit, %
 define void @symbolicsiv5(ptr %A, ptr %B, i64 %n) nounwind uwtable ssp {
 ; CHECK-LABEL: 'symbolicsiv5'
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx4, align 4
 ; CHECK-NEXT:    da analyze - flow [*|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx4, align 4 --> Dst: %0 = load i32, ptr %arrayidx4, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent input [*]!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx4, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %0, ptr %B.addr.02, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ;
 entry:
   %cmp1 = icmp eq i64 %n, 0
@@ -330,17 +330,17 @@ for.end:                                          ; preds = %for.end.loopexit, %
 define void @weaktest(ptr %A, ptr %B, i64 %n) nounwind uwtable ssp {
 ; CHECK-LABEL: 'weaktest'
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx2, align 4
 ; CHECK-NEXT:    da analyze - flow [*|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx2, align 4 --> Dst: %0 = load i32, ptr %arrayidx2, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent input [*]!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx2, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %0, ptr %B.addr.02, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ;
 entry:
   %cmp1 = icmp eq i64 %n, 0
@@ -381,21 +381,17 @@ for.end:                                          ; preds = %for.end.loopexit, %
 define void @symbolicsiv6(ptr %A, ptr %B, i64 %n, i64 %N, i64 %M) nounwind uwtable ssp {
 ; CHECK-LABEL: 'symbolicsiv6'
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
-; CHECK-NEXT:    da analyze - consistent output [0]!
-; CHECK-NEXT:    Runtime Assumptions:
-; CHECK-NEXT:    Compare predicate: (16 * %N) ne) 0
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx7, align 4
-; CHECK-NEXT:    da analyze - flow [*|<]!
+; CHECK-NEXT:    da analyze - consistent flow [*|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx7, align 4 --> Dst: %0 = load i32, ptr %arrayidx7, align 4
-; CHECK-NEXT:    da analyze - consistent input [0]!
-; CHECK-NEXT:    Runtime Assumptions:
-; CHECK-NEXT:    Compare predicate: (16 * %N) ne) 0
+; CHECK-NEXT:    da analyze - consistent input [*]!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx7, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %0, ptr %B.addr.02, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ;
 entry:
   %cmp1 = icmp eq i64 %n, 0
@@ -441,21 +437,17 @@ for.end:                                          ; preds = %for.end.loopexit, %
 define void @symbolicsiv7(ptr %A, ptr %B, i64 %n, i64 %N, i64 %M) nounwind uwtable ssp {
 ; CHECK-LABEL: 'symbolicsiv7'
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
-; CHECK-NEXT:    da analyze - consistent output [0]!
-; CHECK-NEXT:    Runtime Assumptions:
-; CHECK-NEXT:    Compare predicate: (8 * %N) ne) 0
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %1 = load i32, ptr %arrayidx6, align 4
-; CHECK-NEXT:    da analyze - flow [*|<]!
+; CHECK-NEXT:    da analyze - consistent flow [*|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %1, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %1 = load i32, ptr %arrayidx6, align 4 --> Dst: %1 = load i32, ptr %arrayidx6, align 4
-; CHECK-NEXT:    da analyze - consistent input [0]!
-; CHECK-NEXT:    Runtime Assumptions:
-; CHECK-NEXT:    Compare predicate: (8 * %N) ne) 0
+; CHECK-NEXT:    da analyze - consistent input [*]!
 ; CHECK-NEXT:  Src: %1 = load i32, ptr %arrayidx6, align 4 --> Dst: store i32 %1, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %1, ptr %B.addr.02, align 4 --> Dst: store i32 %1, ptr %B.addr.02, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ;
 entry:
   %cmp1 = icmp eq i64 %n, 0
diff --git a/llvm/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll b/llvm/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll
index c7accfd46a4d7..b7657339fb5fe 100644
--- a/llvm/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/WeakCrossingSIV.ll
@@ -14,21 +14,17 @@ target triple = "x86_64-apple-macosx10.6.0"
 define void @weakcrossing0(ptr %A, ptr %B, i64 %n) nounwind uwtable ssp {
 ; CHECK-LABEL: 'weakcrossing0'
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
-; CHECK-NEXT:    da analyze - consistent output [0]!
-; CHECK-NEXT:    Runtime Assumptions:
-; CHECK-NEXT:    Compare predicate: (4 * %n) ne) 0
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx2, align 4
 ; CHECK-NEXT:    da analyze - flow [0|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx2, align 4 --> Dst: %0 = load i32, ptr %arrayidx2, align 4
-; CHECK-NEXT:    da analyze - consistent input [0]!
-; CHECK-NEXT:    Runtime Assumptions:
-; CHECK-NEXT:    Compare predicate: (-4 * %n) ne) 0
+; CHECK-NEXT:    da analyze - consistent input [*]!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx2, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %0, ptr %B.addr.02, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ;
 entry:
   %cmp1 = icmp eq i64 %n, 0
@@ -70,17 +66,17 @@ for.end:                                          ; preds = %for.end.loopexit, %
 define void @weakcrossing1(ptr %A, ptr %B, i64 %n) nounwind uwtable ssp {
 ; CHECK-LABEL: 'weakcrossing1'
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx2, align 4
 ; CHECK-NEXT:    da analyze - flow [<>]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx2, align 4 --> Dst: %0 = load i32, ptr %arrayidx2, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent input [*]!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx2, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %0, ptr %B.addr.02, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ;
 entry:
   %cmp1 = icmp eq i64 %n, 0
@@ -247,17 +243,17 @@ for.end:                                          ; preds = %for.body
 define void @weakcrossing5(ptr %A, ptr %B, i64 %n) nounwind uwtable ssp {
 ; CHECK-LABEL: 'weakcrossing5'
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %1 = load i32, ptr %arrayidx2, align 4
 ; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %1, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %1 = load i32, ptr %arrayidx2, align 4 --> Dst: %1 = load i32, ptr %arrayidx2, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent input [*]!
 ; CHECK-NEXT:  Src: %1 = load i32, ptr %arrayidx2, align 4 --> Dst: store i32 %1, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %1, ptr %B.addr.02, align 4 --> Dst: store i32 %1, ptr %B.addr.02, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ;
 entry:
   %cmp1 = icmp eq i64 %n, 0
diff --git a/llvm/test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll b/llvm/test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll
index f8a045c425029..51d21c21fb57a 100644
--- a/llvm/test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/WeakZeroDstSIV.ll
@@ -90,9 +90,7 @@ for.end:                                          ; preds = %for.body
 define void @weakzerodst1(ptr %A, ptr %B, i64 %n) nounwind uwtable ssp {
 ; CHECK-LABEL: 'weakzerodst1'
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
-; CHECK-NEXT:    da analyze - consistent output [0]!
-; CHECK-NEXT:    Runtime Assumptions:
-; CHECK-NEXT:    Compare predicate: (4 * %n) ne) 0
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx1, align 4
 ; CHECK-NEXT:    da analyze - flow [p<=|<]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
@@ -102,7 +100,7 @@ define void @weakzerodst1(ptr %A, ptr %B, i64 %n) nounwind uwtable ssp {
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx1, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %0, ptr %B.addr.02, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ;
 entry:
   %cmp1 = icmp eq i64 %n, 0
@@ -310,7 +308,7 @@ for.end:                                          ; preds = %for.body
 define void @weakzerodst6(ptr %A, ptr %B, i64 %n) nounwind uwtable ssp {
 ; CHECK-LABEL: 'weakzerodst6'
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx1, align 4
 ; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
@@ -320,7 +318,7 @@ define void @weakzerodst6(ptr %A, ptr %B, i64 %n) nounwind uwtable ssp {
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx1, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %0, ptr %B.addr.02, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ;
 entry:
   %cmp1 = icmp eq i64 %n, 0
diff --git a/llvm/test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll b/llvm/test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll
index 4ed0abd8d98a9..4c2e062643e2a 100644
--- a/llvm/test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/WeakZeroSrcSIV.ll
@@ -94,13 +94,11 @@ define void @weakzerosrc1(ptr %A, ptr %B, i64 %n) nounwind uwtable ssp {
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx1, align 4 --> Dst: %0 = load i32, ptr %arrayidx1, align 4
-; CHECK-NEXT:    da analyze - consistent input [0]!
-; CHECK-NEXT:    Runtime Assumptions:
-; CHECK-NEXT:    Compare predicate: (4 * %n) ne) 0
+; CHECK-NEXT:    da analyze - consistent input [*]!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx1, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %0, ptr %B.addr.02, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ;
 entry:
   %cmp1 = icmp eq i64 %n, 0
@@ -314,11 +312,11 @@ define void @weakzerosrc6(ptr %A, ptr %B, i64 %n) nounwind uwtable ssp {
 ; CHECK-NEXT:  Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx1, align 4 --> Dst: %0 = load i32, ptr %arrayidx1, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent input [*]!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx1, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
 ; CHECK-NEXT:    da analyze - confused!
 ; CHECK-NEXT:  Src: store i32 %0, ptr %B.addr.02, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ;
 entry:
   %cmp1 = icmp eq i64 %n, 0
diff --git a/llvm/test/Analysis/DependenceAnalysis/becount-couldnotcompute.ll b/llvm/test/Analysis/DependenceAnalysis/becount-couldnotcompute.ll
index 1674badd4d6b9..5672e2c7b7c8b 100644
--- a/llvm/test/Analysis/DependenceAnalysis/becount-couldnotcompute.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/becount-couldnotcompute.ll
@@ -7,9 +7,7 @@
 define void @test(i64 %conv, ptr %a) {
 ; CHECK-LABEL: 'test'
 ; CHECK-NEXT:  Src: %ld = load i32, ptr %arrayidx12, align 4 --> Dst: %ld = load i32, ptr %arrayidx12, align 4
-; CHECK-NEXT:    da analyze - consistent input [0]!
-; CHECK-NEXT:    Runtime Assumptions:
-; CHECK-NEXT:    Compare predicate: (4 + (4 * %conv)) ne) 0
+; CHECK-NEXT:    da analyze - consistent input [*]!
 ;
 entry:
   %sub = add i64 %conv, 1
diff --git a/llvm/test/Analysis/DependenceAnalysis/exact-rdiv-addrec-wrap.ll b/llvm/test/Analysis/DependenceAnalysis/exact-rdiv-addrec-wrap.ll
index ec974e0f070fb..31ad4f6c17b01 100644
--- a/llvm/test/Analysis/DependenceAnalysis/exact-rdiv-addrec-wrap.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/exact-rdiv-addrec-wrap.ll
@@ -28,7 +28,7 @@
 define void @exact_rdiv_no_nsw(ptr %A) {
 ; CHECK-ALL-LABEL: 'exact_rdiv_no_nsw'
 ; CHECK-ALL-NEXT:  Src: store i8 0, ptr %gep.0, align 1 --> Dst: store i8 0, ptr %gep.0, align 1
-; CHECK-ALL-NEXT:    da analyze - none!
+; CHECK-ALL-NEXT:    da analyze - consistent output [*]!
 ; CHECK-ALL-NEXT:  Src: store i8 0, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.1, align 1
 ; CHECK-ALL-NEXT:    da analyze - none!
 ; CHECK-ALL-NEXT:  Src: store i8 1, ptr %gep.1, align 1 --> Dst: store i8 1, ptr %gep.1, align 1
diff --git a/llvm/test/Analysis/DependenceAnalysis/exact-siv-addrec-wrap.ll b/llvm/test/Analysis/DependenceAnalysis/exact-siv-addrec-wrap.ll
index f8a63333183a4..7a1ebb910a865 100644
--- a/llvm/test/Analysis/DependenceAnalysis/exact-siv-addrec-wrap.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/exact-siv-addrec-wrap.ll
@@ -26,7 +26,7 @@
 define void @exact_siv_no_nsw(ptr %A) {
 ; CHECK-ALL-LABEL: 'exact_siv_no_nsw'
 ; CHECK-ALL-NEXT:  Src: store i8 0, ptr %gep.0, align 1 --> Dst: store i8 0, ptr %gep.0, align 1
-; CHECK-ALL-NEXT:    da analyze - none!
+; CHECK-ALL-NEXT:    da analyze - consistent output [*]!
 ; CHECK-ALL-NEXT:  Src: store i8 0, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.1, align 1
 ; CHECK-ALL-NEXT:    da analyze - none!
 ; CHECK-ALL-NEXT:  Src: store i8 1, ptr %gep.1, align 1 --> Dst: store i8 1, ptr %gep.1, align 1
diff --git a/llvm/test/Analysis/DependenceAnalysis/strong-siv-addrec-wrap.ll b/llvm/test/Analysis/DependenceAnalysis/strong-siv-addrec-wrap.ll
index 05807458d4de5..128acc4504d26 100644
--- a/llvm/test/Analysis/DependenceAnalysis/strong-siv-addrec-wrap.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/strong-siv-addrec-wrap.ll
@@ -24,13 +24,21 @@
 ; stores.
 ;
 define void @strong_siv_no_nsw(ptr %A) {
-; CHECK-LABEL: 'strong_siv_no_nsw'
-; CHECK-NEXT:  Src: store i8 0, ptr %gep.0, align 1 --> Dst: store i8 0, ptr %gep.0, align 1
-; CHECK-NEXT:    da analyze - none!
-; CHECK-NEXT:  Src: store i8 0, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.1, align 1
-; CHECK-NEXT:    da analyze - none!
-; CHECK-NEXT:  Src: store i8 1, ptr %gep.1, align 1 --> Dst: store i8 1, ptr %gep.1, align 1
-; CHECK-NEXT:    da analyze - none!
+; CHECK-ALL-LABEL: 'strong_siv_no_nsw'
+; CHECK-ALL-NEXT:  Src: store i8 0, ptr %gep.0, align 1 --> Dst: store i8 0, ptr %gep.0, align 1
+; CHECK-ALL-NEXT:    da analyze - consistent output [*]!
+; CHECK-ALL-NEXT:  Src: store i8 0, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.1, align 1
+; CHECK-ALL-NEXT:    da analyze - none!
+; CHECK-ALL-NEXT:  Src: store i8 1, ptr %gep.1, align 1 --> Dst: store i8 1, ptr %gep.1, align 1
+; CHECK-ALL-NEXT:    da analyze - consistent output [*]!
+;
+; CHECK-STRONG-SIV-LABEL: 'strong_siv_no_nsw'
+; CHECK-STRONG-SIV-NEXT:  Src: store i8 0, ptr %gep.0, align 1 --> Dst: store i8 0, ptr %gep.0, align 1
+; CHECK-STRONG-SIV-NEXT:    da analyze - consistent output [*]!
+; CHECK-STRONG-SIV-NEXT:  Src: store i8 0, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.1, align 1
+; CHECK-STRONG-SIV-NEXT:    da analyze - consistent output [*|<]!
+; CHECK-STRONG-SIV-NEXT:  Src: store i8 1, ptr %gep.1, align 1 --> Dst: store i8 1, ptr %gep.1, align 1
+; CHECK-STRONG-SIV-NEXT:    da analyze - consistent output [*]!
 ;
 entry:
   br label %loop.header
@@ -67,5 +75,4 @@ exit:
   ret void
 }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-ALL: {{.*}}
-; CHECK-STRONG-SIV: {{.*}}
+; CHECK: {{.*}}
diff --git a/llvm/test/Analysis/DependenceAnalysis/weak-crossing-siv-addrec-wrap.ll b/llvm/test/Analysis/DependenceAnalysis/weak-crossing-siv-addrec-wrap.ll
index e0f3eda30e63d..4596e66a83ba7 100644
--- a/llvm/test/Analysis/DependenceAnalysis/weak-crossing-siv-addrec-wrap.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/weak-crossing-siv-addrec-wrap.ll
@@ -24,21 +24,13 @@
 ; stores.
 ;
 define void @weak_crossing_siv_no_nsw(ptr %A) {
-; CHECK-ALL-LABEL: 'weak_crossing_siv_no_nsw'
-; CHECK-ALL-NEXT:  Src: store i8 0, ptr %gep.0, align 1 --> Dst: store i8 0, ptr %gep.0, align 1
-; CHECK-ALL-NEXT:    da analyze - none!
-; CHECK-ALL-NEXT:  Src: store i8 0, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.1, align 1
-; CHECK-ALL-NEXT:    da analyze - none!
-; CHECK-ALL-NEXT:  Src: store i8 1, ptr %gep.1, align 1 --> Dst: store i8 1, ptr %gep.1, align 1
-; CHECK-ALL-NEXT:    da analyze - none!
-;
-; CHECK-WEAK-CROSSING-SIV-LABEL: 'weak_crossing_siv_no_nsw'
-; CHECK-WEAK-CROSSING-SIV-NEXT:  Src: store i8 0, ptr %gep.0, align 1 --> Dst: store i8 0, ptr %gep.0, align 1
-; CHECK-WEAK-CROSSING-SIV-NEXT:    da analyze - consistent output [*]!
-; CHECK-WEAK-CROSSING-SIV-NEXT:  Src: store i8 0, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.1, align 1
-; CHECK-WEAK-CROSSING-SIV-NEXT:    da analyze - none!
-; CHECK-WEAK-CROSSING-SIV-NEXT:  Src: store i8 1, ptr %gep.1, align 1 --> Dst: store i8 1, ptr %gep.1, align 1
-; CHECK-WEAK-CROSSING-SIV-NEXT:    da analyze - consistent output [*]!
+; CHECK-LABEL: 'weak_crossing_siv_no_nsw'
+; CHECK-NEXT:  Src: store i8 0, ptr %gep.0, align 1 --> Dst: store i8 0, ptr %gep.0, align 1
+; CHECK-NEXT:    da analyze - consistent output [*]!
+; CHECK-NEXT:  Src: store i8 0, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.1, align 1
+; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:  Src: store i8 1, ptr %gep.1, align 1 --> Dst: store i8 1, ptr %gep.1, align 1
+; CHECK-NEXT:    da analyze - consistent output [*]!
 ;
 entry:
   br label %loop.header
@@ -75,4 +67,5 @@ exit:
   ret void
 }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK: {{.*}}
+; CHECK-ALL: {{.*}}
+; CHECK-WEAK-CROSSING-SIV: {{.*}}
diff --git a/llvm/test/Transforms/LoopFusion/da_separate_loops.ll b/llvm/test/Transforms/LoopFusion/da_separate_loops.ll
index 6359f48199290..22689eda88561 100644
--- a/llvm/test/Transforms/LoopFusion/da_separate_loops.ll
+++ b/llvm/test/Transforms/LoopFusion/da_separate_loops.ll
@@ -1,7 +1,7 @@
 ; REQUIRES: asserts
 
 ; RUN: opt -passes=loop-fusion -da-disable-delinearization-checks -disable-output -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
-; STAT: 2 loop-fusion - DA checks passed
+; STAT: 2 loop-fusion - Dependencies prevent fusion
 
 ; The two inner loops have no dependency and are allowed to be fused as in the
 ; outer loops, different levels are accessed to.
diff --git a/llvm/test/Transforms/LoopInterchange/lcssa-preheader.ll b/llvm/test/Transforms/LoopInterchange/lcssa-preheader.ll
index 3969511fad65e..c9bf1d9f7126b 100644
--- a/llvm/test/Transforms/LoopInterchange/lcssa-preheader.ll
+++ b/llvm/test/Transforms/LoopInterchange/lcssa-preheader.ll
@@ -175,35 +175,24 @@ define void @test2(i32 %N) {
 ;
 ; CHECK-DELIN-LABEL: define void @test2(
 ; CHECK-DELIN-SAME: i32 [[N:%.*]]) {
-; CHECK-DELIN-NEXT:  [[BB:.*:]]
-; CHECK-DELIN-NEXT:    br label %[[INNER_PREHEADER:.*]]
-; CHECK-DELIN:       [[OUTER_HEADER_PREHEADER:.*]]:
+; CHECK-DELIN-NEXT:  [[OUTER_HEADER_PREHEADER:.*]]:
 ; CHECK-DELIN-NEXT:    br label %[[OUTER_HEADER:.*]]
 ; CHECK-DELIN:       [[OUTER_HEADER]]:
-; CHECK-DELIN-NEXT:    [[OUTER_IV:%.*]] = phi i64 [ [[OUTER_IV_NEXT:%.*]], %[[OUTER_LATCH:.*]] ], [ 0, %[[OUTER_HEADER_PREHEADER]] ]
+; CHECK-DELIN-NEXT:    [[OUTER_IV:%.*]] = phi i64 [ 0, %[[OUTER_HEADER_PREHEADER]] ], [ [[OUTER_IV_NEXT:%.*]], %[[OUTER_LATCH:.*]] ]
 ; CHECK-DELIN-NEXT:    [[N_EXT:%.*]] = sext i32 [[N]] to i64
 ; CHECK-DELIN-NEXT:    br label %[[INNER_SPLIT1:.*]]
-; CHECK-DELIN:       [[INNER_PREHEADER]]:
-; CHECK-DELIN-NEXT:    br label %[[INNER:.*]]
-; CHECK-DELIN:       [[INNER]]:
-; CHECK-DELIN-NEXT:    [[INNER_IV:%.*]] = phi i64 [ [[TMP0:%.*]], %[[INNER_SPLIT:.*]] ], [ 0, %[[INNER_PREHEADER]] ]
-; CHECK-DELIN-NEXT:    br label %[[OUTER_HEADER_PREHEADER]]
 ; CHECK-DELIN:       [[INNER_SPLIT1]]:
+; CHECK-DELIN-NEXT:    [[INNER_IV:%.*]] = phi i64 [ 0, %[[OUTER_HEADER]] ], [ [[TMP0:%.*]], %[[INNER_SPLIT1]] ]
 ; CHECK-DELIN-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [4 x [4 x [2 x i16]]], ptr @global, i64 0, i64 [[INNER_IV]], i64 [[OUTER_IV]], i64 0
 ; CHECK-DELIN-NEXT:    store i16 0, ptr [[TMP8]], align 2
-; CHECK-DELIN-NEXT:    [[INNER_IV_NEXT:%.*]] = add nsw i64 [[INNER_IV]], 1
-; CHECK-DELIN-NEXT:    [[C_1:%.*]] = icmp ne i64 [[INNER_IV_NEXT]], [[N_EXT]]
-; CHECK-DELIN-NEXT:    br label %[[OUTER_LATCH]]
-; CHECK-DELIN:       [[INNER_SPLIT]]:
-; CHECK-DELIN-NEXT:    [[N_EXT_LCSSA:%.*]] = phi i64 [ [[N_EXT]], %[[OUTER_LATCH]] ]
 ; CHECK-DELIN-NEXT:    [[TMP0]] = add nsw i64 [[INNER_IV]], 1
-; CHECK-DELIN-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[TMP0]], [[N_EXT_LCSSA]]
-; CHECK-DELIN-NEXT:    br i1 [[TMP1]], label %[[INNER]], label %[[EXIT:.*]]
+; CHECK-DELIN-NEXT:    [[C_1:%.*]] = icmp ne i64 [[TMP0]], [[N_EXT]]
+; CHECK-DELIN-NEXT:    br i1 [[C_1]], label %[[INNER_SPLIT1]], label %[[OUTER_LATCH]]
 ; CHECK-DELIN:       [[OUTER_LATCH]]:
 ; CHECK-DELIN-NEXT:    [[OUTER_IV_NEXT]] = add nsw i64 [[OUTER_IV]], 1
 ; CHECK-DELIN-NEXT:    [[C_2:%.*]] = icmp ne i64 [[OUTER_IV]], [[N_EXT]]
-; CHECK-DELIN-NEXT:    br i1 [[C_2]], label %[[OUTER_HEADER]], label %[[INNER_SPLIT]]
-; CHECK-DELIN:       [[EXIT]]:
+; CHECK-DELIN-NEXT:    br i1 [[C_2]], label %[[OUTER_HEADER]], label %[[INNER_SPLIT:.*]]
+; CHECK-DELIN:       [[INNER_SPLIT]]:
 ; CHECK-DELIN-NEXT:    ret void
 ;
 bb:

>From 261201b0105bfbfa9677c8e35c215cd26501e1c7 Mon Sep 17 00:00:00 2001
From: Ehsan Amiri <ehsan.amiri at huawei.com>
Date: Fri, 27 Feb 2026 14:31:48 -0500
Subject: [PATCH 2/6] fix the failing testcase in unroll and jam

---
 .../LoopUnrollAndJam/unroll-and-jam.ll        | 452 ++----------------
 1 file changed, 51 insertions(+), 401 deletions(-)

diff --git a/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll b/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll
index 93e5f82c824ef..06b2b6e3ed806 100644
--- a/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll
+++ b/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll
@@ -188,136 +188,29 @@ define void @test2(i32 %N, i32 %M, ptr noalias nocapture %A, ptr noalias nocaptu
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[M]], 0
 ; CHECK-NEXT:    [[CMP125:%.*]] = icmp ne i32 [[N]], 0
 ; CHECK-NEXT:    [[OR_COND:%.*]] = and i1 [[CMP]], [[CMP125]]
-; CHECK-NEXT:    br i1 [[OR_COND]], label %[[FOR_OUTER_PREHEADER:.*]], label %[[FOR_END10:.*]]
-; CHECK:       [[FOR_OUTER_PREHEADER]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
-; CHECK-NEXT:    [[XTRAITER:%.*]] = and i32 [[N]], 3
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 3
-; CHECK-NEXT:    br i1 [[TMP1]], label %[[FOR_OUTER_EPIL_PREHEADER:.*]], label %[[FOR_OUTER_PREHEADER_NEW:.*]]
-; CHECK:       [[FOR_OUTER_PREHEADER_NEW]]:
-; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i32 [[N]], [[XTRAITER]]
-; CHECK-NEXT:    br label %[[FOR_OUTER:.*]]
-; CHECK:       [[FOR_OUTER]]:
-; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[ADD9_3:%.*]], %[[FOR_LATCH:.*]] ], [ 0, %[[FOR_OUTER_PREHEADER_NEW]] ]
-; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ 0, %[[FOR_OUTER_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], %[[FOR_LATCH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I]]
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD9:%.*]] = add nuw nsw i32 [[I]], 1
-; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD9]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD9_1:%.*]] = add nuw nsw i32 [[I]], 2
-; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD9_1]]
-; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD9_2:%.*]] = add nuw nsw i32 [[I]], 3
-; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD9_2]]
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD9_3]] = add nuw i32 [[I]], 4
-; CHECK-NEXT:    [[NITER_NEXT_3]] = add i32 [[NITER]], 4
-; CHECK-NEXT:    br label %[[FOR_INNER:.*]]
-; CHECK:       [[FOR_INNER]]:
-; CHECK-NEXT:    [[J:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC:%.*]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ [[TMP2]], %[[FOR_OUTER]] ], [ [[ADD:%.*]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    [[J_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC_1:%.*]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    [[SUM_1:%.*]] = phi i32 [ [[TMP3]], %[[FOR_OUTER]] ], [ [[ADD_1:%.*]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    [[J_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC_2:%.*]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    [[SUM_2:%.*]] = phi i32 [ [[TMP4]], %[[FOR_OUTER]] ], [ [[ADD_2:%.*]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    [[J_3:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC_3:%.*]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    [[SUM_3:%.*]] = phi i32 [ [[TMP5]], %[[FOR_OUTER]] ], [ [[ADD_3:%.*]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX6]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD]] = add i32 [[TMP6]], [[SUM]]
-; CHECK-NEXT:    [[INC]] = add nuw i32 [[J]], 1
-; CHECK-NEXT:    [[ARRAYIDX6_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_1]]
-; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX6_1]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD_1]] = add i32 [[TMP7]], [[SUM_1]]
-; CHECK-NEXT:    [[INC_1]] = add nuw i32 [[J_1]], 1
-; CHECK-NEXT:    [[ARRAYIDX6_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_2]]
-; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX6_2]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD_2]] = add i32 [[TMP8]], [[SUM_2]]
-; CHECK-NEXT:    [[INC_2]] = add nuw i32 [[J_2]], 1
-; CHECK-NEXT:    [[ARRAYIDX6_3:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_3]]
-; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX6_3]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD_3]] = add i32 [[TMP9]], [[SUM_3]]
-; CHECK-NEXT:    [[INC_3]] = add nuw i32 [[J_3]], 1
-; CHECK-NEXT:    [[EXITCOND_3:%.*]] = icmp eq i32 [[INC_3]], [[M]]
-; CHECK-NEXT:    br i1 [[EXITCOND_3]], label %[[FOR_LATCH]], label %[[FOR_INNER]]
-; CHECK:       [[FOR_LATCH]]:
-; CHECK-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    [[ADD_LCSSA_1:%.*]] = phi i32 [ [[ADD_1]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    [[ADD_LCSSA_2:%.*]] = phi i32 [ [[ADD_2]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    [[ADD_LCSSA_3:%.*]] = phi i32 [ [[ADD_3]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    store i32 [[ADD_LCSSA]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    store i32 [[ADD_LCSSA_1]], ptr [[ARRAYIDX_1]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    store i32 [[ADD_LCSSA_2]], ptr [[ARRAYIDX_2]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    store i32 [[ADD_LCSSA_3]], ptr [[ARRAYIDX_3]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NEXT_3]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label %[[FOR_END10_LOOPEXIT_UNR_LCSSA:.*]], label %[[FOR_OUTER]], !llvm.loop [[LOOP6:![0-9]+]]
-; CHECK:       [[FOR_END10_LOOPEXIT_UNR_LCSSA]]:
-; CHECK-NEXT:    [[I_UNR1:%.*]] = phi i32 [ [[ADD9_3]], %[[FOR_LATCH]] ]
-; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD]], label %[[FOR_OUTER_EPIL_PREHEADER]], label %[[FOR_END10_LOOPEXIT:.*]]
-; CHECK:       [[FOR_OUTER_EPIL_PREHEADER]]:
-; CHECK-NEXT:    [[I_UNR:%.*]] = phi i32 [ 0, %[[FOR_OUTER_PREHEADER]] ], [ [[I_UNR1]], %[[FOR_END10_LOOPEXIT_UNR_LCSSA]] ]
-; CHECK-NEXT:    [[LCMP_MOD1:%.*]] = icmp ne i32 [[XTRAITER]], 0
-; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD1]])
-; CHECK-NEXT:    br label %[[FOR_OUTER_EPIL:.*]]
+; CHECK-NEXT:    br i1 [[OR_COND]], label %[[FOR_OUTER_EPIL:.*]], label %[[FOR_END10:.*]]
 ; CHECK:       [[FOR_OUTER_EPIL]]:
-; CHECK-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I_UNR]]
-; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[INT_TBAA0]]
 ; CHECK-NEXT:    br label %[[FOR_INNER_EPIL:.*]]
 ; CHECK:       [[FOR_INNER_EPIL]]:
-; CHECK-NEXT:    [[J_EPIL:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL]] ], [ [[INC_EPIL:%.*]], %[[FOR_INNER_EPIL]] ]
-; CHECK-NEXT:    [[SUM_EPIL:%.*]] = phi i32 [ [[TMP10]], %[[FOR_OUTER_EPIL]] ], [ [[ADD_EPIL:%.*]], %[[FOR_INNER_EPIL]] ]
-; CHECK-NEXT:    [[ARRAYIDX6_EPIL:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_EPIL]]
-; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX6_EPIL]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD_EPIL]] = add i32 [[TMP11]], [[SUM_EPIL]]
-; CHECK-NEXT:    [[INC_EPIL]] = add nuw i32 [[J_EPIL]], 1
-; CHECK-NEXT:    [[EXITCOND_EPIL:%.*]] = icmp eq i32 [[INC_EPIL]], [[M]]
-; CHECK-NEXT:    br i1 [[EXITCOND_EPIL]], label %[[FOR_LATCH_EPIL:.*]], label %[[FOR_INNER_EPIL]]
-; CHECK:       [[FOR_LATCH_EPIL]]:
-; CHECK-NEXT:    [[ADD_LCSSA_EPIL:%.*]] = phi i32 [ [[ADD_EPIL]], %[[FOR_INNER_EPIL]] ]
-; CHECK-NEXT:    store i32 [[ADD_LCSSA_EPIL]], ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD9_EPIL:%.*]] = add nuw i32 [[I_UNR]], 1
-; CHECK-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i32 1, [[XTRAITER]]
-; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP]], label %[[FOR_OUTER_EPIL_1:.*]], label %[[FOR_END10_LOOPEXIT_EPILOG_LCSSA:.*]]
-; CHECK:       [[FOR_OUTER_EPIL_1]]:
+; CHECK-NEXT:    [[ADD9_EPIL:%.*]] = phi i32 [ [[ADD9:%.*]], %[[FOR_INNER_EPIL_2:.*]] ], [ 0, %[[FOR_OUTER_EPIL]] ]
 ; CHECK-NEXT:    [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD9_EPIL]]
-; CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[INT_TBAA0]]
 ; CHECK-NEXT:    br label %[[FOR_INNER_EPIL_1:.*]]
 ; CHECK:       [[FOR_INNER_EPIL_1]]:
-; CHECK-NEXT:    [[J_EPIL_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_1]] ], [ [[INC_EPIL_1:%.*]], %[[FOR_INNER_EPIL_1]] ]
-; CHECK-NEXT:    [[SUM_EPIL_1:%.*]] = phi i32 [ [[TMP12]], %[[FOR_OUTER_EPIL_1]] ], [ [[ADD_EPIL_1:%.*]], %[[FOR_INNER_EPIL_1]] ]
+; CHECK-NEXT:    [[J_EPIL_1:%.*]] = phi i32 [ 0, %[[FOR_INNER_EPIL]] ], [ [[INC_EPIL_1:%.*]], %[[FOR_INNER_EPIL_1]] ]
+; CHECK-NEXT:    [[SUM_EPIL_1:%.*]] = phi i32 [ [[TMP0]], %[[FOR_INNER_EPIL]] ], [ [[ADD_EPIL_1:%.*]], %[[FOR_INNER_EPIL_1]] ]
 ; CHECK-NEXT:    [[ARRAYIDX6_EPIL_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_EPIL_1]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX6_EPIL_1]], align 4, !tbaa [[INT_TBAA0]]
 ; CHECK-NEXT:    [[ADD_EPIL_1]] = add i32 [[TMP13]], [[SUM_EPIL_1]]
 ; CHECK-NEXT:    [[INC_EPIL_1]] = add nuw i32 [[J_EPIL_1]], 1
 ; CHECK-NEXT:    [[EXITCOND_EPIL_1:%.*]] = icmp eq i32 [[INC_EPIL_1]], [[M]]
-; CHECK-NEXT:    br i1 [[EXITCOND_EPIL_1]], label %[[FOR_LATCH_EPIL_1:.*]], label %[[FOR_INNER_EPIL_1]]
-; CHECK:       [[FOR_LATCH_EPIL_1]]:
-; CHECK-NEXT:    [[ADD_LCSSA_EPIL_1:%.*]] = phi i32 [ [[ADD_EPIL_1]], %[[FOR_INNER_EPIL_1]] ]
-; CHECK-NEXT:    store i32 [[ADD_LCSSA_EPIL_1]], ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD9_EPIL_1:%.*]] = add nuw i32 [[I_UNR]], 2
-; CHECK-NEXT:    [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 2, [[XTRAITER]]
-; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_1]], label %[[FOR_OUTER_EPIL_2:.*]], label %[[FOR_END10_LOOPEXIT_EPILOG_LCSSA]]
-; CHECK:       [[FOR_OUTER_EPIL_2]]:
-; CHECK-NEXT:    [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD9_EPIL_1]]
-; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    br label %[[FOR_INNER_EPIL_2:.*]]
+; CHECK-NEXT:    br i1 [[EXITCOND_EPIL_1]], label %[[FOR_INNER_EPIL_2]], label %[[FOR_INNER_EPIL_1]]
 ; CHECK:       [[FOR_INNER_EPIL_2]]:
-; CHECK-NEXT:    [[J_EPIL_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_2]] ], [ [[INC_EPIL_2:%.*]], %[[FOR_INNER_EPIL_2]] ]
-; CHECK-NEXT:    [[SUM_EPIL_2:%.*]] = phi i32 [ [[TMP14]], %[[FOR_OUTER_EPIL_2]] ], [ [[ADD_EPIL_2:%.*]], %[[FOR_INNER_EPIL_2]] ]
-; CHECK-NEXT:    [[ARRAYIDX6_EPIL_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_EPIL_2]]
-; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX6_EPIL_2]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD_EPIL_2]] = add i32 [[TMP15]], [[SUM_EPIL_2]]
-; CHECK-NEXT:    [[INC_EPIL_2]] = add nuw i32 [[J_EPIL_2]], 1
-; CHECK-NEXT:    [[EXITCOND_EPIL_2:%.*]] = icmp eq i32 [[INC_EPIL_2]], [[M]]
-; CHECK-NEXT:    br i1 [[EXITCOND_EPIL_2]], label %[[FOR_LATCH_EPIL_2:.*]], label %[[FOR_INNER_EPIL_2]]
-; CHECK:       [[FOR_LATCH_EPIL_2]]:
-; CHECK-NEXT:    [[ADD_LCSSA_EPIL_2:%.*]] = phi i32 [ [[ADD_EPIL_2]], %[[FOR_INNER_EPIL_2]] ]
-; CHECK-NEXT:    store i32 [[ADD_LCSSA_EPIL_2]], ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    br label %[[FOR_END10_LOOPEXIT_EPILOG_LCSSA]]
-; CHECK:       [[FOR_END10_LOOPEXIT_EPILOG_LCSSA]]:
-; CHECK-NEXT:    br label %[[FOR_END10_LOOPEXIT]]
+; CHECK-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD_EPIL_1]], %[[FOR_INNER_EPIL_1]] ]
+; CHECK-NEXT:    store i32 [[ADD_LCSSA]], ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT:    [[ADD9]] = add nuw i32 [[ADD9_EPIL]], 1
+; CHECK-NEXT:    [[EXITCOND28:%.*]] = icmp eq i32 [[ADD9]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND28]], label %[[FOR_END10_LOOPEXIT:.*]], label %[[FOR_INNER_EPIL]]
 ; CHECK:       [[FOR_END10_LOOPEXIT]]:
 ; CHECK-NEXT:    br label %[[FOR_END10]]
 ; CHECK:       [[FOR_END10]]:
@@ -428,7 +321,7 @@ define void @test2_consts(ptr noalias nocapture %A, ptr noalias nocapture readon
 ; CHECK-NEXT:    store i32 [[ADD_LCSSA_2]], ptr [[ARRAYIDX_2]], align 4, !tbaa [[INT_TBAA0]]
 ; CHECK-NEXT:    store i32 [[ADD_LCSSA_3]], ptr [[ARRAYIDX_3]], align 4, !tbaa [[INT_TBAA0]]
 ; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NEXT_3]], 100
-; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label %[[FOR_END10_LOOPEXIT_UNR_LCSSA:.*]], label %[[FOR_OUTER]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label %[[FOR_END10_LOOPEXIT_UNR_LCSSA:.*]], label %[[FOR_OUTER]], !llvm.loop [[LOOP6:![0-9]+]]
 ; CHECK:       [[FOR_END10_LOOPEXIT_UNR_LCSSA]]:
 ; CHECK-NEXT:    [[I_UNR:%.*]] = phi i32 [ [[ADD9_3]], %[[FOR_LATCH]] ]
 ; CHECK-NEXT:    br i1 true, label %[[FOR_OUTER_EPIL_PREHEADER]], label %[[FOR_END10:.*]]
@@ -768,7 +661,7 @@ define i32 @test6() #0 {
 ; CHECK-NEXT:    [[EXITCOND_3:%.*]] = icmp ne i32 [[INC_3]], 7
 ; CHECK-NEXT:    br i1 [[EXITCOND_3]], label %[[FOR_INNER]], label %[[FOR_LATCH]]
 ; CHECK:       [[FOR_LATCH]]:
-; CHECK-NEXT:    br i1 false, label %[[FOR_OUTER]], label %[[FOR_END_UNR_LCSSA:.*]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT:    br i1 false, label %[[FOR_OUTER]], label %[[FOR_END_UNR_LCSSA:.*]], !llvm.loop [[LOOP7:![0-9]+]]
 ; CHECK:       [[FOR_END_UNR_LCSSA]]:
 ; CHECK-NEXT:    [[DOTLCSSA_LCSSA_PH:%.*]] = phi i32 [ 2, %[[FOR_LATCH]] ]
 ; CHECK-NEXT:    [[INC_LCSSA_LCSSA_PH:%.*]] = phi i32 [ 7, %[[FOR_LATCH]] ]
@@ -832,151 +725,31 @@ define void @test7(i32 %N, i32 %M, ptr noalias nocapture %A, ptr noalias nocaptu
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[M]], 0
 ; CHECK-NEXT:    [[CMP128:%.*]] = icmp ne i32 [[N]], 0
 ; CHECK-NEXT:    [[OR_COND:%.*]] = and i1 [[CMP128]], [[CMP]]
-; CHECK-NEXT:    br i1 [[OR_COND]], label %[[FOR_PREHEADER:.*]], label %[[FOR_END:.*]]
-; CHECK:       [[FOR_PREHEADER]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
-; CHECK-NEXT:    [[XTRAITER:%.*]] = and i32 [[N]], 3
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 3
-; CHECK-NEXT:    br i1 [[TMP1]], label %[[FOR_OUTER_EPIL_PREHEADER:.*]], label %[[FOR_PREHEADER_NEW:.*]]
-; CHECK:       [[FOR_PREHEADER_NEW]]:
-; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i32 [[N]], [[XTRAITER]]
-; CHECK-NEXT:    br label %[[FOR_OUTER:.*]]
-; CHECK:       [[FOR_OUTER]]:
-; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[ADD_3:%.*]], %[[FOR_LATCH:.*]] ], [ 0, %[[FOR_PREHEADER_NEW]] ]
-; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ 0, %[[FOR_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], %[[FOR_LATCH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I]]
-; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[I]], 1
-; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD]]
-; CHECK-NEXT:    store i32 2, ptr [[ARRAYIDX2]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD]]
-; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX_1]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD_1:%.*]] = add nuw nsw i32 [[I]], 2
-; CHECK-NEXT:    [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_1]]
-; CHECK-NEXT:    store i32 2, ptr [[ARRAYIDX2_1]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_1]]
-; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX_2]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD_2:%.*]] = add nuw nsw i32 [[I]], 3
-; CHECK-NEXT:    [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_2]]
-; CHECK-NEXT:    store i32 2, ptr [[ARRAYIDX2_2]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_2]]
-; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX_3]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD_3]] = add nuw i32 [[I]], 4
-; CHECK-NEXT:    [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_3]]
-; CHECK-NEXT:    store i32 2, ptr [[ARRAYIDX2_3]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[NITER_NEXT_3]] = add i32 [[NITER]], 4
-; CHECK-NEXT:    br label %[[FOR_INNER:.*]]
-; CHECK:       [[FOR_LATCH]]:
-; CHECK-NEXT:    [[ADD9_LCSSA:%.*]] = phi i32 [ [[ADD9:%.*]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    [[ADD9_LCSSA_1:%.*]] = phi i32 [ [[ADD9_1:%.*]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    [[ADD9_LCSSA_2:%.*]] = phi i32 [ [[ADD9_2:%.*]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    [[ADD9_LCSSA_3:%.*]] = phi i32 [ [[ADD9_3:%.*]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    store i32 [[ADD9_LCSSA]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    store i32 [[ADD9_LCSSA_1]], ptr [[ARRAYIDX_1]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    store i32 [[ADD9_LCSSA_2]], ptr [[ARRAYIDX_2]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    store i32 [[ADD9_LCSSA_3]], ptr [[ARRAYIDX_3]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NEXT_3]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label %[[FOR_END_LOOPEXIT_UNR_LCSSA:.*]], label %[[FOR_OUTER]], !llvm.loop [[LOOP9:![0-9]+]]
-; CHECK:       [[FOR_INNER]]:
-; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD9]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    [[J:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD10:%.*]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    [[SUM_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD9_1]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    [[J_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD10_1:%.*]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    [[SUM_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD9_2]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    [[J_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD10_2:%.*]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    [[SUM_3:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD9_3]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    [[J_3:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD10_3:%.*]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J]]
-; CHECK-NEXT:    [[L1:%.*]] = load i32, ptr [[ARRAYIDX7]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD9]] = add i32 [[L1]], [[SUM]]
-; CHECK-NEXT:    [[ADD10]] = add nuw i32 [[J]], 1
-; CHECK-NEXT:    [[ARRAYIDX7_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_1]]
-; CHECK-NEXT:    [[L1_1:%.*]] = load i32, ptr [[ARRAYIDX7_1]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD9_1]] = add i32 [[L1_1]], [[SUM_1]]
-; CHECK-NEXT:    [[ADD10_1]] = add nuw i32 [[J_1]], 1
-; CHECK-NEXT:    [[ARRAYIDX7_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_2]]
-; CHECK-NEXT:    [[L1_2:%.*]] = load i32, ptr [[ARRAYIDX7_2]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD9_2]] = add i32 [[L1_2]], [[SUM_2]]
-; CHECK-NEXT:    [[ADD10_2]] = add nuw i32 [[J_2]], 1
-; CHECK-NEXT:    [[ARRAYIDX7_3:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_3]]
-; CHECK-NEXT:    [[L1_3:%.*]] = load i32, ptr [[ARRAYIDX7_3]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD9_3]] = add i32 [[L1_3]], [[SUM_3]]
-; CHECK-NEXT:    [[ADD10_3]] = add nuw i32 [[J_3]], 1
-; CHECK-NEXT:    [[EXITCOND_3:%.*]] = icmp eq i32 [[ADD10_3]], [[M]]
-; CHECK-NEXT:    br i1 [[EXITCOND_3]], label %[[FOR_LATCH]], label %[[FOR_INNER]]
-; CHECK:       [[FOR_END_LOOPEXIT_UNR_LCSSA]]:
-; CHECK-NEXT:    [[I_UNR1:%.*]] = phi i32 [ [[ADD_3]], %[[FOR_LATCH]] ]
-; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD]], label %[[FOR_OUTER_EPIL_PREHEADER]], label %[[FOR_END_LOOPEXIT:.*]]
+; CHECK-NEXT:    br i1 [[OR_COND]], label %[[FOR_OUTER_EPIL_PREHEADER:.*]], label %[[FOR_END:.*]]
 ; CHECK:       [[FOR_OUTER_EPIL_PREHEADER]]:
-; CHECK-NEXT:    [[I_UNR:%.*]] = phi i32 [ 0, %[[FOR_PREHEADER]] ], [ [[I_UNR1]], %[[FOR_END_LOOPEXIT_UNR_LCSSA]] ]
-; CHECK-NEXT:    [[LCMP_MOD1:%.*]] = icmp ne i32 [[XTRAITER]], 0
-; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD1]])
 ; CHECK-NEXT:    br label %[[FOR_OUTER_EPIL:.*]]
 ; CHECK:       [[FOR_OUTER_EPIL]]:
-; CHECK-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I_UNR]]
-; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD_EPIL:%.*]] = add nuw i32 [[I_UNR]], 1
+; CHECK-NEXT:    [[ADD_EPIL:%.*]] = phi i32 [ [[ADD_EPIL_1:%.*]], %[[FOR_LATCH_EPIL_1:.*]] ], [ 0, %[[FOR_OUTER_EPIL_PREHEADER]] ]
 ; CHECK-NEXT:    [[ARRAYIDX2_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_EPIL]]
-; CHECK-NEXT:    store i32 2, ptr [[ARRAYIDX2_EPIL]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    br label %[[FOR_INNER_EPIL:.*]]
-; CHECK:       [[FOR_INNER_EPIL]]:
-; CHECK-NEXT:    [[SUM_EPIL:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL]] ], [ [[ADD9_EPIL:%.*]], %[[FOR_INNER_EPIL]] ]
-; CHECK-NEXT:    [[J_EPIL:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL]] ], [ [[ADD10_EPIL:%.*]], %[[FOR_INNER_EPIL]] ]
-; CHECK-NEXT:    [[ARRAYIDX7_EPIL:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_EPIL]]
-; CHECK-NEXT:    [[L1_EPIL:%.*]] = load i32, ptr [[ARRAYIDX7_EPIL]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD9_EPIL]] = add i32 [[L1_EPIL]], [[SUM_EPIL]]
-; CHECK-NEXT:    [[ADD10_EPIL]] = add nuw i32 [[J_EPIL]], 1
-; CHECK-NEXT:    [[EXITCOND_EPIL:%.*]] = icmp eq i32 [[ADD10_EPIL]], [[M]]
-; CHECK-NEXT:    br i1 [[EXITCOND_EPIL]], label %[[FOR_LATCH_EPIL:.*]], label %[[FOR_INNER_EPIL]]
-; CHECK:       [[FOR_LATCH_EPIL]]:
-; CHECK-NEXT:    [[ADD9_LCSSA_EPIL:%.*]] = phi i32 [ [[ADD9_EPIL]], %[[FOR_INNER_EPIL]] ]
-; CHECK-NEXT:    store i32 [[ADD9_LCSSA_EPIL]], ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i32 1, [[XTRAITER]]
-; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP]], label %[[FOR_OUTER_EPIL_1:.*]], label %[[FOR_END_LOOPEXIT_EPILOG_LCSSA:.*]]
-; CHECK:       [[FOR_OUTER_EPIL_1]]:
-; CHECK-NEXT:    [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_EPIL]]
-; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD_EPIL_1:%.*]] = add nuw i32 [[I_UNR]], 2
+; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX2_EPIL]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT:    [[ADD_EPIL_1]] = add nuw i32 [[ADD_EPIL]], 1
 ; CHECK-NEXT:    [[ARRAYIDX2_EPIL_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_EPIL_1]]
 ; CHECK-NEXT:    store i32 2, ptr [[ARRAYIDX2_EPIL_1]], align 4, !tbaa [[INT_TBAA0]]
 ; CHECK-NEXT:    br label %[[FOR_INNER_EPIL_1:.*]]
-; CHECK:       [[FOR_INNER_EPIL_1]]:
-; CHECK-NEXT:    [[SUM_EPIL_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_1]] ], [ [[ADD9_EPIL_1:%.*]], %[[FOR_INNER_EPIL_1]] ]
-; CHECK-NEXT:    [[J_EPIL_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_1]] ], [ [[ADD10_EPIL_1:%.*]], %[[FOR_INNER_EPIL_1]] ]
-; CHECK-NEXT:    [[ARRAYIDX7_EPIL_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_EPIL_1]]
-; CHECK-NEXT:    [[L1_EPIL_1:%.*]] = load i32, ptr [[ARRAYIDX7_EPIL_1]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD9_EPIL_1]] = add i32 [[L1_EPIL_1]], [[SUM_EPIL_1]]
-; CHECK-NEXT:    [[ADD10_EPIL_1]] = add nuw i32 [[J_EPIL_1]], 1
-; CHECK-NEXT:    [[EXITCOND_EPIL_1:%.*]] = icmp eq i32 [[ADD10_EPIL_1]], [[M]]
-; CHECK-NEXT:    br i1 [[EXITCOND_EPIL_1]], label %[[FOR_LATCH_EPIL_1:.*]], label %[[FOR_INNER_EPIL_1]]
 ; CHECK:       [[FOR_LATCH_EPIL_1]]:
-; CHECK-NEXT:    [[ADD9_LCSSA_EPIL_1:%.*]] = phi i32 [ [[ADD9_EPIL_1]], %[[FOR_INNER_EPIL_1]] ]
-; CHECK-NEXT:    store i32 [[ADD9_LCSSA_EPIL_1]], ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 2, [[XTRAITER]]
-; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_1]], label %[[FOR_OUTER_EPIL_2:.*]], label %[[FOR_END_LOOPEXIT_EPILOG_LCSSA]]
-; CHECK:       [[FOR_OUTER_EPIL_2]]:
-; CHECK-NEXT:    [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_EPIL_1]]
-; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD_EPIL_2:%.*]] = add nuw i32 [[I_UNR]], 3
-; CHECK-NEXT:    [[ARRAYIDX2_EPIL_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_EPIL_2]]
-; CHECK-NEXT:    store i32 2, ptr [[ARRAYIDX2_EPIL_2]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    br label %[[FOR_INNER_EPIL_2:.*]]
-; CHECK:       [[FOR_INNER_EPIL_2]]:
-; CHECK-NEXT:    [[SUM_EPIL_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_2]] ], [ [[ADD9_EPIL_2:%.*]], %[[FOR_INNER_EPIL_2]] ]
-; CHECK-NEXT:    [[J_EPIL_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_2]] ], [ [[ADD10_EPIL_2:%.*]], %[[FOR_INNER_EPIL_2]] ]
+; CHECK-NEXT:    [[ADD9_LCSSA:%.*]] = phi i32 [ [[ADD9_EPIL_2:%.*]], %[[FOR_INNER_EPIL_1]] ]
+; CHECK-NEXT:    store i32 [[ADD9_LCSSA]], ptr [[ARRAYIDX2_EPIL]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT:    [[EXITCOND30:%.*]] = icmp eq i32 [[ADD_EPIL_1]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND30]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_OUTER_EPIL]]
+; CHECK:       [[FOR_INNER_EPIL_1]]:
+; CHECK-NEXT:    [[SUM_EPIL_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL]] ], [ [[ADD9_EPIL_2]], %[[FOR_INNER_EPIL_1]] ]
+; CHECK-NEXT:    [[J_EPIL_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL]] ], [ [[ADD10_EPIL_2:%.*]], %[[FOR_INNER_EPIL_1]] ]
 ; CHECK-NEXT:    [[ARRAYIDX7_EPIL_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_EPIL_2]]
 ; CHECK-NEXT:    [[L1_EPIL_2:%.*]] = load i32, ptr [[ARRAYIDX7_EPIL_2]], align 4, !tbaa [[INT_TBAA0]]
 ; CHECK-NEXT:    [[ADD9_EPIL_2]] = add i32 [[L1_EPIL_2]], [[SUM_EPIL_2]]
 ; CHECK-NEXT:    [[ADD10_EPIL_2]] = add nuw i32 [[J_EPIL_2]], 1
 ; CHECK-NEXT:    [[EXITCOND_EPIL_2:%.*]] = icmp eq i32 [[ADD10_EPIL_2]], [[M]]
-; CHECK-NEXT:    br i1 [[EXITCOND_EPIL_2]], label %[[FOR_LATCH_EPIL_2:.*]], label %[[FOR_INNER_EPIL_2]]
-; CHECK:       [[FOR_LATCH_EPIL_2]]:
-; CHECK-NEXT:    [[ADD9_LCSSA_EPIL_2:%.*]] = phi i32 [ [[ADD9_EPIL_2]], %[[FOR_INNER_EPIL_2]] ]
-; CHECK-NEXT:    store i32 [[ADD9_LCSSA_EPIL_2]], ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    br label %[[FOR_END_LOOPEXIT_EPILOG_LCSSA]]
-; CHECK:       [[FOR_END_LOOPEXIT_EPILOG_LCSSA]]:
-; CHECK-NEXT:    br label %[[FOR_END_LOOPEXIT]]
+; CHECK-NEXT:    br i1 [[EXITCOND_EPIL_2]], label %[[FOR_LATCH_EPIL_1]], label %[[FOR_INNER_EPIL_1]]
 ; CHECK:       [[FOR_END_LOOPEXIT]]:
 ; CHECK-NEXT:    br label %[[FOR_END]]
 ; CHECK:       [[FOR_END]]:
@@ -1064,7 +837,7 @@ define void @test7_consts(ptr noalias nocapture %A, ptr noalias nocapture readon
 ; CHECK-NEXT:    store i32 [[ADD9_LCSSA_2]], ptr [[ARRAYIDX_2]], align 4, !tbaa [[INT_TBAA0]]
 ; CHECK-NEXT:    store i32 [[ADD9_LCSSA_3]], ptr [[ARRAYIDX_3]], align 4, !tbaa [[INT_TBAA0]]
 ; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NEXT_3]], 100
-; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label %[[FOR_END:.*]], label %[[FOR_OUTER]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label %[[FOR_END:.*]], label %[[FOR_OUTER]], !llvm.loop [[LOOP8:![0-9]+]]
 ; CHECK:       [[FOR_INNER]]:
 ; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD9]], %[[FOR_INNER]] ]
 ; CHECK-NEXT:    [[J:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD10:%.*]], %[[FOR_INNER]] ]
@@ -1165,158 +938,38 @@ define void @test8(i32 %N, i32 %M, ptr noalias nocapture %A, ptr noalias nocaptu
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[M]], 0
 ; CHECK-NEXT:    [[CMP336:%.*]] = icmp eq i32 [[N]], 0
 ; CHECK-NEXT:    [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP336]]
-; CHECK-NEXT:    br i1 [[OR_COND]], label %[[FOR_END:.*]], label %[[FOR_PREHEADER:.*]]
-; CHECK:       [[FOR_PREHEADER]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
-; CHECK-NEXT:    br label %[[FOR_OUTEST:.*]]
-; CHECK:       [[FOR_OUTEST]]:
-; CHECK-NEXT:    [[X_038:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_CLEANUP:.*]] ], [ 0, %[[FOR_PREHEADER]] ]
-; CHECK-NEXT:    [[XTRAITER:%.*]] = and i32 [[N]], 3
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 3
-; CHECK-NEXT:    br i1 [[TMP1]], label %[[FOR_OUTER_EPIL_PREHEADER:.*]], label %[[FOR_OUTEST_NEW:.*]]
-; CHECK:       [[FOR_OUTEST_NEW]]:
-; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = sub i32 [[N]], [[XTRAITER]]
-; CHECK-NEXT:    br label %[[FOR_OUTER:.*]]
+; CHECK-NEXT:    br i1 [[OR_COND]], label %[[FOR_END:.*]], label %[[FOR_OUTER:.*]]
 ; CHECK:       [[FOR_OUTER]]:
-; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[ADD_3:%.*]], %[[FOR_LATCH:.*]] ], [ 0, %[[FOR_OUTEST_NEW]] ]
-; CHECK-NEXT:    [[NITER:%.*]] = phi i32 [ 0, %[[FOR_OUTEST_NEW]] ], [ [[NITER_NEXT_3:%.*]], %[[FOR_LATCH]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I]]
-; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i32 [[I]], 1
-; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD]]
-; CHECK-NEXT:    store i32 2, ptr [[ARRAYIDX6]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD]]
-; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX_1]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD_1:%.*]] = add nuw nsw i32 [[I]], 2
-; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_1]]
-; CHECK-NEXT:    store i32 2, ptr [[ARRAYIDX_2]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_1]]
-; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX_4]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD_2:%.*]] = add nuw nsw i32 [[I]], 3
-; CHECK-NEXT:    [[ARRAYIDX6_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_2]]
-; CHECK-NEXT:    store i32 2, ptr [[ARRAYIDX6_2]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_2]]
-; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX_3]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD_3]] = add nuw i32 [[I]], 4
-; CHECK-NEXT:    [[ARRAYIDX6_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_3]]
-; CHECK-NEXT:    store i32 2, ptr [[ARRAYIDX6_3]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[NITER_NEXT_3]] = add i32 [[NITER]], 4
 ; CHECK-NEXT:    br label %[[FOR_INNER:.*]]
 ; CHECK:       [[FOR_INNER]]:
-; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD9:%.*]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    [[J:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD10:%.*]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    [[SUM_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD9_1:%.*]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    [[J_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD10_1:%.*]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    [[SUM_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD9_2:%.*]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    [[J_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD10_2:%.*]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    [[SUM_3:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD9_3:%.*]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    [[J_3:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD10_3:%.*]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J]]
-; CHECK-NEXT:    [[L1:%.*]] = load i32, ptr [[ARRAYIDX11]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD9]] = add i32 [[L1]], [[SUM]]
-; CHECK-NEXT:    [[ADD10]] = add nuw i32 [[J]], 1
-; CHECK-NEXT:    [[ARRAYIDX11_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_1]]
-; CHECK-NEXT:    [[L1_1:%.*]] = load i32, ptr [[ARRAYIDX11_1]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD9_1]] = add i32 [[L1_1]], [[SUM_1]]
-; CHECK-NEXT:    [[ADD10_1]] = add nuw i32 [[J_1]], 1
-; CHECK-NEXT:    [[ARRAYIDX11_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_2]]
-; CHECK-NEXT:    [[L1_2:%.*]] = load i32, ptr [[ARRAYIDX11_2]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD9_2]] = add i32 [[L1_2]], [[SUM_2]]
-; CHECK-NEXT:    [[ADD10_2]] = add nuw i32 [[J_2]], 1
-; CHECK-NEXT:    [[ARRAYIDX11_3:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_3]]
-; CHECK-NEXT:    [[L1_3:%.*]] = load i32, ptr [[ARRAYIDX11_3]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD9_3]] = add i32 [[L1_3]], [[SUM_3]]
-; CHECK-NEXT:    [[ADD10_3]] = add nuw i32 [[J_3]], 1
-; CHECK-NEXT:    [[EXITCOND_3:%.*]] = icmp eq i32 [[ADD10_3]], [[M]]
-; CHECK-NEXT:    br i1 [[EXITCOND_3]], label %[[FOR_LATCH]], label %[[FOR_INNER]]
-; CHECK:       [[FOR_LATCH]]:
-; CHECK-NEXT:    [[ADD9_LCSSA:%.*]] = phi i32 [ [[ADD9]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    [[ADD9_LCSSA_1:%.*]] = phi i32 [ [[ADD9_1]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    [[ADD9_LCSSA_2:%.*]] = phi i32 [ [[ADD9_2]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    [[ADD9_LCSSA_3:%.*]] = phi i32 [ [[ADD9_3]], %[[FOR_INNER]] ]
-; CHECK-NEXT:    store i32 [[ADD9_LCSSA]], ptr [[ARRAYIDX]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    store i32 [[ADD9_LCSSA_1]], ptr [[ARRAYIDX_1]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    store i32 [[ADD9_LCSSA_2]], ptr [[ARRAYIDX_4]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    store i32 [[ADD9_LCSSA_3]], ptr [[ARRAYIDX_3]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NEXT_3]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label %[[FOR_CLEANUP_UNR_LCSSA:.*]], label %[[FOR_OUTER]], !llvm.loop [[LOOP11:![0-9]+]]
-; CHECK:       [[FOR_CLEANUP_UNR_LCSSA]]:
-; CHECK-NEXT:    [[I_UNR1:%.*]] = phi i32 [ [[ADD_3]], %[[FOR_LATCH]] ]
-; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
-; CHECK-NEXT:    br i1 [[LCMP_MOD]], label %[[FOR_OUTER_EPIL_PREHEADER]], label %[[FOR_CLEANUP]]
-; CHECK:       [[FOR_OUTER_EPIL_PREHEADER]]:
-; CHECK-NEXT:    [[I_UNR:%.*]] = phi i32 [ 0, %[[FOR_OUTEST]] ], [ [[I_UNR1]], %[[FOR_CLEANUP_UNR_LCSSA]] ]
-; CHECK-NEXT:    [[LCMP_MOD1:%.*]] = icmp ne i32 [[XTRAITER]], 0
-; CHECK-NEXT:    call void @llvm.assume(i1 [[LCMP_MOD1]])
+; CHECK-NEXT:    [[X_038:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_CLEANUP:.*]] ], [ 0, %[[FOR_OUTER]] ]
 ; CHECK-NEXT:    br label %[[FOR_OUTER_EPIL:.*]]
 ; CHECK:       [[FOR_OUTER_EPIL]]:
-; CHECK-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I_UNR]]
-; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD_EPIL:%.*]] = add nuw i32 [[I_UNR]], 1
+; CHECK-NEXT:    [[ADD_EPIL:%.*]] = phi i32 [ [[ADD_EPIL_1:%.*]], %[[FOR_INNER_EPIL_2:.*]] ], [ 0, %[[FOR_INNER]] ]
 ; CHECK-NEXT:    [[ARRAYIDX6_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_EPIL]]
-; CHECK-NEXT:    store i32 2, ptr [[ARRAYIDX6_EPIL]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    br label %[[FOR_INNER_EPIL:.*]]
-; CHECK:       [[FOR_INNER_EPIL]]:
-; CHECK-NEXT:    [[SUM_EPIL:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL]] ], [ [[ADD9_EPIL:%.*]], %[[FOR_INNER_EPIL]] ]
-; CHECK-NEXT:    [[J_EPIL:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL]] ], [ [[ADD10_EPIL:%.*]], %[[FOR_INNER_EPIL]] ]
-; CHECK-NEXT:    [[ARRAYIDX11_EPIL:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_EPIL]]
-; CHECK-NEXT:    [[L1_EPIL:%.*]] = load i32, ptr [[ARRAYIDX11_EPIL]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD9_EPIL]] = add i32 [[L1_EPIL]], [[SUM_EPIL]]
-; CHECK-NEXT:    [[ADD10_EPIL]] = add nuw i32 [[J_EPIL]], 1
-; CHECK-NEXT:    [[EXITCOND_EPIL:%.*]] = icmp eq i32 [[ADD10_EPIL]], [[M]]
-; CHECK-NEXT:    br i1 [[EXITCOND_EPIL]], label %[[FOR_LATCH_EPIL:.*]], label %[[FOR_INNER_EPIL]]
-; CHECK:       [[FOR_LATCH_EPIL]]:
-; CHECK-NEXT:    [[ADD9_LCSSA_EPIL:%.*]] = phi i32 [ [[ADD9_EPIL]], %[[FOR_INNER_EPIL]] ]
-; CHECK-NEXT:    store i32 [[ADD9_LCSSA_EPIL]], ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[EPIL_ITER_CMP:%.*]] = icmp ne i32 1, [[XTRAITER]]
-; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP]], label %[[FOR_OUTER_EPIL_1:.*]], label %[[FOR_CLEANUP_EPILOG_LCSSA:.*]]
-; CHECK:       [[FOR_OUTER_EPIL_1]]:
-; CHECK-NEXT:    [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_EPIL]]
-; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD_EPIL_1:%.*]] = add nuw i32 [[I_UNR]], 2
+; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX6_EPIL]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT:    [[ADD_EPIL_1]] = add nuw i32 [[ADD_EPIL]], 1
 ; CHECK-NEXT:    [[ARRAYIDX6_EPIL_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_EPIL_1]]
 ; CHECK-NEXT:    store i32 2, ptr [[ARRAYIDX6_EPIL_1]], align 4, !tbaa [[INT_TBAA0]]
 ; CHECK-NEXT:    br label %[[FOR_INNER_EPIL_1:.*]]
 ; CHECK:       [[FOR_INNER_EPIL_1]]:
-; CHECK-NEXT:    [[SUM_EPIL_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_1]] ], [ [[ADD9_EPIL_1:%.*]], %[[FOR_INNER_EPIL_1]] ]
-; CHECK-NEXT:    [[J_EPIL_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_1]] ], [ [[ADD10_EPIL_1:%.*]], %[[FOR_INNER_EPIL_1]] ]
+; CHECK-NEXT:    [[SUM_EPIL_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL]] ], [ [[ADD9_EPIL_1:%.*]], %[[FOR_INNER_EPIL_1]] ]
+; CHECK-NEXT:    [[J_EPIL_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL]] ], [ [[ADD10_EPIL_1:%.*]], %[[FOR_INNER_EPIL_1]] ]
 ; CHECK-NEXT:    [[ARRAYIDX11_EPIL_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_EPIL_1]]
 ; CHECK-NEXT:    [[L1_EPIL_1:%.*]] = load i32, ptr [[ARRAYIDX11_EPIL_1]], align 4, !tbaa [[INT_TBAA0]]
 ; CHECK-NEXT:    [[ADD9_EPIL_1]] = add i32 [[L1_EPIL_1]], [[SUM_EPIL_1]]
 ; CHECK-NEXT:    [[ADD10_EPIL_1]] = add nuw i32 [[J_EPIL_1]], 1
 ; CHECK-NEXT:    [[EXITCOND_EPIL_1:%.*]] = icmp eq i32 [[ADD10_EPIL_1]], [[M]]
-; CHECK-NEXT:    br i1 [[EXITCOND_EPIL_1]], label %[[FOR_LATCH_EPIL_1:.*]], label %[[FOR_INNER_EPIL_1]]
-; CHECK:       [[FOR_LATCH_EPIL_1]]:
-; CHECK-NEXT:    [[ADD9_LCSSA_EPIL_1:%.*]] = phi i32 [ [[ADD9_EPIL_1]], %[[FOR_INNER_EPIL_1]] ]
-; CHECK-NEXT:    store i32 [[ADD9_LCSSA_EPIL_1]], ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 2, [[XTRAITER]]
-; CHECK-NEXT:    br i1 [[EPIL_ITER_CMP_1]], label %[[FOR_OUTER_EPIL_2:.*]], label %[[FOR_CLEANUP_EPILOG_LCSSA]]
-; CHECK:       [[FOR_OUTER_EPIL_2]]:
-; CHECK-NEXT:    [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_EPIL_1]]
-; CHECK-NEXT:    store i32 0, ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD_EPIL_2:%.*]] = add nuw i32 [[I_UNR]], 3
-; CHECK-NEXT:    [[ARRAYIDX6_EPIL_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_EPIL_2]]
-; CHECK-NEXT:    store i32 2, ptr [[ARRAYIDX6_EPIL_2]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    br label %[[FOR_INNER_EPIL_2:.*]]
+; CHECK-NEXT:    br i1 [[EXITCOND_EPIL_1]], label %[[FOR_INNER_EPIL_2]], label %[[FOR_INNER_EPIL_1]]
 ; CHECK:       [[FOR_INNER_EPIL_2]]:
-; CHECK-NEXT:    [[SUM_EPIL_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_2]] ], [ [[ADD9_EPIL_2:%.*]], %[[FOR_INNER_EPIL_2]] ]
-; CHECK-NEXT:    [[J_EPIL_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_2]] ], [ [[ADD10_EPIL_2:%.*]], %[[FOR_INNER_EPIL_2]] ]
-; CHECK-NEXT:    [[ARRAYIDX11_EPIL_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[J_EPIL_2]]
-; CHECK-NEXT:    [[L1_EPIL_2:%.*]] = load i32, ptr [[ARRAYIDX11_EPIL_2]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    [[ADD9_EPIL_2]] = add i32 [[L1_EPIL_2]], [[SUM_EPIL_2]]
-; CHECK-NEXT:    [[ADD10_EPIL_2]] = add nuw i32 [[J_EPIL_2]], 1
-; CHECK-NEXT:    [[EXITCOND_EPIL_2:%.*]] = icmp eq i32 [[ADD10_EPIL_2]], [[M]]
-; CHECK-NEXT:    br i1 [[EXITCOND_EPIL_2]], label %[[FOR_LATCH_EPIL_2:.*]], label %[[FOR_INNER_EPIL_2]]
-; CHECK:       [[FOR_LATCH_EPIL_2]]:
-; CHECK-NEXT:    [[ADD9_LCSSA_EPIL_2:%.*]] = phi i32 [ [[ADD9_EPIL_2]], %[[FOR_INNER_EPIL_2]] ]
-; CHECK-NEXT:    store i32 [[ADD9_LCSSA_EPIL_2]], ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[INT_TBAA0]]
-; CHECK-NEXT:    br label %[[FOR_CLEANUP_EPILOG_LCSSA]]
-; CHECK:       [[FOR_CLEANUP_EPILOG_LCSSA]]:
-; CHECK-NEXT:    br label %[[FOR_CLEANUP]]
+; CHECK-NEXT:    [[ADD9_LCSSA:%.*]] = phi i32 [ [[ADD9_EPIL_1]], %[[FOR_INNER_EPIL_1]] ]
+; CHECK-NEXT:    store i32 [[ADD9_LCSSA]], ptr [[ARRAYIDX6_EPIL]], align 4, !tbaa [[INT_TBAA0]]
+; CHECK-NEXT:    [[EXITCOND39:%.*]] = icmp eq i32 [[ADD_EPIL_1]], [[N]]
+; CHECK-NEXT:    br i1 [[EXITCOND39]], label %[[FOR_CLEANUP]], label %[[FOR_OUTER_EPIL]]
 ; CHECK:       [[FOR_CLEANUP]]:
 ; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[X_038]], 1
 ; CHECK-NEXT:    [[EXITCOND41:%.*]] = icmp eq i32 [[INC]], 5
-; CHECK-NEXT:    br i1 [[EXITCOND41]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_OUTEST]]
+; CHECK-NEXT:    br i1 [[EXITCOND41]], label %[[FOR_END_LOOPEXIT:.*]], label %[[FOR_INNER]]
 ; CHECK:       [[FOR_END_LOOPEXIT]]:
 ; CHECK-NEXT:    br label %[[FOR_END]]
 ; CHECK:       [[FOR_END]]:
@@ -1487,22 +1140,22 @@ define void @test9(i32 %N, i32 %M, ptr nocapture %A, ptr nocapture readonly %B)
 ; CHECK-NEXT:    [[J_3:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[INC_3:%.*]], %[[FOR_INNER]] ]
 ; CHECK-NEXT:    [[SUM_3:%.*]] = phi i32 [ 0, %[[FOR_OUTER]] ], [ [[ADD_3:%.*]], %[[FOR_INNER]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[J]]
-; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX]], align 4, !tbaa [[SHORT_TBAA12:![0-9]+]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr [[ARRAYIDX]], align 4, !tbaa [[SHORT_TBAA9:![0-9]+]]
 ; CHECK-NEXT:    [[SEXT:%.*]] = sext i16 [[TMP2]] to i32
 ; CHECK-NEXT:    [[ADD]] = add i32 [[SEXT]], [[SUM]]
 ; CHECK-NEXT:    [[INC]] = add nuw i32 [[J]], 1
 ; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[J_1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX_1]], align 4, !tbaa [[SHORT_TBAA12]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX_1]], align 4, !tbaa [[SHORT_TBAA9]]
 ; CHECK-NEXT:    [[SEXT_1:%.*]] = sext i16 [[TMP3]] to i32
 ; CHECK-NEXT:    [[ADD_1]] = add i32 [[SEXT_1]], [[SUM_1]]
 ; CHECK-NEXT:    [[INC_1]] = add nuw i32 [[J_1]], 1
 ; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[J_2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = load i16, ptr [[ARRAYIDX_2]], align 4, !tbaa [[SHORT_TBAA12]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i16, ptr [[ARRAYIDX_2]], align 4, !tbaa [[SHORT_TBAA9]]
 ; CHECK-NEXT:    [[SEXT_2:%.*]] = sext i16 [[TMP4]] to i32
 ; CHECK-NEXT:    [[ADD_2]] = add i32 [[SEXT_2]], [[SUM_2]]
 ; CHECK-NEXT:    [[INC_2]] = add nuw i32 [[J_2]], 1
 ; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[J_3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX_3]], align 4, !tbaa [[SHORT_TBAA12]]
+; CHECK-NEXT:    [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX_3]], align 4, !tbaa [[SHORT_TBAA9]]
 ; CHECK-NEXT:    [[SEXT_3:%.*]] = sext i16 [[TMP5]] to i32
 ; CHECK-NEXT:    [[ADD_3]] = add i32 [[SEXT_3]], [[SUM_3]]
 ; CHECK-NEXT:    [[INC_3]] = add nuw i32 [[J_3]], 1
@@ -1522,7 +1175,7 @@ define void @test9(i32 %N, i32 %M, ptr nocapture %A, ptr nocapture readonly %B)
 ; CHECK-NEXT:    [[ARRAYIDX6_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD8_2]]
 ; CHECK-NEXT:    store i32 [[ADD_LCSSA_3]], ptr [[ARRAYIDX6_3]], align 4, !tbaa [[INT_TBAA0]]
 ; CHECK-NEXT:    [[NITER_NCMP_3:%.*]] = icmp eq i32 [[NITER_NEXT_3]], [[UNROLL_ITER]]
-; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label %[[FOR_END_LOOPEXIT_UNR_LCSSA:.*]], label %[[FOR_OUTER]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK-NEXT:    br i1 [[NITER_NCMP_3]], label %[[FOR_END_LOOPEXIT_UNR_LCSSA:.*]], label %[[FOR_OUTER]], !llvm.loop [[LOOP11:![0-9]+]]
 ; CHECK:       [[FOR_END_LOOPEXIT_UNR_LCSSA]]:
 ; CHECK-NEXT:    [[I_UNR1:%.*]] = phi i32 [ [[ADD8_3]], %[[FOR_LATCH]] ]
 ; CHECK-NEXT:    [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0
@@ -1538,7 +1191,7 @@ define void @test9(i32 %N, i32 %M, ptr nocapture %A, ptr nocapture readonly %B)
 ; CHECK-NEXT:    [[J_EPIL:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL]] ], [ [[INC_EPIL:%.*]], %[[FOR_INNER_EPIL]] ]
 ; CHECK-NEXT:    [[SUM_EPIL:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL]] ], [ [[ADD_EPIL:%.*]], %[[FOR_INNER_EPIL]] ]
 ; CHECK-NEXT:    [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[J_EPIL]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load i16, ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[SHORT_TBAA12]]
+; CHECK-NEXT:    [[TMP6:%.*]] = load i16, ptr [[ARRAYIDX_EPIL]], align 4, !tbaa [[SHORT_TBAA9]]
 ; CHECK-NEXT:    [[SEXT_EPIL:%.*]] = sext i16 [[TMP6]] to i32
 ; CHECK-NEXT:    [[ADD_EPIL]] = add i32 [[SEXT_EPIL]], [[SUM_EPIL]]
 ; CHECK-NEXT:    [[INC_EPIL]] = add nuw i32 [[J_EPIL]], 1
@@ -1557,7 +1210,7 @@ define void @test9(i32 %N, i32 %M, ptr nocapture %A, ptr nocapture readonly %B)
 ; CHECK-NEXT:    [[J_EPIL_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_1]] ], [ [[INC_EPIL_1:%.*]], %[[FOR_INNER_EPIL_1]] ]
 ; CHECK-NEXT:    [[SUM_EPIL_1:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_1]] ], [ [[ADD_EPIL_1:%.*]], %[[FOR_INNER_EPIL_1]] ]
 ; CHECK-NEXT:    [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[J_EPIL_1]]
-; CHECK-NEXT:    [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[SHORT_TBAA12]]
+; CHECK-NEXT:    [[TMP7:%.*]] = load i16, ptr [[ARRAYIDX_EPIL_1]], align 4, !tbaa [[SHORT_TBAA9]]
 ; CHECK-NEXT:    [[SEXT_EPIL_1:%.*]] = sext i16 [[TMP7]] to i32
 ; CHECK-NEXT:    [[ADD_EPIL_1]] = add i32 [[SEXT_EPIL_1]], [[SUM_EPIL_1]]
 ; CHECK-NEXT:    [[INC_EPIL_1]] = add nuw i32 [[J_EPIL_1]], 1
@@ -1576,7 +1229,7 @@ define void @test9(i32 %N, i32 %M, ptr nocapture %A, ptr nocapture readonly %B)
 ; CHECK-NEXT:    [[J_EPIL_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_2]] ], [ [[INC_EPIL_2:%.*]], %[[FOR_INNER_EPIL_2]] ]
 ; CHECK-NEXT:    [[SUM_EPIL_2:%.*]] = phi i32 [ 0, %[[FOR_OUTER_EPIL_2]] ], [ [[ADD_EPIL_2:%.*]], %[[FOR_INNER_EPIL_2]] ]
 ; CHECK-NEXT:    [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i16, ptr [[B]], i32 [[J_EPIL_2]]
-; CHECK-NEXT:    [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[SHORT_TBAA12]]
+; CHECK-NEXT:    [[TMP8:%.*]] = load i16, ptr [[ARRAYIDX_EPIL_2]], align 4, !tbaa [[SHORT_TBAA9]]
 ; CHECK-NEXT:    [[SEXT_EPIL_2:%.*]] = sext i16 [[TMP8]] to i32
 ; CHECK-NEXT:    [[ADD_EPIL_2]] = add i32 [[SEXT_EPIL_2]], [[SUM_EPIL_2]]
 ; CHECK-NEXT:    [[INC_EPIL_2]] = add nuw i32 [[J_EPIL_2]], 1
@@ -1673,7 +1326,7 @@ define signext i16 @test10(i32 %k) #0 {
 ; CHECK-NEXT:    br i1 [[TOBOOL9]], label %[[FOR_BODY2_SPLIT_1:.*]], label %[[FOR_BODY2_SPLIT2_1:.*]]
 ; CHECK:       [[FOR_INC24]]:
 ; CHECK-NEXT:    [[STOREMERGE_4_LCSSA_3:%.*]] = phi i64 [ [[STOREMERGE_4_3:%.*]], %[[FOR_INC21_3]] ]
-; CHECK-NEXT:    br i1 false, label %[[FOR_BODY]], label %[[FOR_END26_UNR_LCSSA:.*]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK-NEXT:    br i1 false, label %[[FOR_BODY]], label %[[FOR_END26_UNR_LCSSA:.*]], !llvm.loop [[LOOP12:![0-9]+]]
 ; CHECK:       [[FOR_END26_UNR_LCSSA]]:
 ; CHECK-NEXT:    [[DEC_LCSSA_LCSSA_PH:%.*]] = phi i64 [ 0, %[[FOR_INC24]] ]
 ; CHECK-NEXT:    [[STOREMERGE_4_LCSSA_LCSSA_PH:%.*]] = phi i64 [ [[STOREMERGE_4_LCSSA_3]], %[[FOR_INC24]] ]
@@ -1813,11 +1466,8 @@ for.end26:
 ; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META5]]}
 ; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META5]]}
 ; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META5]]}
-; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META5]]}
-; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META5]]}
+; CHECK: [[SHORT_TBAA9]] = !{[[META10:![0-9]+]], [[META10]], i64 0}
+; CHECK: [[META10]] = !{!"short", [[META2]], i64 0}
 ; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META5]]}
-; CHECK: [[SHORT_TBAA12]] = !{[[META13:![0-9]+]], [[META13]], i64 0}
-; CHECK: [[META13]] = !{!"short", [[META2]], i64 0}
-; CHECK: [[LOOP14]] = distinct !{[[LOOP14]], [[META5]]}
-; CHECK: [[LOOP15]] = distinct !{[[LOOP15]], [[META5]]}
+; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META5]]}
 ;.

>From 364b1ebc3a6ed02d9eba106312e4b9c0c22ebc1b Mon Sep 17 00:00:00 2001
From: Ehsan Amiri <ehsan.amiri at huawei.com>
Date: Fri, 27 Feb 2026 16:01:54 -0500
Subject: [PATCH 3/6] change fusion testcase

---
 .../LoopFusion/da_separate_loops.ll           | 23 +++++++++----------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/llvm/test/Transforms/LoopFusion/da_separate_loops.ll b/llvm/test/Transforms/LoopFusion/da_separate_loops.ll
index 22689eda88561..84282c19b3cbb 100644
--- a/llvm/test/Transforms/LoopFusion/da_separate_loops.ll
+++ b/llvm/test/Transforms/LoopFusion/da_separate_loops.ll
@@ -1,10 +1,11 @@
 ; REQUIRES: asserts
 
 ; RUN: opt -passes=loop-fusion -da-disable-delinearization-checks -disable-output -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
-; STAT: 2 loop-fusion - Dependencies prevent fusion
+; STAT: 2 loop-fusion - DA checks passed 
 
 ; The two inner loops have no dependency and are allowed to be fused as in the
 ; outer loops, different levels are accessed to.
+; After fixing correctness issues of DA, the loops are not fused.
 
 ; C Code
 ;
@@ -19,23 +20,21 @@
 
 define void @nonequal_outer_access(i64 %n, ptr %A) nounwind uwtable ssp {
 entry:
-  %cmp10 = icmp sgt i64 %n, 0
-  br i1 %cmp10, label %for.cond1.preheader.preheader, label %for.end26
+  br label %for.cond1.preheader.preheader
 
 for.cond1.preheader.preheader:                    ; preds = %entry
   br label %for.cond1.preheader
 
 for.cond1.preheader:                              ; preds = %for.cond1.preheader.preheader, %for.inc24
   %i.011 = phi i64 [ %inc25, %for.inc24 ], [ 0, %for.cond1.preheader.preheader ]
-  %cmp26 = icmp sgt i64 %n, 0
-  br i1 %cmp26, label %for.cond4.preheader.preheader, label %for.inc24
+  br label %for.cond4.preheader.preheader
 
 for.cond4.preheader.preheader:                    ; preds = %for.cond1.preheader
   br label %for.cond4.preheader
 
 for.cond4.preheader:                              ; preds = %for.cond4.preheader.preheader, %for.inc21
   %j.07 = phi i64 [ %inc22, %for.inc21 ], [ 0, %for.cond4.preheader.preheader ]
-  %cmp51 = icmp sgt i64 %n, 0
+  %cmp51 = icmp sgt i64 1000, 0
   br i1 %cmp51, label %for.body6.preheader, label %for.cond10.loopexit
 
 for.body6.preheader:                              ; preds = %for.cond4.preheader
@@ -46,14 +45,14 @@ for.body6:                                        ; preds = %for.body6.preheader
   %arrayidx8 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %i.011, i64 %j.07, i64 %k.02
   store i64 %i.011, ptr %arrayidx8, align 8
   %inc = add nsw i64 %k.02, 1
-  %exitcond13 = icmp ne i64 %inc, %n
+  %exitcond13 = icmp ne i64 %inc, 1000
   br i1 %exitcond13, label %for.body6, label %for.cond10.loopexit.loopexit
 
 for.cond10.loopexit.loopexit:                     ; preds = %for.body6
   br label %for.cond10.loopexit
 
 for.cond10.loopexit:                              ; preds = %for.cond10.loopexit.loopexit, %for.cond4.preheader
-  %cmp113 = icmp sgt i64 %n, 0
+  %cmp113 = icmp sgt i64 1000, 0
   br i1 %cmp113, label %for.body12.preheader, label %for.inc21
 
 for.body12.preheader:                             ; preds = %for.cond10.loopexit
@@ -67,7 +66,7 @@ for.body12:                                       ; preds = %for.body12.preheade
   %arrayidx17 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %add14, i64 %add13, i64 %add
   %0 = load i64, ptr %arrayidx17, align 8
   %inc19 = add nsw i64 %k9.05, 1
-  %exitcond = icmp ne i64 %inc19, %n
+  %exitcond = icmp ne i64 %inc19, 1000
   br i1 %exitcond, label %for.body12, label %for.inc21.loopexit
 
 for.inc21.loopexit:                               ; preds = %for.body12
@@ -75,7 +74,7 @@ for.inc21.loopexit:                               ; preds = %for.body12
 
 for.inc21:                                        ; preds = %for.inc21.loopexit, %for.cond10.loopexit
   %inc22 = add nsw i64 %j.07, 1
-  %exitcond14 = icmp ne i64 %inc22, %n
+  %exitcond14 = icmp ne i64 %inc22, 1000
   br i1 %exitcond14, label %for.cond4.preheader, label %for.inc24.loopexit
 
 for.inc24.loopexit:                               ; preds = %for.inc21
@@ -83,7 +82,7 @@ for.inc24.loopexit:                               ; preds = %for.inc21
 
 for.inc24:                                        ; preds = %for.inc24.loopexit, %for.cond1.preheader
   %inc25 = add nsw i64 %i.011, 1
-  %exitcond15 = icmp ne i64 %inc25, %n
+  %exitcond15 = icmp ne i64 %inc25, 1000
   br i1 %exitcond15, label %for.cond1.preheader, label %for.end26.loopexit
 
 for.end26.loopexit:                               ; preds = %for.inc24
@@ -179,4 +178,4 @@ for.end26.loopexit:                               ; preds = %for.inc24
 
 for.end26:                                        ; preds = %for.end26.loopexit, %entry
   ret void
-}
\ No newline at end of file
+}

>From 9ebdec087a1b921b2987736e66970b05565c0382 Mon Sep 17 00:00:00 2001
From: Ehsan Amiri <ehsan.amiri at huawei.com>
Date: Fri, 27 Feb 2026 16:03:51 -0500
Subject: [PATCH 4/6] more changes in the fusion testcase

---
 .../test/Transforms/LoopFusion/da_separate_loops.ll | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/llvm/test/Transforms/LoopFusion/da_separate_loops.ll b/llvm/test/Transforms/LoopFusion/da_separate_loops.ll
index 84282c19b3cbb..e93bfb78adee5 100644
--- a/llvm/test/Transforms/LoopFusion/da_separate_loops.ll
+++ b/llvm/test/Transforms/LoopFusion/da_separate_loops.ll
@@ -1,19 +1,18 @@
 ; REQUIRES: asserts
 
 ; RUN: opt -passes=loop-fusion -da-disable-delinearization-checks -disable-output -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
-; STAT: 2 loop-fusion - DA checks passed 
+; STAT: 2 loop-fusion - DA checks passed
 
 ; The two inner loops have no dependency and are allowed to be fused as in the
 ; outer loops, different levels are accessed to.
-; After fixing correctness issues of DA, the loops are not fused.
 
 ; C Code
 ;
-;;  for (long int i = 0; i < n; i++) {
-;;    for (long int j = 0; j < n; j++) {
-;;      for (long int k = 0; k < n; k++)
+;;  for (long int i = 0; i < 1000; i++) {
+;;    for (long int j = 0; j < 1000; j++) {
+;;      for (long int k = 0; k < 1000; k++)
 ;;        A[i][j][k] = i;
-;;      for (long int k = 0; k < n; k++)
+;;      for (long int k = 0; k < 1000; k++)
 ;;        temp = A[i + 3][j + 2][k + 1];
 ;;    }
 ;;  }
@@ -178,4 +177,4 @@ for.end26.loopexit:                               ; preds = %for.inc24
 
 for.end26:                                        ; preds = %for.end26.loopexit, %entry
   ret void
-}
+}
\ No newline at end of file

>From f0cc48799a6c5adb8f4b596c2315a46d5fb0a67c Mon Sep 17 00:00:00 2001
From: Ehsan Amiri <ehsan.amiri at huawei.com>
Date: Wed, 4 Mar 2026 16:35:32 -0500
Subject: [PATCH 5/6] address review comments on loop interchange testcase

---
 .../LoopInterchange/lcssa-preheader.ll        | 31 +++++++++++++------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/llvm/test/Transforms/LoopInterchange/lcssa-preheader.ll b/llvm/test/Transforms/LoopInterchange/lcssa-preheader.ll
index c9bf1d9f7126b..66078a857fe3c 100644
--- a/llvm/test/Transforms/LoopInterchange/lcssa-preheader.ll
+++ b/llvm/test/Transforms/LoopInterchange/lcssa-preheader.ll
@@ -161,7 +161,7 @@ define void @test2(i32 %N) {
 ; CHECK-NEXT:    br label %[[INNER:.*]]
 ; CHECK:       [[INNER]]:
 ; CHECK-NEXT:    [[INNER_IV:%.*]] = phi i64 [ 0, %[[OUTER_HEADER]] ], [ [[TMP0:%.*]], %[[INNER]] ]
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [4 x [4 x [2 x i16]]], ptr @global, i64 0, i64 [[INNER_IV]], i64 [[OUTER_IV]], i64 0
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [2 x i16], ptr @global, i64 [[INNER_IV]], i64 [[OUTER_IV]]
 ; CHECK-NEXT:    store i16 0, ptr [[TMP8]], align 2
 ; CHECK-NEXT:    [[TMP0]] = add nsw i64 [[INNER_IV]], 1
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[TMP0]], [[N_EXT]]
@@ -175,24 +175,35 @@ define void @test2(i32 %N) {
 ;
 ; CHECK-DELIN-LABEL: define void @test2(
 ; CHECK-DELIN-SAME: i32 [[N:%.*]]) {
-; CHECK-DELIN-NEXT:  [[OUTER_HEADER_PREHEADER:.*]]:
+; CHECK-DELIN-NEXT:  [[BB:.*:]]
+; CHECK-DELIN-NEXT:    br label %[[INNER_PREHEADER:.*]]
+; CHECK-DELIN:       [[OUTER_HEADER_PREHEADER:.*]]:
 ; CHECK-DELIN-NEXT:    br label %[[OUTER_HEADER:.*]]
 ; CHECK-DELIN:       [[OUTER_HEADER]]:
-; CHECK-DELIN-NEXT:    [[OUTER_IV:%.*]] = phi i64 [ 0, %[[OUTER_HEADER_PREHEADER]] ], [ [[OUTER_IV_NEXT:%.*]], %[[OUTER_LATCH:.*]] ]
+; CHECK-DELIN-NEXT:    [[OUTER_IV:%.*]] = phi i64 [ [[OUTER_IV_NEXT:%.*]], %[[OUTER_LATCH:.*]] ], [ 0, %[[OUTER_HEADER_PREHEADER]] ]
 ; CHECK-DELIN-NEXT:    [[N_EXT:%.*]] = sext i32 [[N]] to i64
 ; CHECK-DELIN-NEXT:    br label %[[INNER_SPLIT1:.*]]
+; CHECK-DELIN:       [[INNER_PREHEADER]]:
+; CHECK-DELIN-NEXT:    br label %[[INNER:.*]]
+; CHECK-DELIN:       [[INNER]]:
+; CHECK-DELIN-NEXT:    [[INNER_IV:%.*]] = phi i64 [ [[TMP0:%.*]], %[[INNER_SPLIT:.*]] ], [ 0, %[[INNER_PREHEADER]] ]
+; CHECK-DELIN-NEXT:    br label %[[OUTER_HEADER_PREHEADER]]
 ; CHECK-DELIN:       [[INNER_SPLIT1]]:
-; CHECK-DELIN-NEXT:    [[INNER_IV:%.*]] = phi i64 [ 0, %[[OUTER_HEADER]] ], [ [[TMP0:%.*]], %[[INNER_SPLIT1]] ]
-; CHECK-DELIN-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [4 x [4 x [2 x i16]]], ptr @global, i64 0, i64 [[INNER_IV]], i64 [[OUTER_IV]], i64 0
+; CHECK-DELIN-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [2 x i16], ptr @global, i64 [[INNER_IV]], i64 [[OUTER_IV]]
 ; CHECK-DELIN-NEXT:    store i16 0, ptr [[TMP8]], align 2
+; CHECK-DELIN-NEXT:    [[INNER_IV_NEXT:%.*]] = add nsw i64 [[INNER_IV]], 1
+; CHECK-DELIN-NEXT:    [[C_1:%.*]] = icmp ne i64 [[INNER_IV_NEXT]], [[N_EXT]]
+; CHECK-DELIN-NEXT:    br label %[[OUTER_LATCH]]
+; CHECK-DELIN:       [[INNER_SPLIT]]:
+; CHECK-DELIN-NEXT:    [[N_EXT_LCSSA:%.*]] = phi i64 [ [[N_EXT]], %[[OUTER_LATCH]] ]
 ; CHECK-DELIN-NEXT:    [[TMP0]] = add nsw i64 [[INNER_IV]], 1
-; CHECK-DELIN-NEXT:    [[C_1:%.*]] = icmp ne i64 [[TMP0]], [[N_EXT]]
-; CHECK-DELIN-NEXT:    br i1 [[C_1]], label %[[INNER_SPLIT1]], label %[[OUTER_LATCH]]
+; CHECK-DELIN-NEXT:    [[TMP1:%.*]] = icmp ne i64 [[TMP0]], [[N_EXT_LCSSA]]
+; CHECK-DELIN-NEXT:    br i1 [[TMP1]], label %[[INNER]], label %[[EXIT:.*]]
 ; CHECK-DELIN:       [[OUTER_LATCH]]:
 ; CHECK-DELIN-NEXT:    [[OUTER_IV_NEXT]] = add nsw i64 [[OUTER_IV]], 1
 ; CHECK-DELIN-NEXT:    [[C_2:%.*]] = icmp ne i64 [[OUTER_IV]], [[N_EXT]]
-; CHECK-DELIN-NEXT:    br i1 [[C_2]], label %[[OUTER_HEADER]], label %[[INNER_SPLIT:.*]]
-; CHECK-DELIN:       [[INNER_SPLIT]]:
+; CHECK-DELIN-NEXT:    br i1 [[C_2]], label %[[OUTER_HEADER]], label %[[INNER_SPLIT]]
+; CHECK-DELIN:       [[EXIT]]:
 ; CHECK-DELIN-NEXT:    ret void
 ;
 bb:
@@ -205,7 +216,7 @@ outer.header:                                              ; preds = %bb11, %bb2
 
 inner:                                              ; preds = %bb6, %bb4
   %inner.iv = phi i64 [ 0, %outer.header ], [ %inner.iv.next, %inner ]
-  %gep = getelementptr inbounds [4 x [4 x [2 x i16]]], ptr @global, i64 0, i64 %inner.iv, i64 %outer.iv, i64 0
+  %gep = getelementptr inbounds [2 x i16], ptr @global, i64 %inner.iv, i64 %outer.iv
   store i16 0, ptr %gep
   %inner.iv.next = add nsw i64 %inner.iv, 1
   %c.1 = icmp ne i64 %inner.iv.next, %N.ext

>From 7c8fb3105ee7b70486d7ec495b765c2072fc21d6 Mon Sep 17 00:00:00 2001
From: Ehsan Amiri <ehsan.amiri at huawei.com>
Date: Thu, 5 Mar 2026 14:15:04 -0500
Subject: [PATCH 6/6] fixing the loop fusion test

---
 .../LoopFusion/da_separate_loops.ll           | 31 ++++++++++---------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/llvm/test/Transforms/LoopFusion/da_separate_loops.ll b/llvm/test/Transforms/LoopFusion/da_separate_loops.ll
index e93bfb78adee5..822ed41407302 100644
--- a/llvm/test/Transforms/LoopFusion/da_separate_loops.ll
+++ b/llvm/test/Transforms/LoopFusion/da_separate_loops.ll
@@ -1,39 +1,42 @@
 ; REQUIRES: asserts
 
 ; RUN: opt -passes=loop-fusion -da-disable-delinearization-checks -disable-output -stats < %s 2>&1 | FileCheck -check-prefix=STAT %s
-; STAT: 2 loop-fusion - DA checks passed
+; STAT: 2 loop-fusion - Dependencies prevent fusion
+; STAT: 1 loop-fusion - DA checks passed
 
 ; The two inner loops have no dependency and are allowed to be fused as in the
 ; outer loops, different levels are accessed to.
 
 ; C Code
 ;
-;;  for (long int i = 0; i < 1000; i++) {
-;;    for (long int j = 0; j < 1000; j++) {
-;;      for (long int k = 0; k < 1000; k++)
+;;  for (long int i = 0; i < n; i++) {
+;;    for (long int j = 0; j < n; j++) {
+;;      for (long int k = 0; k < n; k++)
 ;;        A[i][j][k] = i;
-;;      for (long int k = 0; k < 1000; k++)
+;;      for (long int k = 0; k < n; k++)
 ;;        temp = A[i + 3][j + 2][k + 1];
 ;;    }
 ;;  }
 
 define void @nonequal_outer_access(i64 %n, ptr %A) nounwind uwtable ssp {
 entry:
-  br label %for.cond1.preheader.preheader
+  %cmp10 = icmp sgt i64 %n, 0
+  br i1 %cmp10, label %for.cond1.preheader.preheader, label %for.end26
 
 for.cond1.preheader.preheader:                    ; preds = %entry
   br label %for.cond1.preheader
 
 for.cond1.preheader:                              ; preds = %for.cond1.preheader.preheader, %for.inc24
   %i.011 = phi i64 [ %inc25, %for.inc24 ], [ 0, %for.cond1.preheader.preheader ]
-  br label %for.cond4.preheader.preheader
+  %cmp26 = icmp sgt i64 %n, 0
+  br i1 %cmp26, label %for.cond4.preheader.preheader, label %for.inc24
 
 for.cond4.preheader.preheader:                    ; preds = %for.cond1.preheader
   br label %for.cond4.preheader
 
 for.cond4.preheader:                              ; preds = %for.cond4.preheader.preheader, %for.inc21
   %j.07 = phi i64 [ %inc22, %for.inc21 ], [ 0, %for.cond4.preheader.preheader ]
-  %cmp51 = icmp sgt i64 1000, 0
+  %cmp51 = icmp sgt i64 %n, 0
   br i1 %cmp51, label %for.body6.preheader, label %for.cond10.loopexit
 
 for.body6.preheader:                              ; preds = %for.cond4.preheader
@@ -44,14 +47,14 @@ for.body6:                                        ; preds = %for.body6.preheader
   %arrayidx8 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %i.011, i64 %j.07, i64 %k.02
   store i64 %i.011, ptr %arrayidx8, align 8
   %inc = add nsw i64 %k.02, 1
-  %exitcond13 = icmp ne i64 %inc, 1000
+  %exitcond13 = icmp ne i64 %inc, %n
   br i1 %exitcond13, label %for.body6, label %for.cond10.loopexit.loopexit
 
 for.cond10.loopexit.loopexit:                     ; preds = %for.body6
   br label %for.cond10.loopexit
 
 for.cond10.loopexit:                              ; preds = %for.cond10.loopexit.loopexit, %for.cond4.preheader
-  %cmp113 = icmp sgt i64 1000, 0
+  %cmp113 = icmp sgt i64 %n, 0
   br i1 %cmp113, label %for.body12.preheader, label %for.inc21
 
 for.body12.preheader:                             ; preds = %for.cond10.loopexit
@@ -65,7 +68,7 @@ for.body12:                                       ; preds = %for.body12.preheade
   %arrayidx17 = getelementptr inbounds [100 x [100 x i64]], ptr %A, i64 %add14, i64 %add13, i64 %add
   %0 = load i64, ptr %arrayidx17, align 8
   %inc19 = add nsw i64 %k9.05, 1
-  %exitcond = icmp ne i64 %inc19, 1000
+  %exitcond = icmp ne i64 %inc19, %n
   br i1 %exitcond, label %for.body12, label %for.inc21.loopexit
 
 for.inc21.loopexit:                               ; preds = %for.body12
@@ -73,7 +76,7 @@ for.inc21.loopexit:                               ; preds = %for.body12
 
 for.inc21:                                        ; preds = %for.inc21.loopexit, %for.cond10.loopexit
   %inc22 = add nsw i64 %j.07, 1
-  %exitcond14 = icmp ne i64 %inc22, 1000
+  %exitcond14 = icmp ne i64 %inc22, %n
   br i1 %exitcond14, label %for.cond4.preheader, label %for.inc24.loopexit
 
 for.inc24.loopexit:                               ; preds = %for.inc21
@@ -81,7 +84,7 @@ for.inc24.loopexit:                               ; preds = %for.inc21
 
 for.inc24:                                        ; preds = %for.inc24.loopexit, %for.cond1.preheader
   %inc25 = add nsw i64 %i.011, 1
-  %exitcond15 = icmp ne i64 %inc25, 1000
+  %exitcond15 = icmp ne i64 %inc25, %n
   br i1 %exitcond15, label %for.cond1.preheader, label %for.end26.loopexit
 
 for.end26.loopexit:                               ; preds = %for.inc24
@@ -177,4 +180,4 @@ for.end26.loopexit:                               ; preds = %for.inc24
 
 for.end26:                                        ; preds = %for.end26.loopexit, %entry
   ret void
-}
\ No newline at end of file
+}



More information about the llvm-commits mailing list