[llvm] [DependenceAnalysis] Fix incorrect analysis of wrapping AddRec expressions (PR #154982)

Sat Aug 23 08:18:57 PDT 2025

https://github.com/sebpop updated https://github.com/llvm/llvm-project/pull/154982

>From 6c358c66ea2712391f5fb87f079bd7da33bc9e28 Mon Sep 17 00:00:00 2001
From: Sebastian Pop <spop at nvidia.com>
Date: Thu, 21 Aug 2025 14:39:49 -0500
Subject: [PATCH 1/3] [DependenceAnalysis] Fix SIV test crash when no AddRec
 after propagation

Fixes GitHub issue #148435, where testSIV() would hit an assertion failure
when neither the Src nor the Dst expression contains an AddRec after
constraint propagation. This can occur when propagation simplifies the
expressions to non-AddRec forms, leaving what is effectively a ZIV
(zero induction variable) subscript.

The fix falls back to ZIV analysis in this case, treating the simplified
expressions as loop-invariant, which is the correct behavior when all
induction variable references have been eliminated through propagation.
The patch also fixes a MIV case that may decay into a ZIV test.

Add missing NonLinear case to switch statements in propagation code
to prevent 'bad subscript classification' crash when subscripts are
reclassified as NonLinear after constraint propagation.

Add regression test to prevent future occurrences of this issue.
---
 llvm/lib/Analysis/DependenceAnalysis.cpp      | 61 +++++++++---
 .../Analysis/DependenceAnalysis/PR148435.ll   | 95 +++++++++++++++++++
 .../DependenceAnalysis/bounds-check.ll        | 29 ++++++
 3 files changed, 170 insertions(+), 15 deletions(-)
 create mode 100644 llvm/test/Analysis/DependenceAnalysis/PR148435.ll
 create mode 100644 llvm/test/Analysis/DependenceAnalysis/bounds-check.ll

diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index f33e04e804e3d..c6c2bc9b4ba25 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -873,6 +873,9 @@ void DependenceInfo::collectCommonLoops(const SCEV *Expression,
                                         SmallBitVector &Loops) const {
   while (LoopNest) {
     unsigned Level = LoopNest->getLoopDepth();
+    LLVM_DEBUG(dbgs() << "MaxLevels = " << MaxLevels << "\n");
+    LLVM_DEBUG(dbgs() << "Level = " << Level << "\n");
+    assert(Level <= MaxLevels && "Level larger than MaxLevels.");
     if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest))
       Loops.set(Level);
     LoopNest = LoopNest->getParentLoop();
@@ -959,6 +962,10 @@ bool DependenceInfo::checkSubscript(const SCEV *Expr, const Loop *LoopNest,
   if (!AddRec)
     return isLoopInvariant(Expr, LoopNest);
 
+  const SCEV *Step = AddRec->getStepRecurrence(*SE);
+  if (!isLoopInvariant(Step, LoopNest))
+    return false;
+
   // The AddRec must depend on one of the containing loops. Otherwise,
   // mapSrcLoop and mapDstLoop return indices outside the intended range. This
   // can happen when a subscript in one loop references an IV from a sibling
@@ -970,14 +977,16 @@ bool DependenceInfo::checkSubscript(const SCEV *Expr, const Loop *LoopNest,
   if (!L)
     return false;
 
+  unsigned Level = IsSrc ? mapSrcLoop(L) : mapDstLoop(L);
+  // Check that the mapped loop index is within bounds for the SmallBitVector.
+  // An out-of-bounds index can occur when loop depths exceed MaxLevels due to
+  // the mapping algorithm.
+
+  LLVM_DEBUG(dbgs() << "MaxLevels = " << MaxLevels << "\n");
+  LLVM_DEBUG(dbgs() << "Level = " << Level << "\n");
+  assert(Level <= MaxLevels && "Level larger than MaxLevels.");
+  Loops.set(Level);
   const SCEV *Start = AddRec->getStart();
-  const SCEV *Step = AddRec->getStepRecurrence(*SE);
-  if (!isLoopInvariant(Step, LoopNest))
-    return false;
-  if (IsSrc)
-    Loops.set(mapSrcLoop(AddRec->getLoop()));
-  else
-    Loops.set(mapDstLoop(AddRec->getLoop()));
   return checkSubscript(Start, LoopNest, Loops, IsSrc);
 }
 
@@ -2281,8 +2290,14 @@ bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level,
                               Result, NewConstraint) ||
            gcdMIVtest(Src, Dst, Result);
   }
-  llvm_unreachable("SIV test expected at least one AddRec");
-  return false;
+  // If neither expression is an AddRec, this means propagation has simplified
+  // them to non-AddRec forms. In this case, fall back to ZIV analysis since
+  // the expressions are effectively loop-invariant.
+  LLVM_DEBUG(dbgs() << "    falling back to ZIV test due to no AddRec\n");
+  // Set to first valid level to avoid Level=0 causing DV[-1] access.
+  // See comment in establishNestingLevels.
+  Level = 1;
+  return testZIV(Src, Dst, Result);
 }
 
 // testRDIV -
@@ -2343,8 +2358,14 @@ bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst,
       SrcLoop = DstAddRec->getLoop();
     } else
       llvm_unreachable("RDIV reached by surprising SCEVs");
-  } else
-    llvm_unreachable("RDIV expected at least one AddRec");
+  } else {
+    // If neither expression is an AddRec, this means propagation has simplified
+    // them to non-AddRec forms. Fall back to ZIV analysis since the expressions
+    // are effectively loop-invariant.
+    LLVM_DEBUG(
+        dbgs() << "    RDIV falling back to ZIV test due to no AddRec\n");
+    return testZIV(Src, Dst, Result);
+  }
   return exactRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, SrcLoop, DstLoop,
                        Result) ||
          gcdMIVtest(Src, Dst, Result) ||
@@ -3821,7 +3842,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
       break;
     case Subscript::SIV: {
       LLVM_DEBUG(dbgs() << ", SIV\n");
-      unsigned Level;
+      unsigned Level = 0;
       const SCEV *SplitIter = nullptr;
       if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint,
                   SplitIter))
@@ -3872,12 +3893,17 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
         for (unsigned SJ : Sivs.set_bits()) {
           LLVM_DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n");
           // SJ is an SIV subscript that's part of the current coupled group
-          unsigned Level;
+          unsigned Level = 0;
           const SCEV *SplitIter = nullptr;
           LLVM_DEBUG(dbgs() << "SIV\n");
           if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint,
                       SplitIter))
             return nullptr;
+
+          LLVM_DEBUG(dbgs() << "MaxLevels = " << MaxLevels << "\n");
+          LLVM_DEBUG(dbgs() << "Level = " << Level << "\n");
+          assert(Level <= MaxLevels && "Level larger than MaxLevels.");
+
           ConstrainedLevels.set(Level);
           if (intersectConstraints(&Constraints[Level], &NewConstraint)) {
             if (Constraints[Level].isEmpty()) {
@@ -4155,7 +4181,7 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep,
   for (unsigned SI : Separable.set_bits()) {
     switch (Pair[SI].Classification) {
     case Subscript::SIV: {
-      unsigned Level;
+      unsigned Level = 0;
       const SCEV *SplitIter = nullptr;
       (void)testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint,
                     SplitIter);
@@ -4195,12 +4221,17 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep,
       bool Changed = false;
       for (unsigned SJ : Sivs.set_bits()) {
         // SJ is an SIV subscript that's part of the current coupled group
-        unsigned Level;
+        unsigned Level = 0;
         const SCEV *SplitIter = nullptr;
         (void)testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint,
                       SplitIter);
         if (Level == SplitLevel && SplitIter)
           return SplitIter;
+
+        LLVM_DEBUG(dbgs() << "MaxLevels = " << MaxLevels << "\n");
+        LLVM_DEBUG(dbgs() << "Level = " << Level << "\n");
+        assert(Level <= MaxLevels && "Level larger than MaxLevels.");
+
         ConstrainedLevels.set(Level);
         if (intersectConstraints(&Constraints[Level], &NewConstraint))
           Changed = true;
diff --git a/llvm/test/Analysis/DependenceAnalysis/PR148435.ll b/llvm/test/Analysis/DependenceAnalysis/PR148435.ll
new file mode 100644
index 0000000000000..1633a91336f68
--- /dev/null
+++ b/llvm/test/Analysis/DependenceAnalysis/PR148435.ll
@@ -0,0 +1,95 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -disable-output "-passes=print<da>" 2>&1 | FileCheck %s
+
+; Test case for bug #148435 - SIV test assertion failure.
+; This test ensures that testSIV handles the case where neither Src nor Dst
+; expressions contain AddRec after propagation, which can happen when
+; constraints simplify the expressions to non-AddRec forms.
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @_Z1cb(ptr %a) {
+; CHECK-LABEL: '_Z1cb'
+; CHECK-NEXT:  Src: store i8 0, ptr %arrayidx9, align 1 --> Dst: store i8 0, ptr %arrayidx9, align 1
+; CHECK-NEXT:    da analyze - output [*]!
+;
+entry:
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %for.body
+  ret void
+
+for.body:                                         ; preds = %for.body, %entry
+  %indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for.body ]
+  %idxprom = and i64 %indvars.iv23, 1
+  %arrayidx9 = getelementptr inbounds [0 x [12 x [12 x i8]]], ptr %a, i64 0, i64 %idxprom, i64 0, i64 %indvars.iv23
+  store i8 0, ptr %arrayidx9, align 1
+  %indvars.iv.next24 = add i64 %indvars.iv23, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next24, 0
+  br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+@a = external global [0 x [12 x [12 x i8]]], align 1
+
+define void @test_siv_no_addrec(i1 %d, i32 %b) {
+; CHECK-LABEL: 'test_siv_no_addrec'
+; CHECK-NEXT:  Src: store i8 0, ptr %arrayidx7, align 1 --> Dst: store i8 0, ptr %arrayidx7, align 1
+; CHECK-NEXT:    da analyze - output [* *]!
+;
+entry:
+  %conv.val = select i1 %d, i16 1, i16 0
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc8, %entry
+  %e.0 = phi i32 [ %b, %entry ], [ %inc9, %for.inc8 ]
+  %cmp = icmp ult i32 %e.0, 10
+  br i1 %cmp, label %for.cond1, label %for.end10
+
+for.cond1:                                        ; preds = %for.inc, %for.cond
+  %f.0 = phi i16 [ %conv.val, %for.cond ], [ %add, %for.inc ]
+  %cmp2 = icmp slt i16 %f.0, 10
+  br i1 %cmp2, label %for.body4, label %for.inc8
+
+for.body4:                                        ; preds = %for.cond1
+  %sub = add i32 %e.0, -3
+  %idxprom = zext i32 %sub to i64
+  %idxprom5 = sext i16 %f.0 to i64
+  %idxprom6 = zext i32 %e.0 to i64
+  %arrayidx7 = getelementptr inbounds [0 x [12 x [12 x i8]]], ptr @a, i64 0, i64 %idxprom, i64 %idxprom5, i64 %idxprom6
+  store i8 0, ptr %arrayidx7, align 1
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body4
+  %add = add i16 %f.0, 2
+  br label %for.cond1
+
+for.inc8:                                         ; preds = %for.cond1
+  %inc9 = add i32 %e.0, 1
+  br label %for.cond
+
+for.end10:                                        ; preds = %for.cond
+  ret void
+}
+
+define void @f1(ptr %a) {
+; CHECK-LABEL: 'f1'
+; CHECK-NEXT:  Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1
+; CHECK-NEXT:    da analyze - none!
+; Note: the second patch for PR148435 modifies the above CHECK to correct "output [*]".
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
+  %and = and i64 %i, 1
+  %idx = getelementptr inbounds [4 x [4 x i8]], ptr %a, i64 0, i64 %and, i64 %and
+  store i8 0, ptr %idx
+  %i.next = add i64 %i, 1
+  %exitcond.not = icmp slt i64 %i.next, 8
+  br i1 %exitcond.not, label %loop, label %exit
+
+exit:
+  ret void
+}
diff --git a/llvm/test/Analysis/DependenceAnalysis/bounds-check.ll b/llvm/test/Analysis/DependenceAnalysis/bounds-check.ll
new file mode 100644
index 0000000000000..dca86e5e55643
--- /dev/null
+++ b/llvm/test/Analysis/DependenceAnalysis/bounds-check.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -disable-output "-passes=print<da>" 2>&1 | FileCheck %s
+
+; Test case for SmallBitVector bounds checking bug in DependenceAnalysis.
+; This test ensures that loop index mapping functions don't cause out-of-bounds
+; access to SmallBitVector when loop depths exceed MaxLevels.
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+define void @bounds_check_test(ptr %a) {
+; CHECK-LABEL: 'bounds_check_test'
+; CHECK-NEXT:  Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1
+; CHECK-NEXT:    da analyze - none!
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
+  %and = and i64 %i, 1  ; Creates index 0 or 1
+  %idx = getelementptr inbounds [4 x [4 x i8]], ptr %a, i64 0, i64 %and, i64 %i
+  store i8 0, ptr %idx
+  %i.next = add i64 %i, 1
+  %exitcond.not = icmp slt i64 %i.next, 4
+  br i1 %exitcond.not, label %loop, label %exit
+
+exit:
+  ret void
+}

>From 3e993f87a6eafc8a53ba7e67e9900eb99cd05215 Mon Sep 17 00:00:00 2001
From: Sebastian Pop <spop at nvidia.com>
Date: Sat, 23 Aug 2025 09:33:00 -0500
Subject: [PATCH 2/3] [DA] Simplify runtime predicate collection and extend to
 all dependence tests

Previously, predicates were collected using a local `Assume` vector.
This patch:

1. Removes local `Assume` vector, uses class member `Assumptions` instead.
2. Adds a `getNonRedundantAssumptions()` helper for deduplication.
3. Extends predicate collection to all dependence tests.
---
 .../llvm/Analysis/DependenceAnalysis.h        |  6 ++
 llvm/lib/Analysis/DependenceAnalysis.cpp      | 81 ++++++++++++-------
 2 files changed, 56 insertions(+), 31 deletions(-)

diff --git a/llvm/include/llvm/Analysis/DependenceAnalysis.h b/llvm/include/llvm/Analysis/DependenceAnalysis.h
index f66c79d915665..0db8f5d8eca02 100644
--- a/llvm/include/llvm/Analysis/DependenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/DependenceAnalysis.h
@@ -598,6 +598,12 @@ class DependenceInfo {
   /// returns NULL.
   const SCEVConstant *collectConstantUpperBound(const Loop *l, Type *T) const;
 
+  /// getNonRedundantAssumptions - Remove redundant assumptions from the
+  /// collection and return a SCEVUnionPredicate with unique assumptions.
+  /// This ensures that each assumption is only present once and that
+  /// stronger assumptions imply weaker ones.
+  SCEVUnionPredicate getNonRedundantAssumptions() const;
+
   /// classifyPair - Examines the subscript pair (the Src and Dst SCEVs)
   /// and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear.
   /// Collects the associated loops in a set.
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index c6c2bc9b4ba25..8bc6ec699549d 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -3569,6 +3569,37 @@ SCEVUnionPredicate DependenceInfo::getRuntimeAssumptions() const {
   return SCEVUnionPredicate(Assumptions, *SE);
 }
 
+// getNonRedundantAssumptions - Build a SCEVUnionPredicate from Assumptions,
+// leaving the member untouched. A predicate is skipped when one already kept
+// implies it, and kept predicates that a newly added one implies are dropped,
+// avoiding unnecessary runtime checks.
+SCEVUnionPredicate DependenceInfo::getNonRedundantAssumptions() const {
+  SmallVector<const SCEVPredicate *, 4> UniqueAssumptions;
+
+  for (const SCEVPredicate *P : Assumptions) {
+    bool Implied = false;
+    for (const SCEVPredicate *Existing : UniqueAssumptions) {
+      if (Existing->implies(P, *SE)) {
+        Implied = true;
+        break;
+      }
+    }
+    if (!Implied) {
+      auto I = UniqueAssumptions.begin();
+      while (I != UniqueAssumptions.end()) {
+        if (P->implies(*I, *SE)) {
+          I = UniqueAssumptions.erase(I);
+        } else {
+          ++I;
+        }
+      }
+      UniqueAssumptions.push_back(P);
+    }
+  }
+
+  return SCEVUnionPredicate(UniqueAssumptions, *SE);
+}
+
 // depends -
 // Returns NULL if there is no dependence.
 // Otherwise, return a Dependence with as many details as possible.
@@ -3583,7 +3614,6 @@ SCEVUnionPredicate DependenceInfo::getRuntimeAssumptions() const {
 std::unique_ptr<Dependence>
 DependenceInfo::depends(Instruction *Src, Instruction *Dst,
                         bool UnderRuntimeAssumptions) {
-  SmallVector<const SCEVPredicate *, 4> Assume;
   bool PossiblyLoopIndependent = true;
   if (Src == Dst)
     PossiblyLoopIndependent = false;
@@ -3595,8 +3625,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
   if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) {
     // can only analyze simple loads and stores, i.e., no calls, invokes, etc.
     LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n");
-    return std::make_unique<Dependence>(Src, Dst,
-                                        SCEVUnionPredicate(Assume, *SE));
+    return std::make_unique<Dependence>(Src, Dst, getNonRedundantAssumptions());
   }
 
   const MemoryLocation &DstLoc = MemoryLocation::get(Dst);
@@ -3607,8 +3636,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
   case AliasResult::PartialAlias:
     // cannot analyse objects if we don't understand their aliasing.
     LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n");
-    return std::make_unique<Dependence>(Src, Dst,
-                                        SCEVUnionPredicate(Assume, *SE));
+    return std::make_unique<Dependence>(Src, Dst, getNonRedundantAssumptions());
   case AliasResult::NoAlias:
     // If the objects noalias, they are distinct, accesses are independent.
     LLVM_DEBUG(dbgs() << "no alias\n");
@@ -3622,8 +3650,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
     // The dependence test gets confused if the size of the memory accesses
     // differ.
     LLVM_DEBUG(dbgs() << "can't analyze must alias with different sizes\n");
-    return std::make_unique<Dependence>(Src, Dst,
-                                        SCEVUnionPredicate(Assume, *SE));
+    return std::make_unique<Dependence>(Src, Dst, getNonRedundantAssumptions());
   }
 
   Value *SrcPtr = getLoadStorePointerOperand(Src);
@@ -3642,8 +3669,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
     // We check this upfront so we don't crash in cases where getMinusSCEV()
     // returns a SCEVCouldNotCompute.
     LLVM_DEBUG(dbgs() << "can't analyze SCEV with different pointer base\n");
-    return std::make_unique<Dependence>(Src, Dst,
-                                        SCEVUnionPredicate(Assume, *SE));
+    return std::make_unique<Dependence>(Src, Dst, getNonRedundantAssumptions());
   }
 
   // Even if the base pointers are the same, they may not be loop-invariant. It
@@ -3655,8 +3681,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
   if (!isLoopInvariant(SrcBase, SrcLoop) ||
       !isLoopInvariant(DstBase, DstLoop)) {
     LLVM_DEBUG(dbgs() << "The base pointer is not loop invariant.\n");
-    return std::make_unique<Dependence>(Src, Dst,
-                                        SCEVUnionPredicate(Assume, *SE));
+    return std::make_unique<Dependence>(Src, Dst, getNonRedundantAssumptions());
   }
 
   uint64_t EltSize = SrcLoc.Size.toRaw();
@@ -3664,34 +3689,22 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
   const SCEV *DstEv = SE->getMinusSCEV(DstSCEV, DstBase);
 
   // Check that memory access offsets are multiples of element sizes.
-  if (!SE->isKnownMultipleOf(SrcEv, EltSize, Assume) ||
-      !SE->isKnownMultipleOf(DstEv, EltSize, Assume)) {
+  if (!SE->isKnownMultipleOf(SrcEv, EltSize, Assumptions) ||
+      !SE->isKnownMultipleOf(DstEv, EltSize, Assumptions)) {
     LLVM_DEBUG(dbgs() << "can't analyze SCEV with different offsets\n");
-    return std::make_unique<Dependence>(Src, Dst,
-                                        SCEVUnionPredicate(Assume, *SE));
+    return std::make_unique<Dependence>(Src, Dst, getNonRedundantAssumptions());
   }
 
-  if (!Assume.empty()) {
-    if (!UnderRuntimeAssumptions)
-      return std::make_unique<Dependence>(Src, Dst,
-                                          SCEVUnionPredicate(Assume, *SE));
-    // Add non-redundant assumptions.
-    unsigned N = Assumptions.size();
-    for (const SCEVPredicate *P : Assume) {
-      bool Implied = false;
-      for (unsigned I = 0; I != N && !Implied; I++)
-        if (Assumptions[I]->implies(P, *SE))
-          Implied = true;
-      if (!Implied)
-        Assumptions.push_back(P);
-    }
-  }
+  // If runtime assumptions were added but not allowed, return confused
+  // dependence.
+  if (!UnderRuntimeAssumptions && !Assumptions.empty())
+    return std::make_unique<Dependence>(Src, Dst, getNonRedundantAssumptions());
 
   establishNestingLevels(Src, Dst);
   LLVM_DEBUG(dbgs() << "    common nesting levels = " << CommonLevels << "\n");
   LLVM_DEBUG(dbgs() << "    maximum nesting levels = " << MaxLevels << "\n");
 
-  FullDependence Result(Src, Dst, SCEVUnionPredicate(Assume, *SE),
+  FullDependence Result(Src, Dst, SCEVUnionPredicate(Assumptions, *SE),
                         PossiblyLoopIndependent, CommonLevels);
   ++TotalArrayPairs;
 
@@ -4040,6 +4053,12 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
       return nullptr;
   }
 
+  // If runtime assumptions were added but not allowed, return confused
+  // dependence.
+  if (!UnderRuntimeAssumptions && !Assumptions.empty())
+    return std::make_unique<Dependence>(Src, Dst, getNonRedundantAssumptions());
+
+  Result.Assumptions = getNonRedundantAssumptions();
   return std::make_unique<FullDependence>(std::move(Result));
 }
 

>From a04d754cac61fdee59d4dabbda0daa77823f657a Mon Sep 17 00:00:00 2001
From: Sebastian Pop <spop at nvidia.com>
Date: Thu, 21 Aug 2025 16:58:56 -0500
Subject: [PATCH 3/3] [DependenceAnalysis] Fix incorrect analysis of wrapping
 AddRec expressions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes GitHub issue #148435 where {false,+,true} patterns reported
"da analyze - none!" instead of correct "da analyze - output [*]!".

The issue occurs when AddRec expressions in narrow types create cyclic
patterns (e.g., {false,+,true} in i1 arithmetic: 0,1,0,1,0,1...) that
violate SIV analysis assumptions of linear, non-wrapping recurrences.

The fix detects potential wrapping by checking if step × iteration_count
exceeds the type's representable range, then classifies such expressions
as NonLinear for conservative analysis.

Add wrapping detection in checkSubscript() with fallback to exact and max
backedge taken count for variable bounds.
---
 llvm/include/llvm/Analysis/ScalarEvolution.h  |   6 +
 llvm/lib/Analysis/DependenceAnalysis.cpp      |  15 +++
 llvm/lib/Analysis/ScalarEvolution.cpp         | 125 +++++++++++++++++-
 .../Analysis/DependenceAnalysis/PR148435.ll   |   3 +-
 .../DependenceAnalysis/wrapping-addrec-1.ll   |  38 ++++++
 .../DependenceAnalysis/wrapping-addrec.ll     |  36 +++++
 .../DependenceAnalysis/wrapping-maxbtc.ll     |  35 +++++
 7 files changed, 254 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/Analysis/DependenceAnalysis/wrapping-addrec-1.ll
 create mode 100644 llvm/test/Analysis/DependenceAnalysis/wrapping-addrec.ll
 create mode 100644 llvm/test/Analysis/DependenceAnalysis/wrapping-maxbtc.ll

diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index 167845ce646b9..638509392eb4e 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1337,6 +1337,12 @@ class ScalarEvolution {
   /// sharpen it.
   LLVM_ABI void setNoWrapFlags(SCEVAddRecExpr *AddRec, SCEV::NoWrapFlags Flags);
 
+  /// Check if this AddRec expression may wrap, making it non-affine.
+  /// Wrapping AddRecs create cyclic patterns that violate linearity
+  /// assumptions. Returns true if it definitely wraps, false if it is known
+  /// not to wrap, and std::nullopt if unknown.
+  LLVM_ABI std::optional<bool> mayAddRecWrap(const SCEVAddRecExpr *AddRec);
+
   class LoopGuards {
     DenseMap<const SCEV *, const SCEV *> RewriteMap;
     bool PreserveNUW = false;
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index 8bc6ec699549d..dbaef19ebf3d4 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -966,6 +966,21 @@ bool DependenceInfo::checkSubscript(const SCEV *Expr, const Loop *LoopNest,
   if (!isLoopInvariant(Step, LoopNest))
     return false;
 
+  // Check if this AddRec expression may wrap, making it non-affine.
+  std::optional<bool> MayWrap = SE->mayAddRecWrap(AddRec);
+  if (MayWrap == true) {
+    // AddRec is known to wrap.
+    return false;
+  } else if (!MayWrap.has_value()) {
+    // Unknown whether it wraps - add runtime predicate that it doesn't wrap.
+    auto WrapFlags = static_cast<SCEVWrapPredicate::IncrementWrapFlags>(
+        SCEVWrapPredicate::IncrementNUSW | SCEVWrapPredicate::IncrementNSSW);
+    const SCEVPredicate *WrapPred = SE->getWrapPredicate(AddRec, WrapFlags);
+    const_cast<DependenceInfo *>(this)->Assumptions.push_back(WrapPred);
+    LLVM_DEBUG(dbgs() << "\t    Added runtime wrap assumption for: " << *AddRec
+                      << "\n");
+  }
+
   // The AddRec must depend on one of the containing loops. Otherwise,
   // mapSrcLoop and mapDstLoop return indices outside the intended range. This
   // can happen when a subscript in one loop references an IV from a sibling
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index d2c445f1ffaa0..264ca9c168959 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -6439,8 +6439,129 @@ void ScalarEvolution::setNoWrapFlags(SCEVAddRecExpr *AddRec,
   }
 }
 
-ConstantRange ScalarEvolution::
-getRangeForUnknownRecurrence(const SCEVUnknown *U) {
+// Returns false for pointer AddRecs, when a no-wrap flag is set, when a proof
+// below succeeds, or when constant step * constant backedge-taken count fits
+// in the type; true when it does not fit; std::nullopt if one is not constant.
+std::optional<bool>
+ScalarEvolution::mayAddRecWrap(const SCEVAddRecExpr *AddRec) {
+  Type *Ty = AddRec->getType();
+
+  // Pointer AddRec expressions do not wrap in the arithmetic sense.
+  if (Ty->isPointerTy())
+    return false;
+
+  // Step 1: Check existing no-wrap flags from SCEV construction.
+  if (AddRec->hasNoSelfWrap() || AddRec->hasNoUnsignedWrap() ||
+      AddRec->hasNoSignedWrap()) {
+    LLVM_DEBUG(dbgs() << "\t\tAddRec has no-wrap flags: " << *AddRec << "\n");
+    return false;
+  }
+
+  // Step 2: Try to prove no-wrap using constant range analysis.
+  // Uses the same logic as proveNoWrapViaConstantRanges.
+  if (AddRec->isAffine()) {
+    const Loop *Loop = AddRec->getLoop();
+    const SCEV *BECount = getConstantMaxBackedgeTakenCount(Loop);
+    if (const SCEVConstant *BECountMax = dyn_cast<SCEVConstant>(BECount)) {
+      ConstantRange StepCR = getSignedRange(AddRec->getStepRecurrence(*this));
+      const APInt &BECountAP = BECountMax->getAPInt();
+      unsigned NoOverflowBitWidth =
+          BECountAP.getActiveBits() + StepCR.getMinSignedBits();
+      if (NoOverflowBitWidth <= getTypeSizeInBits(AddRec->getType())) {
+        LLVM_DEBUG(dbgs() << "\t\tConstant range analysis proves no-wrap: "
+                          << *AddRec << "\n");
+        return false;
+      }
+    }
+  }
+
+  // Step 3: Try to prove using signed/unsigned range containment.
+  // Uses the range containment checks from proveNoWrapViaConstantRanges.
+  if (AddRec->isAffine()) {
+    using OBO = OverflowingBinaryOperator;
+
+    // Check unsigned wrap.
+    ConstantRange AddRecRange = getUnsignedRange(AddRec);
+    ConstantRange IncRange = getUnsignedRange(AddRec->getStepRecurrence(*this));
+
+    auto NUWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
+        Instruction::Add, IncRange, OBO::NoUnsignedWrap);
+    if (NUWRegion.contains(AddRecRange)) {
+      LLVM_DEBUG(dbgs() << "\t\tUnsigned range analysis proves no-wrap: "
+                        << *AddRec << "\n");
+      return false;
+    }
+
+    // Check signed wrap.
+    ConstantRange SignedAddRecRange = getSignedRange(AddRec);
+    ConstantRange SignedIncRange =
+        getSignedRange(AddRec->getStepRecurrence(*this));
+
+    auto NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
+        Instruction::Add, SignedIncRange, OBO::NoSignedWrap);
+    if (NSWRegion.contains(SignedAddRecRange)) {
+      LLVM_DEBUG(dbgs() << "\t\tSigned range analysis proves no-wrap: "
+                        << *AddRec << "\n");
+      return false;
+    }
+  }
+
+  // Step 4: Try induction-based proving methods.
+  // Call the existing sophisticated analysis methods.
+  SCEV::NoWrapFlags ProvenFlags = proveNoWrapViaConstantRanges(AddRec);
+  if (hasFlags(ProvenFlags, SCEV::FlagNW) ||
+      hasFlags(ProvenFlags, SCEV::FlagNUW) ||
+      hasFlags(ProvenFlags, SCEV::FlagNSW)) {
+    LLVM_DEBUG(dbgs() << "\t\tAdvanced constant range analysis proves no-wrap: "
+                      << *AddRec << "\n");
+    return false;
+  }
+
+  ProvenFlags = proveNoSignedWrapViaInduction(AddRec);
+  if (hasFlags(ProvenFlags, SCEV::FlagNSW)) {
+    LLVM_DEBUG(dbgs() << "\t\tSigned induction analysis proves no-wrap: "
+                      << *AddRec << "\n");
+    return false;
+  }
+
+  ProvenFlags = proveNoUnsignedWrapViaInduction(AddRec);
+  if (hasFlags(ProvenFlags, SCEV::FlagNUW)) {
+    LLVM_DEBUG(dbgs() << "\t\tUnsigned induction analysis proves no-wrap: "
+                      << *AddRec << "\n");
+    return false;
+  }
+
+  // Step 5: Fallback to explicit step * iteration calculation for narrow types.
+  const SCEV *Step = AddRec->getStepRecurrence(*this);
+  const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
+  if (!ConstStep)
+    return std::nullopt;
+
+  const Loop *Loop = AddRec->getLoop();
+  if (!hasLoopInvariantBackedgeTakenCount(Loop))
+    return std::nullopt;
+
+  const auto *ConstBTC = dyn_cast<SCEVConstant>(getBackedgeTakenCount(Loop));
+  if (!ConstBTC)
+    return std::nullopt;
+
+  // Explicit calculation: does step * iterations exceed the type's range?
+  // Multiply as unsigned at a width that holds the full product so any integer
+  // width works. NOTE(review): the start value is not considered here.
+  const APInt &StepVal = ConstStep->getAPInt();
+  const APInt &BTCVal = ConstBTC->getAPInt();
+  unsigned WideWidth = StepVal.getBitWidth() + BTCVal.getBitWidth();
+  APInt Product = StepVal.zext(WideWidth) * BTCVal.zext(WideWidth);
+  if (Product.getActiveBits() <= Ty->getScalarSizeInBits())
+    return false;
+
+  LLVM_DEBUG(dbgs() << "\t\tExplicit calculation proves wrapping: " << *AddRec
+                    << "\n");
+  return true;
+}
+
+ConstantRange
+ScalarEvolution::getRangeForUnknownRecurrence(const SCEVUnknown *U) {
   const DataLayout &DL = getDataLayout();
 
   unsigned BitWidth = getTypeSizeInBits(U->getType());
diff --git a/llvm/test/Analysis/DependenceAnalysis/PR148435.ll b/llvm/test/Analysis/DependenceAnalysis/PR148435.ll
index 1633a91336f68..30ade36b03fc0 100644
--- a/llvm/test/Analysis/DependenceAnalysis/PR148435.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/PR148435.ll
@@ -75,8 +75,7 @@ for.end10:                                        ; preds = %for.cond
 define void @f1(ptr %a) {
 ; CHECK-LABEL: 'f1'
 ; CHECK-NEXT:  Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1
-; CHECK-NEXT:    da analyze - none!
-; Note: the second patch for PR148435 modifies the above CHECK to correct "output [*]".
+; CHECK-NEXT:    da analyze - output [*]!
 ;
 entry:
   br label %loop
diff --git a/llvm/test/Analysis/DependenceAnalysis/wrapping-addrec-1.ll b/llvm/test/Analysis/DependenceAnalysis/wrapping-addrec-1.ll
new file mode 100644
index 0000000000000..ef3d78b036bd4
--- /dev/null
+++ b/llvm/test/Analysis/DependenceAnalysis/wrapping-addrec-1.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -disable-output "-passes=print<da>" 2>&1 | FileCheck %s
+
+; Test case for bug #148435 - SIV test assertion failure.
+; This test ensures that testSIV handles the case where neither the Src nor the
+; Dst expression contains an AddRec after propagation, which can happen when
+; constraints simplify the expressions to non-AddRec forms.
+
+define void @f(ptr %a) {
+; CHECK-LABEL: 'f'
+; CHECK-NEXT:  Src: store i8 42, ptr %idx, align 1 --> Dst: store i8 42, ptr %idx, align 1
+; CHECK-NEXT:    da analyze - output [* *]!
+;
+entry:
+  br label %loop.i.header
+
+loop.i.header:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %loop.i.latch ]
+  %and.i = and i64 %i, 1    ; Low bit of %i: alternates 0,1,0,1,... across outer iterations
+  br label %loop.j
+
+loop.j:
+  %j = phi i64 [ 0, %loop.i.header ], [ %j.next, %loop.j ]
+  %and.j = and i64 %j, 1    ; Low bit of %j: alternates 0,1,0,1,... across inner iterations
+  %idx = getelementptr [2 x [2 x i8]], ptr %a, i64 0, i64 %and.i, i64 %and.j    ; Both subscripts are the masked (periodic) values, not %i/%j themselves
+  store i8 42, ptr %idx    ; The only memory access; the CHECK lines above pair it with itself
+  %j.next = add i64 %j, 1
+  %exitcond.j = icmp eq i64 %j.next, 100    ; Inner loop exits once %j.next reaches 100
+ br i1 %exitcond.j, label %loop.i.latch, label %loop.j
+
+loop.i.latch:
+  %i.next = add i64 %i, 1
+  %exitcond.i = icmp eq i64 %i.next, 100    ; Outer loop exits once %i.next reaches 100
+  br i1 %exitcond.i, label %exit, label %loop.i.header
+
+exit:
+  ret void
+}
diff --git a/llvm/test/Analysis/DependenceAnalysis/wrapping-addrec.ll b/llvm/test/Analysis/DependenceAnalysis/wrapping-addrec.ll
new file mode 100644
index 0000000000000..d32e5225f4e29
--- /dev/null
+++ b/llvm/test/Analysis/DependenceAnalysis/wrapping-addrec.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -disable-output "-passes=print<da>" 2>&1 | FileCheck %s
+
+; Test case for wrapping AddRec detection in DependenceAnalysis.
+; This ensures that AddRec expressions that wrap (creating cyclic rather than
+; linear patterns) are rejected from SIV analysis and treated conservatively.
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+
+
+; This test case has a clear dependence pattern that was incorrectly reported as "none!".
+; The issue: {false,+,true} in i1 arithmetic creates the pattern (0,1,0,1,0,1,...).
+; - i=0: a[0][0][0], i=1: a[0][1][1], i=2: a[0][0][0], i=3: a[0][1][1], ...
+; - Clear dependencies at distances 2, 4, 6 between iterations accessing the same locations.
+; - The Strong SIV test was missing these because it treated the wrapping pattern as linear.
+define void @test_wrapping_i1_addrec(ptr %a) {
+; CHECK-LABEL: 'test_wrapping_i1_addrec'
+; CHECK-NEXT:  Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1
+; CHECK-NEXT:    da analyze - output [*]!
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
+  %and = and i64 %i, 1    ; Low bit of %i: alternates 0,1,0,1,...
+  %idx = getelementptr inbounds [4 x [4 x i8]], ptr %a, i64 0, i64 %and, i64 %and    ; The same masked value is used for both array subscripts
+  store i8 0, ptr %idx    ; The only memory access; the CHECK lines above pair it with itself
+  %i.next = add i64 %i, 1
+  %exitcond.not = icmp slt i64 %i.next, 8    ; True while the loop continues: %i takes the values 0..7
+  br i1 %exitcond.not, label %loop, label %exit
+
+exit:
+  ret void
+}
diff --git a/llvm/test/Analysis/DependenceAnalysis/wrapping-maxbtc.ll b/llvm/test/Analysis/DependenceAnalysis/wrapping-maxbtc.ll
new file mode 100644
index 0000000000000..213d8f425b9ed
--- /dev/null
+++ b/llvm/test/Analysis/DependenceAnalysis/wrapping-maxbtc.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -disable-output "-passes=print<da>" 2>&1 | FileCheck %s
+
+; Test case for wrapping AddRec detection using constant max backedge taken count.
+; This ensures that wrapping detection works even when exact BTC is not available
+; but we can get a conservative upper bound.
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+; Test case where the loop has a variable bound but SCEV can provide a max BTC estimate.
+; The i2 type can only represent 0,1,2,3, so if we iterate more than 4 times
+; with step=1, we'll get wrapping: 0,1,2,3,0,1,2,3...
+define void @test_wrapping_with_maxbtc(ptr %a, i32 %n) {
+; CHECK-LABEL: 'test_wrapping_with_maxbtc'
+; CHECK-NEXT:  Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1
+; CHECK-NEXT:    da analyze - output [*]!
+;
+entry:
+  %bound = and i32 %n, 1023    ; Keep only the low 10 bits of %n: %bound is at most 1023
+  %cmp = icmp sgt i32 %bound, 0    ; Skip the loop entirely when %bound is 0
+  br i1 %cmp, label %loop, label %exit
+
+loop:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+  %i.narrow = trunc i32 %i to i2      ; Only 2 bits: wraps after 4 iterations
+  %zext = zext i2 %i.narrow to i64    ; Widen the wrapped 2-bit value back to an i64 index in 0..3
+  %idx = getelementptr inbounds [8 x i8], ptr %a, i64 0, i64 %zext
+  store i8 0, ptr %idx    ; The only memory access; the CHECK lines above pair it with itself
+  %i.next = add i32 %i, 1
+  %exitcond = icmp slt i32 %i.next, %bound  ; Variable upper bound: continue while %i.next < %bound (signed)
+  br i1 %exitcond, label %loop, label %exit
+
+exit:
+  ret void
+}