[llvm] [DA] Fix zero coeff bug in Strong SIV test with runtime assumptions (PR #155037)

Tue Sep 9 12:37:09 PDT 2025

https://github.com/sebpop updated https://github.com/llvm/llvm-project/pull/155037

>From ca28604e723af7940ab764a139fa512f5fa59ebc Mon Sep 17 00:00:00 2001
From: Sebastian Pop <spop at nvidia.com>
Date: Sat, 23 Aug 2025 09:33:00 -0500
Subject: [PATCH 1/2] [DA] Simplify runtime predicate collection and extend to
 all dependence tests

Previously, predicates were collected using a local `Assume` vector.  This patch
removes the local `Assume` vector, uses the class member `Assumptions` instead,
and extends predicate collection to all dependence tests.
---
 .../llvm/Analysis/DependenceAnalysis.h        | 31 +++++++-
 llvm/lib/Analysis/DependenceAnalysis.cpp      | 71 +++++++++++--------
 .../DependenceAnalysis/DifferentOffsets.ll    |  5 --
 .../DependenceAnalysis/MIVCheckConst.ll       |  3 -
 4 files changed, 69 insertions(+), 41 deletions(-)

diff --git a/llvm/include/llvm/Analysis/DependenceAnalysis.h b/llvm/include/llvm/Analysis/DependenceAnalysis.h
index f66c79d915665..300cfb73af5c1 100644
--- a/llvm/include/llvm/Analysis/DependenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/DependenceAnalysis.h
@@ -285,7 +285,8 @@ class LLVM_ABI FullDependence final : public Dependence {
 class DependenceInfo {
 public:
   DependenceInfo(Function *F, AAResults *AA, ScalarEvolution *SE, LoopInfo *LI)
-      : AA(AA), SE(SE), LI(LI), F(F) {}
+      : AA(AA), SE(SE), LI(LI), F(F), Assumptions({}, *SE),
+        UnderRuntimeAssumptions(false) {}
 
   /// Handle transitive invalidation when the cached analysis results go away.
   LLVM_ABI bool invalidate(Function &F, const PreservedAnalyses &PA,
@@ -355,7 +356,33 @@ class DependenceInfo {
   ScalarEvolution *SE;
   LoopInfo *LI;
   Function *F;
-  SmallVector<const SCEVPredicate *, 4> Assumptions;
+
+  /// Runtime assumptions collected during dependence analysis.
+  ///
+  /// The dependence analysis employs a cascade of tests from simple to complex:
+  /// ZIV -> SIV (Strong/Weak-Crossing/Weak-Zero) -> RDIV -> MIV -> Banerjee.
+  /// Each test attempts to characterize the dependence with increasing
+  /// precision.
+  ///
+  /// Assumption Management Strategy:
+  /// - Each test may require runtime assumptions (e.g., "coefficient != 0")
+  ///   to provide precise analysis.
+  /// - If UnderRuntimeAssumptions=true: tests can add assumptions and continue.
+  /// - If UnderRuntimeAssumptions=false: tests that need assumptions fail
+  ///   gracefully, allowing more complex tests to attempt analysis.
+  /// - Only assumptions from successful tests contribute to the final result.
+  /// - SCEVUnionPredicate automatically deduplicates redundant assumptions.
+  ///
+  /// This design ensures:
+  /// 1. Simpler tests get priority (better performance).
+  /// 2. Complex tests serve as fallbacks when simple tests fail.
+  /// 3. No unnecessary runtime checks from failed test attempts.
+  /// 4. Maintains the intended cascade behavior of the dependence analysis.
+  SCEVUnionPredicate Assumptions;
+
+  /// Indicates whether runtime assumptions are collected during the analysis.
+  /// When false, dependence tests that would require runtime assumptions fail.
+  bool UnderRuntimeAssumptions;
 
   /// Subscript - This private struct represents a pair of subscripts from
   /// a pair of potentially multi-dimensional array references. We use a
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index da86a8d2cc9c0..ad5415d2f765a 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -3567,7 +3567,7 @@ bool DependenceInfo::invalidate(Function &F, const PreservedAnalyses &PA,
 }
 
 SCEVUnionPredicate DependenceInfo::getRuntimeAssumptions() const {
-  return SCEVUnionPredicate(Assumptions, *SE);
+  return Assumptions;
 }
 
 // depends -
@@ -3584,7 +3584,12 @@ SCEVUnionPredicate DependenceInfo::getRuntimeAssumptions() const {
 std::unique_ptr<Dependence>
 DependenceInfo::depends(Instruction *Src, Instruction *Dst,
                         bool UnderRuntimeAssumptions) {
-  SmallVector<const SCEVPredicate *, 4> Assume;
+  // Set the flag for whether we're allowed to add runtime assumptions.
+  this->UnderRuntimeAssumptions = UnderRuntimeAssumptions;
+
+  // Clear any previous assumptions
+  Assumptions = SCEVUnionPredicate({}, *SE);
+
   bool PossiblyLoopIndependent = true;
   if (Src == Dst)
     PossiblyLoopIndependent = false;
@@ -3596,8 +3601,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
   if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) {
     // can only analyze simple loads and stores, i.e., no calls, invokes, etc.
     LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n");
-    return std::make_unique<Dependence>(Src, Dst,
-                                        SCEVUnionPredicate(Assume, *SE));
+    return std::make_unique<Dependence>(Src, Dst, getRuntimeAssumptions());
   }
 
   const MemoryLocation &DstLoc = MemoryLocation::get(Dst);
@@ -3608,8 +3612,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
   case AliasResult::PartialAlias:
     // cannot analyse objects if we don't understand their aliasing.
     LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n");
-    return std::make_unique<Dependence>(Src, Dst,
-                                        SCEVUnionPredicate(Assume, *SE));
+    return std::make_unique<Dependence>(Src, Dst, getRuntimeAssumptions());
   case AliasResult::NoAlias:
     // If the objects noalias, they are distinct, accesses are independent.
     LLVM_DEBUG(dbgs() << "no alias\n");
@@ -3623,8 +3626,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
     // The dependence test gets confused if the size of the memory accesses
     // differ.
     LLVM_DEBUG(dbgs() << "can't analyze must alias with different sizes\n");
-    return std::make_unique<Dependence>(Src, Dst,
-                                        SCEVUnionPredicate(Assume, *SE));
+    return std::make_unique<Dependence>(Src, Dst, getRuntimeAssumptions());
   }
 
   Value *SrcPtr = getLoadStorePointerOperand(Src);
@@ -3643,8 +3645,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
     // We check this upfront so we don't crash in cases where getMinusSCEV()
     // returns a SCEVCouldNotCompute.
     LLVM_DEBUG(dbgs() << "can't analyze SCEV with different pointer base\n");
-    return std::make_unique<Dependence>(Src, Dst,
-                                        SCEVUnionPredicate(Assume, *SE));
+    return std::make_unique<Dependence>(Src, Dst, getRuntimeAssumptions());
   }
 
   // Even if the base pointers are the same, they may not be loop-invariant. It
@@ -3656,8 +3657,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
   if (!isLoopInvariant(SrcBase, SrcLoop) ||
       !isLoopInvariant(DstBase, DstLoop)) {
     LLVM_DEBUG(dbgs() << "The base pointer is not loop invariant.\n");
-    return std::make_unique<Dependence>(Src, Dst,
-                                        SCEVUnionPredicate(Assume, *SE));
+    return std::make_unique<Dependence>(Src, Dst, getRuntimeAssumptions());
   }
 
   uint64_t EltSize = SrcLoc.Size.toRaw();
@@ -3665,35 +3665,40 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
   const SCEV *DstEv = SE->getMinusSCEV(DstSCEV, DstBase);
 
   // Check that memory access offsets are multiples of element sizes.
-  if (!SE->isKnownMultipleOf(SrcEv, EltSize, Assume) ||
-      !SE->isKnownMultipleOf(DstEv, EltSize, Assume)) {
+  SmallVector<const SCEVPredicate *, 4> TempAssumptions;
+  if (!SE->isKnownMultipleOf(SrcEv, EltSize, TempAssumptions) ||
+      !SE->isKnownMultipleOf(DstEv, EltSize, TempAssumptions)) {
     LLVM_DEBUG(dbgs() << "can't analyze SCEV with different offsets\n");
-    return std::make_unique<Dependence>(Src, Dst,
-                                        SCEVUnionPredicate(Assume, *SE));
+    return std::make_unique<Dependence>(Src, Dst, getRuntimeAssumptions());
   }
 
-  if (!Assume.empty()) {
-    if (!UnderRuntimeAssumptions)
-      return std::make_unique<Dependence>(Src, Dst,
-                                          SCEVUnionPredicate(Assume, *SE));
-    // Add non-redundant assumptions.
-    unsigned N = Assumptions.size();
-    for (const SCEVPredicate *P : Assume) {
-      bool Implied = false;
-      for (unsigned I = 0; I != N && !Implied; I++)
-        if (Assumptions[I]->implies(P, *SE))
-          Implied = true;
-      if (!Implied)
-        Assumptions.push_back(P);
+  // Add any new assumptions from the isKnownMultipleOf calls
+  if (!TempAssumptions.empty()) {
+    if (UnderRuntimeAssumptions) {
+      SmallVector<const SCEVPredicate *, 4> NewPreds(
+          Assumptions.getPredicates());
+      NewPreds.append(TempAssumptions.begin(), TempAssumptions.end());
+      const_cast<DependenceInfo *>(this)->Assumptions =
+          SCEVUnionPredicate(NewPreds, *SE);
+    } else {
+      // Runtime assumptions needed but not allowed.
+      // Return confused dependence since we cannot proceed with precise
+      // analysis.
+      LLVM_DEBUG(dbgs() << "Runtime assumptions needed for offset analysis but "
+                           "not allowed\n");
+      return std::make_unique<Dependence>(Src, Dst, getRuntimeAssumptions());
     }
   }
 
+  // Assert that we haven't added runtime assumptions when not allowed
+  assert(UnderRuntimeAssumptions || Assumptions.isAlwaysTrue());
+
   establishNestingLevels(Src, Dst);
   LLVM_DEBUG(dbgs() << "    common nesting levels = " << CommonLevels << "\n");
   LLVM_DEBUG(dbgs() << "    maximum nesting levels = " << MaxLevels << "\n");
 
-  FullDependence Result(Src, Dst, SCEVUnionPredicate(Assume, *SE),
-                        PossiblyLoopIndependent, CommonLevels);
+  FullDependence Result(Src, Dst, Assumptions, PossiblyLoopIndependent,
+                        CommonLevels);
   ++TotalArrayPairs;
 
   unsigned Pairs = 1;
@@ -4036,6 +4041,10 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
       return nullptr;
   }
 
+  // Assert that we haven't added runtime assumptions when not allowed
+  assert(UnderRuntimeAssumptions || Assumptions.isAlwaysTrue());
+
+  Result.Assumptions = getRuntimeAssumptions();
   return std::make_unique<FullDependence>(std::move(Result));
 }
 
diff --git a/llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll b/llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll
index d9ccea55dd478..719a62a3d5113 100644
--- a/llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll
@@ -34,8 +34,6 @@ define i32 @alias_with_parametric_offset(ptr nocapture %A, i64 %n) {
 ; CHECK-NEXT:    Equal predicate: (zext i2 (trunc i64 %n to i2) to i64) == 0
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %A, align 1 --> Dst: %0 = load i32, ptr %A, align 1
 ; CHECK-NEXT:    da analyze - none!
-; CHECK-NEXT:  Runtime Assumptions:
-; CHECK-NEXT:  Equal predicate: (zext i2 (trunc i64 %n to i2) to i64) == 0
 ;
 entry:
   %arrayidx = getelementptr inbounds i8, ptr %A, i64 %n
@@ -56,7 +54,6 @@ define i32 @alias_with_parametric_expr(ptr nocapture %A, i64 %n, i64 %m) {
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx1, align 1 --> Dst: %0 = load i32, ptr %arrayidx1, align 1
 ; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Runtime Assumptions:
-; CHECK-NEXT:  Equal predicate: (zext i2 ((trunc i64 %m to i2) + (-2 * (trunc i64 %n to i2))) to i64) == 0
 ; CHECK-NEXT:  Equal predicate: (zext i2 (-2 + (trunc i64 %m to i2)) to i64) == 0
 ;
 entry:
@@ -81,8 +78,6 @@ define i32 @gep_i8_vs_i32(ptr nocapture %A, i64 %n, i64 %m) {
 ; CHECK-NEXT:    Equal predicate: (zext i2 (trunc i64 %n to i2) to i64) == 0
 ; CHECK-NEXT:  Src: store i32 42, ptr %arrayidx1, align 4 --> Dst: store i32 42, ptr %arrayidx1, align 4
 ; CHECK-NEXT:    da analyze - none!
-; CHECK-NEXT:  Runtime Assumptions:
-; CHECK-NEXT:  Equal predicate: (zext i2 (trunc i64 %n to i2) to i64) == 0
 ;
 entry:
   %arrayidx0 = getelementptr inbounds i8, ptr %A, i64 %n
diff --git a/llvm/test/Analysis/DependenceAnalysis/MIVCheckConst.ll b/llvm/test/Analysis/DependenceAnalysis/MIVCheckConst.ll
index b498d70648bad..bb6d2d7c4c8f2 100644
--- a/llvm/test/Analysis/DependenceAnalysis/MIVCheckConst.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/MIVCheckConst.ll
@@ -50,9 +50,6 @@ define void @test(ptr %A, ptr %B, i1 %arg, i32 %n, i32 %m) #0 align 2 {
 ; CHECK-NEXT:    Equal predicate: (8 * (zext i4 (trunc i32 %v1 to i4) to i32))<nuw><nsw> == 0
 ; CHECK-NEXT:  Src: %v32 = load <32 x i32>, ptr %v30, align 128 --> Dst: %v32 = load <32 x i32>, ptr %v30, align 128
 ; CHECK-NEXT:    da analyze - consistent input [0 S S]!
-; CHECK-NEXT:  Runtime Assumptions:
-; CHECK-NEXT:  Equal predicate: (zext i7 (4 * (trunc i32 %v1 to i7) * (1 + (trunc i32 %n to i7))) to i32) == 0
-; CHECK-NEXT:  Equal predicate: (8 * (zext i4 (trunc i32 %v1 to i4) to i32))<nuw><nsw> == 0
 ;
 entry:
   %v1 = load i32, ptr %B, align 4

>From f7365db0717083abb49a569d5f508bcbbbc74f18 Mon Sep 17 00:00:00 2001
From: Sebastian Pop <spop at nvidia.com>
Date: Sat, 23 Aug 2025 09:34:44 -0500
Subject: [PATCH 2/2] [DA] Fix zero coefficient bug in Strong SIV test using
 runtime assumptions (#149991)

Fix GitHub issue #149991 where Strong SIV test incorrectly concludes 'none'
for symbolic coefficients that could be zero, leading to 0/0 undefined behavior.

The issue occurs in subscripts like {base,+,coeff} where coeff is symbolic:
- When coeff != 0: different iterations access different locations
- When coeff = 0: all iterations access the same location (many dependencies)

The Strong SIV test's Delta=0 case assumed 0/X=0 where X is the coefficient,
but when X could be zero, we have 0/0 which is undefined. The analysis needs
to be conservative when the coefficient might be zero.

Solution:
When the coefficient is a SCEVUnknown that cannot be proven non-zero at compile
time, use SCEV range analysis to attempt proving coefficient != 0. If this
fails, add a runtime assumption 'coeff != 0' to the dependence result (or, when
runtime assumptions are not allowed, fail the Strong SIV test so that more
complex tests can try).

This allows precise analysis when possible (none under assumption coeff != 0)
while maintaining correctness by exposing the required assumption.

Test cases:
- zero-coefficient.ll: New test for the reported bug
- DADelin.ll: Updated to expect runtime assumptions for symbolic coefficients
---
 llvm/lib/Analysis/DependenceAnalysis.cpp      | 35 ++++++++++++++++++-
 .../Analysis/DependenceAnalysis/DADelin.ll    |  4 +++
 .../DependenceAnalysis/zero-coefficient.ll    | 30 ++++++++++++++++
 3 files changed, 68 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Analysis/DependenceAnalysis/zero-coefficient.ll

diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index ad5415d2f765a..052d8636677b5 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -1282,7 +1282,40 @@ bool DependenceInfo::strongSIVtest(const SCEV *Coeff, const SCEV *SrcConst,
       Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
     ++StrongSIVsuccesses;
   } else if (Delta->isZero()) {
-    // since 0/X == 0
+    // Check if coefficient could be zero. If so, 0/0 is undefined and we
+    // cannot conclude that only same-iteration dependencies exist.
+    // When coeff=0, all iterations access the same location.
+    if (isa<SCEVUnknown>(Coeff) && !SE->isKnownNonZero(Coeff)) {
+      // Use SCEV range analysis to prove coefficient != 0 in loop context.
+      const SCEV *Zero = SE->getZero(Coeff->getType());
+
+      // Ask SCEV's range analysis if it can prove Coeff != Zero.
+      if (SE->isKnownPredicate(ICmpInst::ICMP_NE, Coeff, Zero)) {
+        LLVM_DEBUG(
+            dbgs()
+            << "\t    Coefficient proven non-zero by SCEV range analysis\n");
+      } else {
+        // Cannot prove at compile time, would need runtime assumption.
+        if (UnderRuntimeAssumptions) {
+          const SCEVPredicate *Pred =
+              SE->getComparePredicate(ICmpInst::ICMP_NE, Coeff, Zero);
+          SmallVector<const SCEVPredicate *, 4> NewPreds(
+              Assumptions.getPredicates());
+          NewPreds.push_back(Pred);
+          const_cast<DependenceInfo *>(this)->Assumptions =
+              SCEVUnionPredicate(NewPreds, *SE);
+          LLVM_DEBUG(dbgs() << "\t    Added runtime assumption: " << *Coeff
+                            << " != 0\n");
+        } else {
+          // Cannot add runtime assumptions, this test cannot handle this case.
+          // Let more complex tests try.
+          LLVM_DEBUG(dbgs() << "\t    Would need runtime assumption " << *Coeff
+                            << " != 0, but not allowed. Failing this test.\n");
+          return false;
+        }
+      }
+    }
+    // since 0/X == 0 (where X is known non-zero)
     Result.DV[Level].Distance = Delta;
     NewConstraint.setDistance(Delta, CurLoop);
     Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
diff --git a/llvm/test/Analysis/DependenceAnalysis/DADelin.ll b/llvm/test/Analysis/DependenceAnalysis/DADelin.ll
index 8f94a455d3724..232ef75706556 100644
--- a/llvm/test/Analysis/DependenceAnalysis/DADelin.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/DADelin.ll
@@ -651,6 +651,8 @@ define void @coeff_may_negative(ptr %a, i32 %k) {
 ; CHECK-NEXT:    da analyze - output [*|<]!
 ; CHECK-NEXT:  Src: store i8 42, ptr %idx.1, align 1 --> Dst: store i8 42, ptr %idx.1, align 1
 ; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:  Runtime Assumptions:
+; CHECK-NEXT:  Compare predicate: %k ne) 0
 ;
 entry:
   br label %loop
@@ -690,6 +692,8 @@ define void @coeff_positive(ptr %a, i32 %k) {
 ; CHECK-NEXT:    da analyze - output [*|<]!
 ; CHECK-NEXT:  Src: store i8 42, ptr %idx.1, align 1 --> Dst: store i8 42, ptr %idx.1, align 1
 ; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:  Runtime Assumptions:
+; CHECK-NEXT:  Compare predicate: %k ne) 0
 ;
 entry:
   br label %loop
diff --git a/llvm/test/Analysis/DependenceAnalysis/zero-coefficient.ll b/llvm/test/Analysis/DependenceAnalysis/zero-coefficient.ll
new file mode 100644
index 0000000000000..6cc94d7d5590a
--- /dev/null
+++ b/llvm/test/Analysis/DependenceAnalysis/zero-coefficient.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -disable-output "-passes=print<da>" 2>&1 | FileCheck %s
+
+; Test case for GitHub issue #149991: Strong SIV test with symbolic coefficient
+; that could be zero. Fixed using runtime assumptions: assume coefficient != 0.
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+define void @test_zero_coefficient(ptr %a, i64 %k) {
+; CHECK-LABEL: 'test_zero_coefficient'
+; CHECK-NEXT:  Src: store i8 42, ptr %idx, align 1 --> Dst: store i8 42, ptr %idx, align 1
+; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:  Runtime Assumptions:
+; CHECK-NEXT:  Compare predicate: %k ne) 0
+;
+entry:
+  br label %loop
+
+loop:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
+  %subscript = mul i64 %i, %k  ; When %k=0, all iterations access %a[0]
+  %idx = getelementptr i8, ptr %a, i64 %subscript
+  store i8 42, ptr %idx
+  %i.next = add i64 %i, 1
+  %cond.exit = icmp eq i64 %i.next, 100
+  br i1 %cond.exit, label %exit, label %loop
+
+exit:
+  ret void
+}