[llvm] [DependenceAnalysis] Fix incorrect analysis of wrapping AddRec expressions (PR #154982)
Sebastian Pop via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 25 09:21:30 PDT 2025
https://github.com/sebpop updated https://github.com/llvm/llvm-project/pull/154982
>From 6c358c66ea2712391f5fb87f079bd7da33bc9e28 Mon Sep 17 00:00:00 2001
From: Sebastian Pop <spop at nvidia.com>
Date: Thu, 21 Aug 2025 14:39:49 -0500
Subject: [PATCH 1/7] [DependenceAnalysis] Fix SIV test crash when no AddRec
after propagation
Fixes GitHub issue #148435 where testSIV() would hit an assertion failure
when neither Src nor Dst expressions contain AddRec after constraint
propagation. This can occur when propagation simplifies expressions to
non-AddRec forms. The subscript is effectively a zero induction variable.
The fix falls back to ZIV analysis in this case, treating the simplified
expressions as loop-invariant, which is the correct behavior when all
induction variable references have been eliminated through propagation.
The patch also fixes a MIV case that may decay into a ZIV test.
Add missing NonLinear case to switch statements in propagation code
to prevent 'bad subscript classification' crash when subscripts are
reclassified as NonLinear after constraint propagation.
Add regression test to prevent future occurrences of this issue.
---
llvm/lib/Analysis/DependenceAnalysis.cpp | 61 +++++++++---
.../Analysis/DependenceAnalysis/PR148435.ll | 95 +++++++++++++++++++
.../DependenceAnalysis/bounds-check.ll | 29 ++++++
3 files changed, 170 insertions(+), 15 deletions(-)
create mode 100644 llvm/test/Analysis/DependenceAnalysis/PR148435.ll
create mode 100644 llvm/test/Analysis/DependenceAnalysis/bounds-check.ll
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index f33e04e804e3d..c6c2bc9b4ba25 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -873,6 +873,9 @@ void DependenceInfo::collectCommonLoops(const SCEV *Expression,
SmallBitVector &Loops) const {
while (LoopNest) {
unsigned Level = LoopNest->getLoopDepth();
+ LLVM_DEBUG(dbgs() << "MaxLevels = " << MaxLevels << "\n");
+ LLVM_DEBUG(dbgs() << "Level = " << Level << "\n");
+ assert(Level <= MaxLevels && "Level larger than MaxLevels.");
if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest))
Loops.set(Level);
LoopNest = LoopNest->getParentLoop();
@@ -959,6 +962,10 @@ bool DependenceInfo::checkSubscript(const SCEV *Expr, const Loop *LoopNest,
if (!AddRec)
return isLoopInvariant(Expr, LoopNest);
+ const SCEV *Step = AddRec->getStepRecurrence(*SE);
+ if (!isLoopInvariant(Step, LoopNest))
+ return false;
+
// The AddRec must depend on one of the containing loops. Otherwise,
// mapSrcLoop and mapDstLoop return indices outside the intended range. This
// can happen when a subscript in one loop references an IV from a sibling
@@ -970,14 +977,16 @@ bool DependenceInfo::checkSubscript(const SCEV *Expr, const Loop *LoopNest,
if (!L)
return false;
+ unsigned Level = IsSrc ? mapSrcLoop(L) : mapDstLoop(L);
+ // Check that the mapped loop index is within bounds for the SmallBitVector.
+ // This can happen when loop depths exceed MaxLevels due to the mapping
+ // algorithm.
+
+ LLVM_DEBUG(dbgs() << "MaxLevels = " << MaxLevels << "\n");
+ LLVM_DEBUG(dbgs() << "Level = " << Level << "\n");
+ assert(Level <= MaxLevels && "Level larger than MaxLevels.");
+ Loops.set(Level);
const SCEV *Start = AddRec->getStart();
- const SCEV *Step = AddRec->getStepRecurrence(*SE);
- if (!isLoopInvariant(Step, LoopNest))
- return false;
- if (IsSrc)
- Loops.set(mapSrcLoop(AddRec->getLoop()));
- else
- Loops.set(mapDstLoop(AddRec->getLoop()));
return checkSubscript(Start, LoopNest, Loops, IsSrc);
}
@@ -2281,8 +2290,14 @@ bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level,
Result, NewConstraint) ||
gcdMIVtest(Src, Dst, Result);
}
- llvm_unreachable("SIV test expected at least one AddRec");
- return false;
+ // If neither expression is an AddRec, this means propagation has simplified
+ // them to non-AddRec forms. In this case, fall back to ZIV analysis since
+ // the expressions are effectively loop-invariant.
+ LLVM_DEBUG(dbgs() << " falling back to ZIV test due to no AddRec\n");
+ // Set to first valid level to avoid Level=0 causing DV[-1] access.
+ // See comment in establishNestingLevels.
+ Level = 1;
+ return testZIV(Src, Dst, Result);
}
// testRDIV -
@@ -2343,8 +2358,14 @@ bool DependenceInfo::testRDIV(const SCEV *Src, const SCEV *Dst,
SrcLoop = DstAddRec->getLoop();
} else
llvm_unreachable("RDIV reached by surprising SCEVs");
- } else
- llvm_unreachable("RDIV expected at least one AddRec");
+ } else {
+ // If neither expression is an AddRec, this means propagation has simplified
+ // them to non-AddRec forms. Fall back to ZIV analysis since the expressions
+ // are effectively loop-invariant.
+ LLVM_DEBUG(
+ dbgs() << " RDIV falling back to ZIV test due to no AddRec\n");
+ return testZIV(Src, Dst, Result);
+ }
return exactRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, SrcLoop, DstLoop,
Result) ||
gcdMIVtest(Src, Dst, Result) ||
@@ -3821,7 +3842,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
break;
case Subscript::SIV: {
LLVM_DEBUG(dbgs() << ", SIV\n");
- unsigned Level;
+ unsigned Level = 0;
const SCEV *SplitIter = nullptr;
if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint,
SplitIter))
@@ -3872,12 +3893,17 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
for (unsigned SJ : Sivs.set_bits()) {
LLVM_DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n");
// SJ is an SIV subscript that's part of the current coupled group
- unsigned Level;
+ unsigned Level = 0;
const SCEV *SplitIter = nullptr;
LLVM_DEBUG(dbgs() << "SIV\n");
if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint,
SplitIter))
return nullptr;
+
+ LLVM_DEBUG(dbgs() << "MaxLevels = " << MaxLevels << "\n");
+ LLVM_DEBUG(dbgs() << "Level = " << Level << "\n");
+ assert(Level <= MaxLevels && "Level larger than MaxLevels.");
+
ConstrainedLevels.set(Level);
if (intersectConstraints(&Constraints[Level], &NewConstraint)) {
if (Constraints[Level].isEmpty()) {
@@ -4155,7 +4181,7 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep,
for (unsigned SI : Separable.set_bits()) {
switch (Pair[SI].Classification) {
case Subscript::SIV: {
- unsigned Level;
+ unsigned Level = 0;
const SCEV *SplitIter = nullptr;
(void)testSIV(Pair[SI].Src, Pair[SI].Dst, Level, Result, NewConstraint,
SplitIter);
@@ -4195,12 +4221,17 @@ const SCEV *DependenceInfo::getSplitIteration(const Dependence &Dep,
bool Changed = false;
for (unsigned SJ : Sivs.set_bits()) {
// SJ is an SIV subscript that's part of the current coupled group
- unsigned Level;
+ unsigned Level = 0;
const SCEV *SplitIter = nullptr;
(void)testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level, Result, NewConstraint,
SplitIter);
if (Level == SplitLevel && SplitIter)
return SplitIter;
+
+ LLVM_DEBUG(dbgs() << "MaxLevels = " << MaxLevels << "\n");
+ LLVM_DEBUG(dbgs() << "Level = " << Level << "\n");
+ assert(Level <= MaxLevels && "Level larger than MaxLevels.");
+
ConstrainedLevels.set(Level);
if (intersectConstraints(&Constraints[Level], &NewConstraint))
Changed = true;
diff --git a/llvm/test/Analysis/DependenceAnalysis/PR148435.ll b/llvm/test/Analysis/DependenceAnalysis/PR148435.ll
new file mode 100644
index 0000000000000..1633a91336f68
--- /dev/null
+++ b/llvm/test/Analysis/DependenceAnalysis/PR148435.ll
@@ -0,0 +1,95 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -disable-output "-passes=print<da>" 2>&1 | FileCheck %s
+
+; Test case for bug #148435 - SIV test assertion failure.
+; This test ensures that testSIV handles the case where neither Src nor Dst
+; expressions contain AddRec after propagation, which can happen when
+; constraints simplify the expressions to non-AddRec forms.
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @_Z1cb(ptr %a) {
+; CHECK-LABEL: '_Z1cb'
+; CHECK-NEXT: Src: store i8 0, ptr %arrayidx9, align 1 --> Dst: store i8 0, ptr %arrayidx9, align 1
+; CHECK-NEXT: da analyze - output [*]!
+;
+entry:
+ br label %for.body
+
+for.cond.cleanup.loopexit: ; preds = %for.body
+ ret void
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv23 = phi i64 [ 0, %entry ], [ %indvars.iv.next24, %for.body ]
+ %idxprom = and i64 %indvars.iv23, 1
+ %arrayidx9 = getelementptr inbounds [0 x [12 x [12 x i8]]], ptr %a, i64 0, i64 %idxprom, i64 0, i64 %indvars.iv23
+ store i8 0, ptr %arrayidx9, align 1
+ %indvars.iv.next24 = add i64 %indvars.iv23, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next24, 0
+ br i1 %exitcond.not, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+ at a = external global [0 x [12 x [12 x i8]]], align 1
+
+define void @test_siv_no_addrec(i1 %d, i32 %b) {
+; CHECK-LABEL: 'test_siv_no_addrec'
+; CHECK-NEXT: Src: store i8 0, ptr %arrayidx7, align 1 --> Dst: store i8 0, ptr %arrayidx7, align 1
+; CHECK-NEXT: da analyze - output [* *]!
+;
+entry:
+ %conv.val = select i1 %d, i16 1, i16 0
+ br label %for.cond
+
+for.cond: ; preds = %for.inc8, %entry
+ %e.0 = phi i32 [ %b, %entry ], [ %inc9, %for.inc8 ]
+ %cmp = icmp ult i32 %e.0, 10
+ br i1 %cmp, label %for.cond1, label %for.end10
+
+for.cond1: ; preds = %for.inc, %for.cond
+ %f.0 = phi i16 [ %conv.val, %for.cond ], [ %add, %for.inc ]
+ %cmp2 = icmp slt i16 %f.0, 10
+ br i1 %cmp2, label %for.body4, label %for.inc8
+
+for.body4: ; preds = %for.cond1
+ %sub = add i32 %e.0, -3
+ %idxprom = zext i32 %sub to i64
+ %idxprom5 = sext i16 %f.0 to i64
+ %idxprom6 = zext i32 %e.0 to i64
+ %arrayidx7 = getelementptr inbounds [0 x [12 x [12 x i8]]], ptr @a, i64 0, i64 %idxprom, i64 %idxprom5, i64 %idxprom6
+ store i8 0, ptr %arrayidx7, align 1
+ br label %for.inc
+
+for.inc: ; preds = %for.body4
+ %add = add i16 %f.0, 2
+ br label %for.cond1
+
+for.inc8: ; preds = %for.cond1
+ %inc9 = add i32 %e.0, 1
+ br label %for.cond
+
+for.end10: ; preds = %for.cond
+ ret void
+}
+
+define void @f1(ptr %a) {
+; CHECK-LABEL: 'f1'
+; CHECK-NEXT: Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1
+; CHECK-NEXT: da analyze - none!
+; Note: the second patch for PR148435 modifies the above CHECK to correct "output [*]".
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
+ %and = and i64 %i, 1
+ %idx = getelementptr inbounds [4 x [4 x i8]], ptr %a, i64 0, i64 %and, i64 %and
+ store i8 0, ptr %idx
+ %i.next = add i64 %i, 1
+ %exitcond.not = icmp slt i64 %i.next, 8
+ br i1 %exitcond.not, label %loop, label %exit
+
+exit:
+ ret void
+}
diff --git a/llvm/test/Analysis/DependenceAnalysis/bounds-check.ll b/llvm/test/Analysis/DependenceAnalysis/bounds-check.ll
new file mode 100644
index 0000000000000..dca86e5e55643
--- /dev/null
+++ b/llvm/test/Analysis/DependenceAnalysis/bounds-check.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -disable-output "-passes=print<da>" 2>&1 | FileCheck %s
+
+; Test case for SmallBitVector bounds checking bug in DependenceAnalysis.
+; This test ensures that loop index mapping functions don't cause out-of-bounds
+; access to SmallBitVector when loop depths exceed MaxLevels.
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+define void @bounds_check_test(ptr %a) {
+; CHECK-LABEL: 'bounds_check_test'
+; CHECK-NEXT: Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1
+; CHECK-NEXT: da analyze - none!
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
+ %and = and i64 %i, 1 ; Creates index 0 or 1
+ %idx = getelementptr inbounds [4 x [4 x i8]], ptr %a, i64 0, i64 %and, i64 %i
+ store i8 0, ptr %idx
+ %i.next = add i64 %i, 1
+ %exitcond.not = icmp slt i64 %i.next, 4
+ br i1 %exitcond.not, label %loop, label %exit
+
+exit:
+ ret void
+}
>From 3e993f87a6eafc8a53ba7e67e9900eb99cd05215 Mon Sep 17 00:00:00 2001
From: Sebastian Pop <spop at nvidia.com>
Date: Sat, 23 Aug 2025 09:33:00 -0500
Subject: [PATCH 2/7] [DA] Simplify runtime predicate collection and extend to
all dependence tests
Previously, predicates were collected using a local `Assume` vector.
This patch:
1. Removes local `Assume` vector, uses class member `Assumptions` instead.
2. Adds a `getNonRedundantAssumptions()` helper for deduplication.
3. Extends predicate collection to all dependence tests.
---
.../llvm/Analysis/DependenceAnalysis.h | 6 ++
llvm/lib/Analysis/DependenceAnalysis.cpp | 81 ++++++++++++-------
2 files changed, 56 insertions(+), 31 deletions(-)
diff --git a/llvm/include/llvm/Analysis/DependenceAnalysis.h b/llvm/include/llvm/Analysis/DependenceAnalysis.h
index f66c79d915665..0db8f5d8eca02 100644
--- a/llvm/include/llvm/Analysis/DependenceAnalysis.h
+++ b/llvm/include/llvm/Analysis/DependenceAnalysis.h
@@ -598,6 +598,12 @@ class DependenceInfo {
/// returns NULL.
const SCEVConstant *collectConstantUpperBound(const Loop *l, Type *T) const;
+ /// getNonRedundantAssumptions - Remove redundant assumptions from the
+ /// collection and return a SCEVUnionPredicate with unique assumptions.
+ /// This ensures that each assumption is only present once and that
+ /// stronger assumptions imply weaker ones.
+ SCEVUnionPredicate getNonRedundantAssumptions() const;
+
/// classifyPair - Examines the subscript pair (the Src and Dst SCEVs)
/// and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear.
/// Collects the associated loops in a set.
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index c6c2bc9b4ba25..8bc6ec699549d 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -3569,6 +3569,37 @@ SCEVUnionPredicate DependenceInfo::getRuntimeAssumptions() const {
return SCEVUnionPredicate(Assumptions, *SE);
}
+// getNonRedundantAssumptions - Remove redundant assumptions from the collection
+// and return a SCEVUnionPredicate with the unique assumptions. This ensures
+// that each assumption is only present once and that stronger assumptions imply
+// weaker ones, avoiding unnecessary runtime checks.
+SCEVUnionPredicate DependenceInfo::getNonRedundantAssumptions() const {
+ SmallVector<const SCEVPredicate *, 4> UniqueAssumptions;
+
+ for (const SCEVPredicate *P : Assumptions) {
+ bool Implied = false;
+ for (const SCEVPredicate *Existing : UniqueAssumptions) {
+ if (Existing->implies(P, *SE)) {
+ Implied = true;
+ break;
+ }
+ }
+ if (!Implied) {
+ auto I = UniqueAssumptions.begin();
+ while (I != UniqueAssumptions.end()) {
+ if (P->implies(*I, *SE)) {
+ I = UniqueAssumptions.erase(I);
+ } else {
+ ++I;
+ }
+ }
+ UniqueAssumptions.push_back(P);
+ }
+ }
+
+ return SCEVUnionPredicate(UniqueAssumptions, *SE);
+}
+
// depends -
// Returns NULL if there is no dependence.
// Otherwise, return a Dependence with as many details as possible.
@@ -3583,7 +3614,6 @@ SCEVUnionPredicate DependenceInfo::getRuntimeAssumptions() const {
std::unique_ptr<Dependence>
DependenceInfo::depends(Instruction *Src, Instruction *Dst,
bool UnderRuntimeAssumptions) {
- SmallVector<const SCEVPredicate *, 4> Assume;
bool PossiblyLoopIndependent = true;
if (Src == Dst)
PossiblyLoopIndependent = false;
@@ -3595,8 +3625,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) {
// can only analyze simple loads and stores, i.e., no calls, invokes, etc.
LLVM_DEBUG(dbgs() << "can only handle simple loads and stores\n");
- return std::make_unique<Dependence>(Src, Dst,
- SCEVUnionPredicate(Assume, *SE));
+ return std::make_unique<Dependence>(Src, Dst, getNonRedundantAssumptions());
}
const MemoryLocation &DstLoc = MemoryLocation::get(Dst);
@@ -3607,8 +3636,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
case AliasResult::PartialAlias:
// cannot analyse objects if we don't understand their aliasing.
LLVM_DEBUG(dbgs() << "can't analyze may or partial alias\n");
- return std::make_unique<Dependence>(Src, Dst,
- SCEVUnionPredicate(Assume, *SE));
+ return std::make_unique<Dependence>(Src, Dst, getNonRedundantAssumptions());
case AliasResult::NoAlias:
// If the objects noalias, they are distinct, accesses are independent.
LLVM_DEBUG(dbgs() << "no alias\n");
@@ -3622,8 +3650,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
// The dependence test gets confused if the size of the memory accesses
// differ.
LLVM_DEBUG(dbgs() << "can't analyze must alias with different sizes\n");
- return std::make_unique<Dependence>(Src, Dst,
- SCEVUnionPredicate(Assume, *SE));
+ return std::make_unique<Dependence>(Src, Dst, getNonRedundantAssumptions());
}
Value *SrcPtr = getLoadStorePointerOperand(Src);
@@ -3642,8 +3669,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
// We check this upfront so we don't crash in cases where getMinusSCEV()
// returns a SCEVCouldNotCompute.
LLVM_DEBUG(dbgs() << "can't analyze SCEV with different pointer base\n");
- return std::make_unique<Dependence>(Src, Dst,
- SCEVUnionPredicate(Assume, *SE));
+ return std::make_unique<Dependence>(Src, Dst, getNonRedundantAssumptions());
}
// Even if the base pointers are the same, they may not be loop-invariant. It
@@ -3655,8 +3681,7 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
if (!isLoopInvariant(SrcBase, SrcLoop) ||
!isLoopInvariant(DstBase, DstLoop)) {
LLVM_DEBUG(dbgs() << "The base pointer is not loop invariant.\n");
- return std::make_unique<Dependence>(Src, Dst,
- SCEVUnionPredicate(Assume, *SE));
+ return std::make_unique<Dependence>(Src, Dst, getNonRedundantAssumptions());
}
uint64_t EltSize = SrcLoc.Size.toRaw();
@@ -3664,34 +3689,22 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
const SCEV *DstEv = SE->getMinusSCEV(DstSCEV, DstBase);
// Check that memory access offsets are multiples of element sizes.
- if (!SE->isKnownMultipleOf(SrcEv, EltSize, Assume) ||
- !SE->isKnownMultipleOf(DstEv, EltSize, Assume)) {
+ if (!SE->isKnownMultipleOf(SrcEv, EltSize, Assumptions) ||
+ !SE->isKnownMultipleOf(DstEv, EltSize, Assumptions)) {
LLVM_DEBUG(dbgs() << "can't analyze SCEV with different offsets\n");
- return std::make_unique<Dependence>(Src, Dst,
- SCEVUnionPredicate(Assume, *SE));
+ return std::make_unique<Dependence>(Src, Dst, getNonRedundantAssumptions());
}
- if (!Assume.empty()) {
- if (!UnderRuntimeAssumptions)
- return std::make_unique<Dependence>(Src, Dst,
- SCEVUnionPredicate(Assume, *SE));
- // Add non-redundant assumptions.
- unsigned N = Assumptions.size();
- for (const SCEVPredicate *P : Assume) {
- bool Implied = false;
- for (unsigned I = 0; I != N && !Implied; I++)
- if (Assumptions[I]->implies(P, *SE))
- Implied = true;
- if (!Implied)
- Assumptions.push_back(P);
- }
- }
+ // If runtime assumptions were added but not allowed, return confused
+ // dependence.
+ if (!UnderRuntimeAssumptions && !Assumptions.empty())
+ return std::make_unique<Dependence>(Src, Dst, getNonRedundantAssumptions());
establishNestingLevels(Src, Dst);
LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n");
LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n");
- FullDependence Result(Src, Dst, SCEVUnionPredicate(Assume, *SE),
+ FullDependence Result(Src, Dst, SCEVUnionPredicate(Assumptions, *SE),
PossiblyLoopIndependent, CommonLevels);
++TotalArrayPairs;
@@ -4040,6 +4053,12 @@ DependenceInfo::depends(Instruction *Src, Instruction *Dst,
return nullptr;
}
+ // If runtime assumptions were added but not allowed, return confused
+ // dependence.
+ if (!UnderRuntimeAssumptions && !Assumptions.empty())
+ return std::make_unique<Dependence>(Src, Dst, getNonRedundantAssumptions());
+
+ Result.Assumptions = getNonRedundantAssumptions();
return std::make_unique<FullDependence>(std::move(Result));
}
>From 8932e0e21399edceb6b58e3d8e41f6c35b51a4fc Mon Sep 17 00:00:00 2001
From: Sebastian Pop <spop at nvidia.com>
Date: Sun, 24 Aug 2025 01:11:32 -0500
Subject: [PATCH 3/7] [LAA] Fix WAW dependency analysis with negative distances
Previously, LAA would incorrectly classify Write-After-Write dependencies
with negative distances as safe Forward dependencies, allowing inappropriate
vectorization of loops with bidirectional WAW dependencies.
The issue occurred in loops like:
for(int i = 0; i < n; ++i) {
A[(i+1)*4] = 10; // First store
A[i] = 100; // Second store
}
The dependence distance from first store to second store is negative:
{-16,+,-12}. However, this represents a bidirectional WAW dependency
that DependenceAnalysis would report as 'output [<>]!', indicating
dependency in both directions.
This patch fixes LAA to properly detect WAW dependencies with negative
distances as unsafe, preventing incorrect vectorization.
The fix adds a check specifically for Write-After-Write dependencies
before the general negative distance handling, ensuring they are
classified as Unknown (unsafe) rather than Forward (safe).
A proper fix, not implemented here because it would be a major rework of LAA,
is to implement bidirectional dependence checking that computes distances
in both directions and detects inconsistent direction vectors.
---
llvm/lib/Analysis/LoopAccessAnalysis.cpp | 15 +++
.../LoopAccessAnalysis/depend_diff_types.ll | 7 +-
.../forward-loop-carried.ll | 2 +-
.../forward-loop-independent.ll | 2 +-
.../waw-negative-dependence.ll | 109 ++++++++++++++++++
.../AArch64/sve-interleaved-accesses.ll | 75 ++++--------
.../LoopVectorize/interleaved-accesses.ll | 84 ++------------
7 files changed, 163 insertions(+), 131 deletions(-)
create mode 100644 llvm/test/Analysis/LoopAccessAnalysis/waw-negative-dependence.ll
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index bceddd0325276..e522d1392f7f9 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2196,6 +2196,21 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
return Dependence::Unknown;
}
+ // For WAW (Write-After-Write) dependencies, negative distances in one
+ // direction can still represent unsafe dependencies. Since we only check
+ // dependencies in program order (AIdx < BIdx), a negative distance means
+ // the later write accesses memory locations before the earlier write.
+ // However, in a vectorized loop, both writes could execute simultaneously,
+ // potentially causing incorrect behavior. Therefore, WAW with negative
+ // distances should be treated as unsafe.
+ bool IsWriteAfterWrite = (AIsWrite && BIsWrite);
+ if (IsWriteAfterWrite) {
+ LLVM_DEBUG(
+ dbgs() << "LAA: WAW dependence with negative distance is unsafe\n");
+ return CheckCompletelyBeforeOrAfter() ? Dependence::NoDep
+ : Dependence::Unknown;
+ }
+
bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
// Check if the first access writes to a location that is read in a later
// iteration, where the distance between them is not a multiple of a vector
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll
index 023a8c056968f..00a2ce7337d0d 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -disable-output -passes='print<access-info>' < %s 2>&1 | FileCheck %s
@@ -449,9 +449,10 @@ exit:
define void @different_type_sizes_forward(ptr %dst) {
; CHECK-LABEL: 'different_type_sizes_forward'
; CHECK-NEXT: loop:
-; CHECK-NEXT: Memory dependences are safe
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Unknown data dependence.
; CHECK-NEXT: Dependences:
-; CHECK-NEXT: Forward:
+; CHECK-NEXT: Unknown:
; CHECK-NEXT: store i32 0, ptr %gep.10.iv, align 4 ->
; CHECK-NEXT: store i16 1, ptr %gep.iv, align 2
; CHECK-EMPTY:
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-carried.ll b/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-carried.ll
index adfd19923e921..8b00ff0ad2a59 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-carried.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-carried.ll
@@ -70,7 +70,7 @@ define void @forward_different_access_sizes(ptr readnone %end, ptr %start) {
; CHECK-NEXT: store i32 0, ptr %gep.2, align 4 ->
; CHECK-NEXT: %l = load i24, ptr %gep.1, align 1
; CHECK-EMPTY:
-; CHECK-NEXT: Forward:
+; CHECK-NEXT: Unknown:
; CHECK-NEXT: store i32 0, ptr %gep.2, align 4 ->
; CHECK-NEXT: store i24 %l, ptr %ptr.iv, align 1
; CHECK-EMPTY:
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-independent.ll b/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-independent.ll
index 7fc9958dba552..218166526d7c0 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-independent.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/forward-loop-independent.ll
@@ -35,7 +35,7 @@ define void @f(ptr noalias %A, ptr noalias %B, ptr noalias %C, i64 %N) {
; CHECK-NEXT: store i32 %b_p2, ptr %Aidx_next, align 4 ->
; CHECK-NEXT: %a = load i32, ptr %Aidx, align 4
; CHECK-EMPTY:
-; CHECK-NEXT: Forward:
+; CHECK-NEXT: Unknown:
; CHECK-NEXT: store i32 %b_p2, ptr %Aidx_next, align 4 ->
; CHECK-NEXT: store i32 %b_p1, ptr %Aidx, align 4
; CHECK-EMPTY:
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/waw-negative-dependence.ll b/llvm/test/Analysis/LoopAccessAnalysis/waw-negative-dependence.ll
new file mode 100644
index 0000000000000..be49af7d75460
--- /dev/null
+++ b/llvm/test/Analysis/LoopAccessAnalysis/waw-negative-dependence.ll
@@ -0,0 +1,109 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes='print<access-info>' -disable-output %s 2>&1 | FileCheck %s
+
+; Test that LAA correctly identifies Write-After-Write dependencies with negative
+; distances as unsafe. Previously, LAA would incorrectly classify negative distance
+; WAW dependencies as safe Forward dependencies, allowing inappropriate vectorization.
+;
+; This corresponds to the loop:
+; for(int i = 0; i < n; ++i) {
+; A[(i+1)*4] = 10; // First store: A[4, 8, 12, 16, ...]
+; A[i] = 100; // Second store: A[0, 1, 2, 3, 4, ...]
+; }
+;
+; The dependence distance from first store to second store is negative:
+; A[i] - A[(i+1)*4] = {0,+,4} - {16,+,16} = {-16,+,-12}
+; However, the dependence from second store to first store in the next iteration
+; would be positive: A[(i+1)*4] - A[i] = {16,+,16} - {0,+,4} = {16,+,12}
+;
+; This bidirectional dependence pattern (negative in one direction, positive in the
+; other) creates a Write-After-Write dependency that is unsafe for vectorization.
+; DependenceAnalysis would report this as "output [<>]!" indicating the complex
+; dependence direction. LAA must detect this as unsafe even when only checking
+; the negative distance direction.
+
+define void @test_waw_negative_dependence(i64 %n, ptr nocapture %A) {
+; CHECK-LABEL: 'test_waw_negative_dependence'
+; CHECK-NEXT: loop:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Unknown data dependence.
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Unknown:
+; CHECK-NEXT: store i32 10, ptr %arrayidx1, align 4 ->
+; CHECK-NEXT: store i32 100, ptr %arrayidx2, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ %cmp8 = icmp sgt i64 %n, 0
+ br i1 %cmp8, label %loop, label %exit
+
+loop:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %loop ]
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+
+ ; First store: A[(i+1)*4] = 10
+ %0 = shl nsw i64 %indvars.iv.next, 2 ; (i+1)*4
+ %arrayidx1 = getelementptr inbounds i32, ptr %A, i64 %0
+ store i32 10, ptr %arrayidx1, align 4
+
+ ; Second store: A[i] = 100
+ %arrayidx2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+ store i32 100, ptr %arrayidx2, align 4
+
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %n
+ br i1 %exitcond.not, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; Test a similar case but with different stride to ensure the fix is general.
+define void @test_waw_negative_dependence_different_stride(i64 %n, ptr nocapture %A) {
+; CHECK-LABEL: 'test_waw_negative_dependence_different_stride'
+; CHECK-NEXT: loop:
+; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Unknown data dependence.
+; CHECK-NEXT: Dependences:
+; CHECK-NEXT: Unknown:
+; CHECK-NEXT: store i32 10, ptr %arrayidx1, align 4 ->
+; CHECK-NEXT: store i32 100, ptr %arrayidx2, align 4
+; CHECK-EMPTY:
+; CHECK-NEXT: Run-time memory checks:
+; CHECK-NEXT: Grouped accesses:
+; CHECK-EMPTY:
+; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
+; CHECK-NEXT: SCEV assumptions:
+; CHECK-EMPTY:
+; CHECK-NEXT: Expressions re-written:
+;
+entry:
+ %cmp8 = icmp sgt i64 %n, 0
+ br i1 %cmp8, label %loop, label %exit
+
+loop:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %loop ]
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+
+ ; First store: A[(i+2)*2] = 10
+ %0 = add nsw i64 %indvars.iv, 2 ; i+2
+ %1 = shl nsw i64 %0, 1 ; (i+2)*2
+ %arrayidx1 = getelementptr inbounds i32, ptr %A, i64 %1
+ store i32 10, ptr %arrayidx1, align 4
+
+ ; Second store: A[i] = 100
+ %arrayidx2 = getelementptr inbounds i32, ptr %A, i64 %indvars.iv
+ store i32 100, ptr %arrayidx2, align 4
+
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %n
+ br i1 %exitcond.not, label %exit, label %loop
+
+exit:
+ ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
index fd0bc0b6c20ef..e68744b8a6f37 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
@@ -1158,53 +1158,22 @@ for.end:
define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) #1 {
; CHECK-LABEL: @PR27626_5(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 5)
-; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -4
-; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
-; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP4:%.*]] = shl nuw nsw i64 [[TMP3]], 2
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp samesign ult i64 [[TMP2]], [[TMP4]]
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2
-; CHECK-NEXT: [[DOTNOT:%.*]] = sub nsw i64 0, [[TMP8]]
-; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], [[DOTNOT]]
-; CHECK-NEXT: [[TMP11:%.*]] = shl nuw i64 [[N_VEC]], 1
-; CHECK-NEXT: [[IND_END:%.*]] = or disjoint i64 [[TMP11]], 3
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X:%.*]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Y:%.*]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Z:%.*]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT3]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
-; CHECK-NEXT: [[TMP21:%.*]] = shl <vscale x 4 x i64> [[TMP10]], splat (i64 1)
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> [[TMP21]], splat (i64 3)
-; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i64 [[TMP7]], 3
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP12]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i64> [[VEC_IND]], splat (i64 -1)
-; CHECK-NEXT: [[TMP14:%.*]] = add <vscale x 4 x i64> [[VEC_IND]], splat (i64 -3)
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], <vscale x 4 x i64> [[VEC_IND]]
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], <vscale x 4 x i64> [[TMP13]]
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], <vscale x 4 x i64> [[TMP14]]
-; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], <vscale x 4 x ptr> [[TMP16]], i32 4, <vscale x 4 x i1> splat (i1 true))
-; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT2]], <vscale x 4 x ptr> [[TMP17]], i32 4, <vscale x 4 x i1> splat (i1 true))
-; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT4]], <vscale x 4 x ptr> [[TMP15]], i32 4, <vscale x 4 x i1> splat (i1 true))
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
-; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
-; CHECK: middle.block:
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
-; CHECK: scalar.ph:
+; CHECK: for.body:
+; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[VECTOR_BODY]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[A_I:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[I]]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A]], i64 [[I]]
+; CHECK-NEXT: [[A_I_MINUS_1:%.*]] = getelementptr i8, ptr [[TMP0]], i64 -4
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i64 [[I]]
+; CHECK-NEXT: [[A_I_MINUS_3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 -12
+; CHECK-NEXT: store i32 [[X:%.*]], ptr [[A_I_MINUS_1]], align 4
+; CHECK-NEXT: store i32 [[Y:%.*]], ptr [[A_I_MINUS_3]], align 4
+; CHECK-NEXT: store i32 [[Z:%.*]], ptr [[A_I]], align 4
+; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 2
+; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[COND]], label [[VECTOR_BODY]], label [[FOR_END:%.*]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
;
entry:
br label %for.body
@@ -1281,21 +1250,21 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 {
; CHECK-NEXT: [[TMP18:%.*]] = add nuw nsw <vscale x 4 x i64> [[VEC_IND]], splat (i64 1)
; CHECK-NEXT: [[TMP19:%.*]] = add nuw nsw <vscale x 4 x i64> [[VEC_IND]], splat (i64 2)
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i16, ptr [[A]], <vscale x 4 x i64> [[TMP18]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> [[TMP20]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i16> poison), !alias.scope [[META34:![0-9]+]]
+; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> [[TMP20]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i16> poison), !alias.scope [[META32:![0-9]+]]
; CHECK-NEXT: [[TMP21:%.*]] = sext <vscale x 4 x i16> [[WIDE_MASKED_GATHER]] to <vscale x 4 x i32>
; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i16, ptr [[A]], <vscale x 4 x i64> [[TMP19]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER4]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> [[TMP22]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i16> poison), !alias.scope [[META34]]
+; CHECK-NEXT: [[WIDE_MASKED_GATHER4]] = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16.nxv4p0(<vscale x 4 x ptr> [[TMP22]], i32 4, <vscale x 4 x i1> splat (i1 true), <vscale x 4 x i16> poison), !alias.scope [[META32]]
; CHECK-NEXT: [[TMP23:%.*]] = call <vscale x 4 x i16> @llvm.vector.splice.nxv4i16(<vscale x 4 x i16> [[VECTOR_RECUR]], <vscale x 4 x i16> [[WIDE_MASKED_GATHER4]], i32 -1)
; CHECK-NEXT: [[TMP24:%.*]] = sext <vscale x 4 x i16> [[TMP23]] to <vscale x 4 x i32>
; CHECK-NEXT: [[TMP25:%.*]] = sext <vscale x 4 x i16> [[WIDE_MASKED_GATHER4]] to <vscale x 4 x i32>
; CHECK-NEXT: [[TMP26:%.*]] = mul nsw <vscale x 4 x i32> [[TMP24]], [[TMP21]]
; CHECK-NEXT: [[TMP27:%.*]] = mul nsw <vscale x 4 x i32> [[TMP26]], [[TMP25]]
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
-; CHECK-NEXT: store <vscale x 4 x i32> [[TMP27]], ptr [[TMP28]], align 4, !alias.scope [[META37:![0-9]+]], !noalias [[META34]]
+; CHECK-NEXT: store <vscale x 4 x i32> [[TMP27]], ptr [[TMP28]], align 4, !alias.scope [[META35:![0-9]+]], !noalias [[META32]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP39:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP37:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP31:%.*]] = shl nuw nsw i32 [[TMP30]], 2
@@ -1388,7 +1357,7 @@ define void @interleave_deinterleave_factor3(ptr writeonly noalias %dst, ptr rea
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP39:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
@@ -1475,7 +1444,7 @@ define void @interleave_deinterleave(ptr writeonly noalias %dst, ptr readonly %a
; CHECK-NEXT: store <vscale x 16 x i32> [[INTERLEAVED_VEC13]], ptr [[TMP21]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
@@ -1584,7 +1553,7 @@ define void @interleave_deinterleave_reverse(ptr noalias nocapture readonly %A,
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]]
; CHECK-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
-; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
index add58758788f9..aab21965d6877 100644
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
@@ -1348,82 +1348,20 @@ for.end:
define void @PR27626_5(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) {
; CHECK-LABEL: @PR27626_5(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 5)
-; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[SMAX]], -4
-; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp slt i64 [[N]], 10
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK: vector.ph:
-; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 9223372036854775804
-; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i64 [[N_VEC]], 1
-; CHECK-NEXT: [[IND_END:%.*]] = or disjoint i64 [[TMP3]], 3
-; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 3, i64 5, i64 7, i64 9>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = shl i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP7:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 -1)
-; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 -3)
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP12]], i64 12
-; CHECK-NEXT: [[TMP33:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP33]], i64 20
-; CHECK-NEXT: [[TMP34:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP34]], i64 28
-; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP35]], i64 36
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i64> [[TMP7]], i64 0
-; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i64> [[TMP7]], i64 1
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP16]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i64> [[TMP7]], i64 2
-; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP18]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i64> [[TMP7]], i64 3
-; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP20]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP8]], i64 0
-; CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP22]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP8]], i64 1
-; CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP24]]
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP8]], i64 2
-; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP26]]
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i64> [[TMP8]], i64 3
-; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP28]]
-; CHECK-NEXT: store i32 [[X:%.*]], ptr [[TMP15]], align 4
-; CHECK-NEXT: store i32 [[X]], ptr [[TMP17]], align 4
-; CHECK-NEXT: store i32 [[X]], ptr [[TMP19]], align 4
-; CHECK-NEXT: store i32 [[X]], ptr [[TMP21]], align 4
-; CHECK-NEXT: store i32 [[Y:%.*]], ptr [[TMP23]], align 4
-; CHECK-NEXT: store i32 [[Y]], ptr [[TMP25]], align 4
-; CHECK-NEXT: store i32 [[Y]], ptr [[TMP27]], align 4
-; CHECK-NEXT: store i32 [[Y]], ptr [[TMP29]], align 4
-; CHECK-NEXT: store i32 [[Z:%.*]], ptr [[TMP9]], align 4
-; CHECK-NEXT: store i32 [[Z]], ptr [[TMP10]], align 4
-; CHECK-NEXT: store i32 [[Z]], ptr [[TMP11]], align 4
-; CHECK-NEXT: store i32 [[Z]], ptr [[TMP13]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 8)
-; CHECK-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
-; CHECK: middle.block:
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
-; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 3, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT: [[A_I:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I]]
+; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ 3, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[A_I:%.*]] = getelementptr inbounds nuw i32, ptr [[A:%.*]], i64 [[I]]
; CHECK-NEXT: [[TMP31:%.*]] = getelementptr i32, ptr [[A]], i64 [[I]]
; CHECK-NEXT: [[A_I_MINUS_1:%.*]] = getelementptr i8, ptr [[TMP31]], i64 -4
; CHECK-NEXT: [[TMP32:%.*]] = getelementptr i32, ptr [[A]], i64 [[I]]
; CHECK-NEXT: [[A_I_MINUS_3:%.*]] = getelementptr i8, ptr [[TMP32]], i64 -12
-; CHECK-NEXT: store i32 [[X]], ptr [[A_I_MINUS_1]], align 4
-; CHECK-NEXT: store i32 [[Y]], ptr [[A_I_MINUS_3]], align 4
-; CHECK-NEXT: store i32 [[Z]], ptr [[A_I]], align 4
+; CHECK-NEXT: store i32 [[X:%.*]], ptr [[A_I_MINUS_1]], align 4
+; CHECK-NEXT: store i32 [[Y:%.*]], ptr [[A_I_MINUS_3]], align 4
+; CHECK-NEXT: store i32 [[Z:%.*]], ptr [[A_I]], align 4
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 2
-; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP35:![0-9]+]]
+; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N:%.*]]
+; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
@@ -1489,7 +1427,7 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) {
; CHECK-NEXT: [[DOTIDX:%.*]] = shl i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[A]], i64 [[DOTIDX]]
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP17]], i64 2
-; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i16>, ptr [[TMP7]], align 4
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i16>, ptr [[TMP7]], align 4, !alias.scope [[META34:![0-9]+]]
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i16> [[WIDE_VEC]], <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; CHECK-NEXT: [[STRIDED_VEC4]] = shufflevector <8 x i16> [[WIDE_VEC]], <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[STRIDED_VEC4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
@@ -1499,10 +1437,10 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) {
; CHECK-NEXT: [[TMP12:%.*]] = mul nsw <4 x i32> [[TMP9]], [[TMP10]]
; CHECK-NEXT: [[TMP13:%.*]] = mul nsw <4 x i32> [[TMP12]], [[TMP11]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
-; CHECK-NEXT: store <4 x i32> [[TMP13]], ptr [[TMP14]], align 4, !alias.scope [[META36:![0-9]+]], !noalias [[META39:![0-9]+]]
+; CHECK-NEXT: store <4 x i32> [[TMP13]], ptr [[TMP14]], align 4, !alias.scope [[META37:![0-9]+]], !noalias [[META34]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP39:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <8 x i16> [[WIDE_VEC]], i64 7
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
@@ -1531,7 +1469,7 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) {
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I]]
; CHECK-NEXT: store i32 [[MUL012]], ptr [[ARRAYIDX5]], align 4
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], [[N]]
-; CHECK-NEXT: br i1 [[EXITCOND]], label [[END]], label [[LOOP]], !llvm.loop [[LOOP42:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[END]], label [[LOOP]], !llvm.loop [[LOOP40:![0-9]+]]
; CHECK: end:
; CHECK-NEXT: ret void
;
>From ded138f0467c05ebf96314eee69965121ec032cc Mon Sep 17 00:00:00 2001
From: Sebastian Pop <spop at nvidia.com>
Date: Sat, 23 Aug 2025 23:16:38 -0500
Subject: [PATCH 4/7] [SCEV] Fix NSW flag propagation in getAddExpr
SCEV was losing NSW flags during AddRec operations, causing Dependence
Analysis to add unnecessary runtime assumptions for inbounds GEPs.
This patch fixes getAddExpr: when combining AddRecs from the same loop, preserve
compatible NSW/NUW flags from input AddRecs instead of always using FlagAnyWrap.
---
llvm/lib/Analysis/ScalarEvolution.cpp | 31 +++++-
.../Delinearization/fixed_size_array.ll | 4 +-
.../scev-nsw-flags-enable-analysis.ll | 45 ++++++++
.../ScalarEvolution/different-loops-recs.ll | 30 +++---
.../gep-nsw-flag-preservation.ll | 100 ++++++++++++++++++
5 files changed, 191 insertions(+), 19 deletions(-)
create mode 100644 llvm/test/Analysis/DependenceAnalysis/scev-nsw-flags-enable-analysis.ll
create mode 100644 llvm/test/Analysis/ScalarEvolution/gep-nsw-flag-preservation.ll
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index d2c445f1ffaa0..9867994527c45 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -2951,25 +2951,52 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
// Other + {A,+,B}<L> + {C,+,D}<L> --> Other + {A+C,+,B+D}<L>
SmallVector<const SCEV *, 4> AddRecOps(AddRec->operands());
+
+ // Track flags: start with the flags from the first AddRec.
+ bool AllHaveNSW = AddRec->hasNoSignedWrap();
+ bool AllHaveNUW = AddRec->hasNoUnsignedWrap();
+
for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
++OtherIdx) {
const auto *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]);
if (OtherAddRec->getLoop() == AddRecLoop) {
+ // Update flags based on this AddRec
+ if (!OtherAddRec->hasNoSignedWrap())
+ AllHaveNSW = false;
+ if (!OtherAddRec->hasNoUnsignedWrap())
+ AllHaveNUW = false;
for (unsigned i = 0, e = OtherAddRec->getNumOperands();
i != e; ++i) {
if (i >= AddRecOps.size()) {
append_range(AddRecOps, OtherAddRec->operands().drop_front(i));
break;
}
+ // Preserve no-wrap flags when combining AddRec operands.
+ SCEV::NoWrapFlags CombineFlags = SCEV::FlagAnyWrap;
+ if (auto *AR1 = dyn_cast<SCEVAddRecExpr>(AddRecOps[i]))
+ if (auto *AR2 =
+ dyn_cast<SCEVAddRecExpr>(OtherAddRec->getOperand(i))) {
+ if (AR1->hasNoSignedWrap() && AR2->hasNoSignedWrap())
+ CombineFlags = setFlags(CombineFlags, SCEV::FlagNSW);
+ if (AR1->hasNoUnsignedWrap() && AR2->hasNoUnsignedWrap())
+ CombineFlags = setFlags(CombineFlags, SCEV::FlagNUW);
+ }
SmallVector<const SCEV *, 2> TwoOps = {
AddRecOps[i], OtherAddRec->getOperand(i)};
- AddRecOps[i] = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1);
+ AddRecOps[i] = getAddExpr(TwoOps, CombineFlags, Depth + 1);
}
Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
}
}
// Step size has changed, so we cannot guarantee no self-wraparound.
- Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap);
+ // However, preserve NSW/NUW flags if all combined AddRecs had them.
+ SCEV::NoWrapFlags FinalFlags = SCEV::FlagAnyWrap;
+ if (AllHaveNSW)
+ FinalFlags = setFlags(FinalFlags, SCEV::FlagNSW);
+ if (AllHaveNUW)
+ FinalFlags = setFlags(FinalFlags, SCEV::FlagNUW);
+
+ Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, FinalFlags);
return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
}
}
diff --git a/llvm/test/Analysis/Delinearization/fixed_size_array.ll b/llvm/test/Analysis/Delinearization/fixed_size_array.ll
index 634850bb4a5a2..ffd0202e205ce 100644
--- a/llvm/test/Analysis/Delinearization/fixed_size_array.ll
+++ b/llvm/test/Analysis/Delinearization/fixed_size_array.ll
@@ -339,7 +339,7 @@ define void @a_i_j2k_i(ptr %a) {
; CHECK-LABEL: 'a_i_j2k_i'
; CHECK-NEXT: Inst: store i32 1, ptr %idx, align 4
; CHECK-NEXT: In Loop with Header: for.k
-; CHECK-NEXT: AccessFunction: {{\{\{\{}}0,+,1028}<%for.i.header>,+,256}<nw><%for.j.header>,+,128}<nw><%for.k>
+; CHECK-NEXT: AccessFunction: {{\{\{\{}}0,+,1028}<nuw><nsw><%for.i.header>,+,256}<nw><%for.j.header>,+,128}<nw><%for.k>
; CHECK-NEXT: failed to delinearize
;
entry:
@@ -391,7 +391,7 @@ define void @a_i_i_jk(ptr %a) {
; CHECK-LABEL: 'a_i_i_jk'
; CHECK-NEXT: Inst: store i32 1, ptr %idx, align 4
; CHECK-NEXT: In Loop with Header: for.k
-; CHECK-NEXT: AccessFunction: {{\{\{\{}}0,+,1152}<%for.i.header>,+,4}<nw><%for.j.header>,+,4}<nw><%for.k>
+; CHECK-NEXT: AccessFunction: {{\{\{\{}}0,+,1152}<nuw><nsw><%for.i.header>,+,4}<nw><%for.j.header>,+,4}<nw><%for.k>
; CHECK-NEXT: Base offset: %a
; CHECK-NEXT: ArrayDecl[UnknownSize][288] with elements of 4 bytes.
; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{{\{\{}}0,+,1}<nuw><nsw><%for.j.header>,+,1}<nuw><nsw><%for.k>]
diff --git a/llvm/test/Analysis/DependenceAnalysis/scev-nsw-flags-enable-analysis.ll b/llvm/test/Analysis/DependenceAnalysis/scev-nsw-flags-enable-analysis.ll
new file mode 100644
index 0000000000000..b717ce9815341
--- /dev/null
+++ b/llvm/test/Analysis/DependenceAnalysis/scev-nsw-flags-enable-analysis.ll
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -disable-output "-passes=print<da>" -aa-pipeline=basic-aa 2>&1 | FileCheck %s
+
+; Test that SCEV NSW flag preservation enables dependence analysis to work
+; correctly. Previously, SCEV would lose NSW flags when combining AddRec
+; expressions from GEP operations, causing dependence analysis to incorrectly
+; classify expressions as "wrapping" and fail analysis.
+
+define void @test_da_with_scev_flags(ptr %A) {
+; This test verifies that dependence analysis now correctly identifies
+; self-dependences when SCEV preserves NSW flags from GEP index computations.
+; CHECK-LABEL: 'test_da_with_scev_flags'
+; CHECK-NEXT: Src: %val = load i32, ptr %gep, align 4 --> Dst: %val = load i32, ptr %gep, align 4
+; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: Src: %val = load i32, ptr %gep, align 4 --> Dst: store i32 %val, ptr %gep, align 4
+; CHECK-NEXT: da analyze - consistent anti [0|<]!
+; CHECK-NEXT: Src: store i32 %val, ptr %gep, align 4 --> Dst: store i32 %val, ptr %gep, align 4
+; CHECK-NEXT: da analyze - none!
+;
+
+entry:
+ br label %loop
+
+loop:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
+
+ ; Create NSW-flagged index computation
+ %mul = mul nsw i64 %i, 3
+ %sub = add nsw i64 %mul, -6
+
+ ; GEP that should result in SCEV: {(-2424 + %A),+,1212}<nw>
+ ; The <nw> flag should prevent false "wrapping" detection in DA
+ %gep = getelementptr inbounds [100 x i32], ptr %A, i64 %sub, i64 %sub
+
+ ; Self-dependence: should be detected as "none" (no dependence)
+ %val = load i32, ptr %gep
+ store i32 %val, ptr %gep
+
+ %i.next = add nsw i64 %i, 1
+ %cond = icmp ult i64 %i.next, 50
+ br i1 %cond, label %loop, label %exit
+
+exit:
+ ret void
+}
diff --git a/llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll b/llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll
index 44bff5638bc85..b700f6cd0b432 100644
--- a/llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll
+++ b/llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll
@@ -23,9 +23,9 @@ define void @test_00(i1 %arg) {
; CHECK-NEXT: %phi3.inc = add i32 %phi3, 3
; CHECK-NEXT: --> {9,+,3}<nuw><nsw><%loop1> U: [9,502) S: [9,502) Exits: 501 LoopDispositions: { %loop1: Computable }
; CHECK-NEXT: %sum1 = add i32 %phi1, %phi2
-; CHECK-NEXT: --> {14,+,3}<%loop1> U: [14,507) S: [14,507) Exits: 506 LoopDispositions: { %loop1: Computable }
+; CHECK-NEXT: --> {14,+,3}<nuw><nsw><%loop1> U: [14,507) S: [14,507) Exits: 506 LoopDispositions: { %loop1: Computable }
; CHECK-NEXT: %sum2 = add i32 %sum1, %phi3
-; CHECK-NEXT: --> {20,+,6}<%loop1> U: [20,1005) S: [20,1005) Exits: 1004 LoopDispositions: { %loop1: Computable }
+; CHECK-NEXT: --> {20,+,6}<nuw><nsw><%loop1> U: [20,1005) S: [20,1005) Exits: 1004 LoopDispositions: { %loop1: Computable }
; CHECK-NEXT: %phi4 = phi i32 [ 63, %loop1 ], [ %phi4.inc, %loop2 ]
; CHECK-NEXT: --> {63,+,1}<nuw><nsw><%loop2> U: [63,205) S: [63,205) Exits: 204 LoopDispositions: { %loop2: Computable }
; CHECK-NEXT: %phi5 = phi i32 [ 53, %loop1 ], [ %phi5.inc, %loop2 ]
@@ -39,21 +39,21 @@ define void @test_00(i1 %arg) {
; CHECK-NEXT: %phi6.inc = add i32 %phi6, 3
; CHECK-NEXT: --> {46,+,3}<nuw><nsw><%loop2> U: [46,470) S: [46,470) Exits: 469 LoopDispositions: { %loop2: Computable }
; CHECK-NEXT: %sum3 = add i32 %phi4, %phi5
-; CHECK-NEXT: --> {116,+,3}<%loop2> U: [116,540) S: [116,540) Exits: 539 LoopDispositions: { %loop2: Computable }
+; CHECK-NEXT: --> {116,+,3}<nuw><nsw><%loop2> U: [116,540) S: [116,540) Exits: 539 LoopDispositions: { %loop2: Computable }
; CHECK-NEXT: %sum4 = add i32 %sum3, %phi6
-; CHECK-NEXT: --> {159,+,6}<%loop2> U: [159,1006) S: [159,1006) Exits: 1005 LoopDispositions: { %loop2: Computable }
+; CHECK-NEXT: --> {159,+,6}<nuw><nsw><%loop2> U: [159,1006) S: [159,1006) Exits: 1005 LoopDispositions: { %loop2: Computable }
; CHECK-NEXT: %s1 = add i32 %phi1, %phi4
; CHECK-NEXT: --> {{\{\{}}73,+,1}<nuw><nsw><%loop1>,+,1}<nw><%loop2> U: [73,379) S: [73,379) --> 378 U: [378,379) S: [378,379)
; CHECK-NEXT: %s2 = add i32 %phi5, %phi2
; CHECK-NEXT: --> {{\{\{}}57,+,2}<nuw><nsw><%loop1>,+,2}<nw><%loop2> U: [57,668) S: [57,668) --> 667 U: [667,668) S: [667,668)
; CHECK-NEXT: %s3 = add i32 %sum1, %sum3
-; CHECK-NEXT: --> {{\{\{}}130,+,3}<%loop1>,+,3}<%loop2> U: [130,1046) S: [130,1046) --> 1045 U: [1045,1046) S: [1045,1046)
+; CHECK-NEXT: --> {{\{\{}}130,+,3}<nuw><nsw><%loop1>,+,3}<nw><%loop2> U: [130,1046) S: [130,1046) --> 1045 U: [1045,1046) S: [1045,1046)
; CHECK-NEXT: %s4 = add i32 %sum4, %sum2
-; CHECK-NEXT: --> {{\{\{}}179,+,6}<%loop1>,+,6}<%loop2> U: [179,2010) S: [179,2010) --> 2009 U: [2009,2010) S: [2009,2010)
+; CHECK-NEXT: --> {{\{\{}}179,+,6}<nuw><nsw><%loop1>,+,6}<nw><%loop2> U: [179,2010) S: [179,2010) --> 2009 U: [2009,2010) S: [2009,2010)
; CHECK-NEXT: %s5 = add i32 %phi3, %sum3
-; CHECK-NEXT: --> {{\{\{}}122,+,3}<nuw><nsw><%loop1>,+,3}<%loop2> U: [122,1038) S: [122,1038) --> 1037 U: [1037,1038) S: [1037,1038)
+; CHECK-NEXT: --> {{\{\{}}122,+,3}<nuw><nsw><%loop1>,+,3}<nw><%loop2> U: [122,1038) S: [122,1038) --> 1037 U: [1037,1038) S: [1037,1038)
; CHECK-NEXT: %s6 = add i32 %sum2, %phi6
-; CHECK-NEXT: --> {{\{\{}}63,+,6}<%loop1>,+,3}<nw><%loop2> U: [63,1471) S: [63,1471) --> 1470 U: [1470,1471) S: [1470,1471)
+; CHECK-NEXT: --> {{\{\{}}63,+,6}<nuw><nsw><%loop1>,+,3}<nw><%loop2> U: [63,1471) S: [63,1471) --> 1470 U: [1470,1471) S: [1470,1471)
; CHECK-NEXT: Determining loop execution counts for: @test_00
; CHECK-NEXT: Loop %loop2: backedge-taken count is i32 141
; CHECK-NEXT: Loop %loop2: constant max backedge-taken count is i32 141
@@ -139,13 +139,13 @@ define void @test_01(i32 %a, i32 %b) {
; CHECK-NEXT: %phi6.inc = add i32 %phi6, 3
; CHECK-NEXT: --> {46,+,3}<nuw><nsw><%loop2> U: [46,548) S: [46,548) Exits: (46 + (3 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))<nuw><nsw> + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))<nuw>)<nuw> LoopDispositions: { %loop2: Computable }
; CHECK-NEXT: %sum3 = add i32 %phi4, %phi5
-; CHECK-NEXT: --> {116,+,3}<%loop2> U: [116,618) S: [116,618) Exits: (116 + (3 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))<nuw><nsw> + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))<nuw>)<nuw> LoopDispositions: { %loop2: Computable }
+; CHECK-NEXT: --> {116,+,3}<nuw><nsw><%loop2> U: [116,618) S: [116,618) Exits: (116 + (3 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))<nuw><nsw> + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))<nuw>)<nuw> LoopDispositions: { %loop2: Computable }
; CHECK-NEXT: %sum4 = add i32 %sum3, %phi6
-; CHECK-NEXT: --> {159,+,6}<%loop2> U: [159,1162) S: [159,1162) Exits: (159 + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))<nuw><nsw> + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) LoopDispositions: { %loop2: Computable }
+; CHECK-NEXT: --> {159,+,6}<nuw><nsw><%loop2> U: [159,1162) S: [159,1162) Exits: (159 + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))<nuw><nsw> + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) LoopDispositions: { %loop2: Computable }
; CHECK-NEXT: %is2 = add i32 %sum4, %b
-; CHECK-NEXT: --> {(159 + %b),+,6}<%loop2> U: full-set S: full-set Exits: (159 + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))<nuw><nsw> + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))) + %b) LoopDispositions: { %loop2: Computable }
+; CHECK-NEXT: --> {(159 + %b),+,6}<nw><%loop2> U: full-set S: full-set Exits: (159 + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))<nuw><nsw> + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))) + %b) LoopDispositions: { %loop2: Computable }
; CHECK-NEXT: %ec2 = add i32 %is1, %is2
-; CHECK-NEXT: --> {{\{\{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<%loop2> U: full-set S: full-set --> {(165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))),+,6}<%loop2> U: full-set S: full-set Exits: (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))<nuw><nsw> + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) LoopDispositions: { %loop2: Computable }
+; CHECK-NEXT: --> {{\{\{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<nw><%loop2> U: full-set S: full-set --> {(165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))),+,6}<nw><%loop2> U: full-set S: full-set Exits: (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))<nuw><nsw> + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) LoopDispositions: { %loop2: Computable }
; CHECK-NEXT: %s1 = add i32 %phi1, %is1
; CHECK-NEXT: --> {(6 + (3 * %a) + %b),+,7}<%loop1> U: full-set S: full-set --> (6 + (3 * %a) + (7 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + %b) U: full-set S: full-set
; CHECK-NEXT: %s2 = add i32 %is2, %phi4
@@ -153,11 +153,11 @@ define void @test_01(i32 %a, i32 %b) {
; CHECK-NEXT: %s3 = add i32 %is1, %phi5
; CHECK-NEXT: --> {{\{\{}}(59 + (2 * %a) + %b),+,6}<%loop1>,+,2}<nw><%loop2> U: full-set S: full-set --> (59 + (2 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))<nuw><nsw> + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))<nuw><nsw> + (2 * %a) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + %b) U: full-set S: full-set
; CHECK-NEXT: %s4 = add i32 %phi2, %is2
-; CHECK-NEXT: --> {{\{\{}}(159 + (2 * %b)),+,2}<nw><%loop1>,+,6}<%loop2> U: full-set S: full-set --> (159 + (2 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))<nuw><nsw> + (2 * %b) + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))<nuw><nsw> + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) U: full-set S: full-set
+; CHECK-NEXT: --> {{\{\{}}(159 + (2 * %b)),+,2}<nw><%loop1>,+,6}<nw><%loop2> U: full-set S: full-set --> (159 + (2 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))<nuw><nsw> + (2 * %b) + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))<nuw><nsw> + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) U: full-set S: full-set
; CHECK-NEXT: %s5 = add i32 %is1, %is2
-; CHECK-NEXT: --> {{\{\{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<%loop2> U: full-set S: full-set --> (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))<nuw><nsw> + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) U: full-set S: full-set
+; CHECK-NEXT: --> {{\{\{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<nw><%loop2> U: full-set S: full-set --> (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))<nuw><nsw> + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) U: full-set S: full-set
; CHECK-NEXT: %s6 = add i32 %is2, %is1
-; CHECK-NEXT: --> {{\{\{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<%loop2> U: full-set S: full-set --> (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))<nuw><nsw> + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) U: full-set S: full-set
+; CHECK-NEXT: --> {{\{\{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<nw><%loop2> U: full-set S: full-set --> (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))<nuw><nsw> + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) U: full-set S: full-set
; CHECK-NEXT: Determining loop execution counts for: @test_01
; CHECK-NEXT: Loop %loop2: backedge-taken count is (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))<nuw><nsw> + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))<nuw><nsw> + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))
; CHECK-NEXT: Loop %loop2: constant max backedge-taken count is i32 167
diff --git a/llvm/test/Analysis/ScalarEvolution/gep-nsw-flag-preservation.ll b/llvm/test/Analysis/ScalarEvolution/gep-nsw-flag-preservation.ll
new file mode 100644
index 0000000000000..3d1865540c78f
--- /dev/null
+++ b/llvm/test/Analysis/ScalarEvolution/gep-nsw-flag-preservation.ll
@@ -0,0 +1,100 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt < %s -disable-output "-passes=print<scalar-evolution>" 2>&1 | FileCheck %s
+
+; Test that SCEV preserves NSW flags when combining AddRec expressions
+; from GEP operations. Previously, the NSW flags would be lost when
+; combining offset calculations like:
+; {-2400,+,1200}<nsw> + {-24,+,12}<nsw> = {-2424,+,1212}<nsw>
+; This test ensures the fix in getAddExpr properly preserves flags.
+
+define void @test_gep_nsw_preservation(ptr %A) {
+; CHECK-LABEL: 'test_gep_nsw_preservation'
+; CHECK-NEXT: Classifying expressions for: @test_gep_nsw_preservation
+; CHECK-NEXT: %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
+; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%loop> U: [0,50) S: [0,50) Exits: 49 LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: %mul = mul nsw i64 %i, 3
+; CHECK-NEXT: --> {0,+,3}<nuw><nsw><%loop> U: [0,148) S: [0,148) Exits: 147 LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: %sub = add nsw i64 %mul, -6
+; CHECK-NEXT: --> {-6,+,3}<nsw><%loop> U: [-6,142) S: [-6,142) Exits: 141 LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: %gep = getelementptr inbounds [100 x i32], ptr %A, i64 %sub, i64 %sub
+; CHECK-NEXT: --> {(-2424 + %A),+,1212}<nw><%loop> U: full-set S: full-set Exits: (56964 + %A) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: %i.next = add nsw i64 %i, 1
+; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%loop> U: [1,51) S: [1,51) Exits: 50 LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: Determining loop execution counts for: @test_gep_nsw_preservation
+; CHECK-NEXT: Loop %loop: backedge-taken count is i64 49
+; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 49
+; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is i64 49
+; CHECK-NEXT: Loop %loop: Trip multiple is 50
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
+
+ ; Create NSW-flagged arithmetic that gets used in GEP indices
+ %mul = mul nsw i64 %i, 3
+ %sub = add nsw i64 %mul, -6
+
+ ; GEP with inbounds using the NSW-flagged expressions as indices
+ ; This should result in SCEV: {(-2424 + %A),+,1212}<nw>
+ ; The <nw> flag should be preserved from the NSW flags on the indices
+ %gep = getelementptr inbounds [100 x i32], ptr %A, i64 %sub, i64 %sub
+
+ %i.next = add nsw i64 %i, 1
+ %cond = icmp ult i64 %i.next, 50
+ br i1 %cond, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+; Test that the preserved flags enable dependence analysis
+define void @test_dependence_analysis(ptr %A) {
+; This test should show that dependence analysis works correctly
+; when SCEV preserves NSW flags, preventing false "wrapping" detection
+; CHECK-LABEL: 'test_dependence_analysis'
+; CHECK-NEXT: Classifying expressions for: @test_dependence_analysis
+; CHECK-NEXT: %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
+; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%loop> U: [0,50) S: [0,50) Exits: 49 LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: %mul = mul nsw i64 %i, 3
+; CHECK-NEXT: --> {0,+,3}<nuw><nsw><%loop> U: [0,148) S: [0,148) Exits: 147 LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: %sub = add nsw i64 %mul, -6
+; CHECK-NEXT: --> {-6,+,3}<nsw><%loop> U: [-6,142) S: [-6,142) Exits: 141 LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: %gep1 = getelementptr inbounds [100 x i32], ptr %A, i64 %sub, i64 %sub
+; CHECK-NEXT: --> {(-2424 + %A),+,1212}<nw><%loop> U: full-set S: full-set Exits: (56964 + %A) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: %gep2 = getelementptr inbounds [100 x i32], ptr %A, i64 %sub, i64 %sub
+; CHECK-NEXT: --> {(-2424 + %A),+,1212}<nw><%loop> U: full-set S: full-set Exits: (56964 + %A) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: %val = load i32, ptr %gep2, align 4
+; CHECK-NEXT: --> %val U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
+; CHECK-NEXT: %i.next = add nsw i64 %i, 1
+; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%loop> U: [1,51) S: [1,51) Exits: 50 LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: Determining loop execution counts for: @test_dependence_analysis
+; CHECK-NEXT: Loop %loop: backedge-taken count is i64 49
+; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 49
+; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is i64 49
+; CHECK-NEXT: Loop %loop: Trip multiple is 50
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
+
+ %mul = mul nsw i64 %i, 3
+ %sub = add nsw i64 %mul, -6
+
+ ; Two identical GEPs - should be detected as same location (no dependence)
+ %gep1 = getelementptr inbounds [100 x i32], ptr %A, i64 %sub, i64 %sub
+ %gep2 = getelementptr inbounds [100 x i32], ptr %A, i64 %sub, i64 %sub
+
+ %val = load i32, ptr %gep2
+ store i32 %val, ptr %gep1
+
+ %i.next = add nsw i64 %i, 1
+ %cond = icmp ult i64 %i.next, 50
+ br i1 %cond, label %loop, label %exit
+
+exit:
+ ret void
+}
>From e1737c70db9785efb764600102c808020cc8389a Mon Sep 17 00:00:00 2001
From: Sebastian Pop <spop at nvidia.com>
Date: Sat, 23 Aug 2025 23:19:04 -0500
Subject: [PATCH 5/7] [SCEV] Fix NSW flag propagation in getMulExpr
SCEV was losing NSW flags during AddRec operations, causing Dependence
Analysis to add unnecessary runtime assumptions for inbounds GEPs.
This patch fixes getMulExpr: when multiplying AddRecs by constants, preserve NSW
flags that were explicitly requested from inbounds GEP operations. The overflow
check was too conservative and cleared flags even when they were explicitly
requested via OrigFlags.
---
llvm/lib/Analysis/ScalarEvolution.cpp | 15 ++++++++--
llvm/test/Analysis/Delinearization/a.ll | 4 +--
.../constant_functions_multi_dim.ll | 4 +--
.../test/Analysis/Delinearization/himeno_1.ll | 2 +-
.../test/Analysis/Delinearization/himeno_2.ll | 2 +-
.../iv_times_constant_in_subscript.ll | 4 +--
.../multidim_ivs_and_integer_offsets_3d.ll | 2 +-
...multidim_ivs_and_integer_offsets_nts_3d.ll | 4 +--
...multidim_ivs_and_parameteric_offsets_3d.ll | 2 +-
.../Delinearization/multidim_only_ivs_2d.ll | 4 +--
.../multidim_only_ivs_2d_nested.ll | 2 +-
.../Delinearization/multidim_only_ivs_3d.ll | 2 +-
.../multidim_only_ivs_3d_cast.ll | 2 +-
..._two_accesses_different_delinearization.ll | 4 +--
.../Analysis/DependenceAnalysis/DADelin.ll | 6 ++--
.../different-access-types-rt-checks.ll | 2 +-
...bolic-max-backedge-taken-count-may-wrap.ll | 6 ++--
.../loops-with-indirect-reads-and-writes.ll | 4 +--
.../LoopAccessAnalysis/number-of-memchecks.ll | 2 +-
...ter-dependence-analysis-forked-pointers.ll | 8 ++---
...untime-checks-after-dependence-analysis.ll | 12 ++++----
.../LoopAccessAnalysis/symbolic-stride.ll | 30 +++++++++----------
.../ScalarEvolution/flags-from-poison.ll | 26 ++++++++--------
.../max-backedge-taken-count-guard-info.ll | 20 ++++++-------
.../ScalarEvolution/max-expr-cache.ll | 12 ++++----
.../trip-count-scalable-stride.ll | 8 ++---
.../lsr-term-fold-negative-testcase.ll | 2 +-
.../LoopUnrollAndJam/unroll-and-jam.ll | 10 +++----
28 files changed, 106 insertions(+), 95 deletions(-)
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 9867994527c45..3cd5b34d16838 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -3279,7 +3279,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step}
SmallVector<const SCEV *, 4> NewOps;
NewOps.reserve(AddRec->getNumOperands());
- const SCEV *Scale = getMulExpr(LIOps, SCEV::FlagAnyWrap, Depth + 1);
+ const SCEV *Scale = getMulExpr(LIOps, OrigFlags, Depth + 1);
// If both the mul and addrec are nuw, we can preserve nuw.
// If both the mul and addrec are nsw, we can only preserve nsw if either
@@ -3288,6 +3288,15 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
SCEV::NoWrapFlags Flags =
AddRec->getNoWrapFlags(ComputeFlags({Scale, AddRec}));
+ // Preserve flags for positive constant Scale.
+ if (auto *SC = dyn_cast<SCEVConstant>(Scale))
+ if (SC->getAPInt().isStrictlyPositive()) {
+ if (hasFlags(OrigFlags, SCEV::FlagNSW))
+ Flags = setFlags(Flags, SCEV::FlagNSW);
+ if (hasFlags(OrigFlags, SCEV::FlagNUW))
+ Flags = setFlags(Flags, SCEV::FlagNUW);
+ }
+
for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i),
SCEV::FlagAnyWrap, Depth + 1));
@@ -3297,7 +3306,9 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
Instruction::Mul, getSignedRange(Scale),
OverflowingBinaryOperator::NoSignedWrap);
if (!NSWRegion.contains(getSignedRange(AddRec->getOperand(i))))
- Flags = clearFlags(Flags, SCEV::FlagNSW);
+ if (!hasFlags(OrigFlags, SCEV::FlagNSW))
+ Flags = clearFlags(Flags, SCEV::FlagNSW);
+ // Keep NSW flag if it was in OrigFlags.
}
}
diff --git a/llvm/test/Analysis/Delinearization/a.ll b/llvm/test/Analysis/Delinearization/a.ll
index 755c9baef9b8f..b2eeede192b1b 100644
--- a/llvm/test/Analysis/Delinearization/a.ll
+++ b/llvm/test/Analysis/Delinearization/a.ll
@@ -12,10 +12,10 @@ define void @foo(i64 %n, i64 %m, i64 %o, ptr nocapture %A) #0 {
; CHECK-LABEL: 'foo'
; CHECK-NEXT: Inst: store i32 1, ptr %arrayidx11.us.us, align 4
; CHECK-NEXT: In Loop with Header: for.k
-; CHECK-NEXT: AccessFunction: {{\{\{\{}}(28 + (4 * (-4 + (3 * %m)) * %o)),+,(8 * %m * %o)}<%for.i>,+,(12 * %o)}<%for.j>,+,20}<%for.k>
+; CHECK-NEXT: AccessFunction: {{\{\{\{}}(28 + (4 * (-4 + (3 * %m)) * %o)),+,(8 * %m * %o)}<%for.i>,+,(12 * %o)}<%for.j>,+,20}<nsw><%for.k>
; CHECK-NEXT: Base offset: %A
; CHECK-NEXT: ArrayDecl[UnknownSize][%m][%o] with elements of 4 bytes.
-; CHECK-NEXT: ArrayRef[{3,+,2}<nuw><%for.i>][{-4,+,3}<nw><%for.j>][{7,+,5}<nw><%for.k>]
+; CHECK-NEXT: ArrayRef[{3,+,2}<nuw><%for.i>][{-4,+,3}<nw><%for.j>][{7,+,5}<nuw><nsw><%for.k>]
;
entry:
%cmp32 = icmp sgt i64 %n, 0
diff --git a/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll b/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll
index c0b1a0b9cddaf..1c6be82ce5068 100644
--- a/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll
+++ b/llvm/test/Analysis/Delinearization/constant_functions_multi_dim.ll
@@ -8,14 +8,14 @@ define void @mat_mul(ptr %C, ptr %A, ptr %B, i64 %N) #0 !kernel_arg_addr_space !
; CHECK-LABEL: 'mat_mul'
; CHECK-NEXT: Inst: %tmp = load float, ptr %arrayidx, align 4
; CHECK-NEXT: In Loop with Header: for.inc
-; CHECK-NEXT: AccessFunction: {(4 * %N * %call),+,4}<%for.inc>
+; CHECK-NEXT: AccessFunction: {(4 * %N * %call),+,4}<nsw><%for.inc>
; CHECK-NEXT: Base offset: %A
; CHECK-NEXT: ArrayDecl[UnknownSize][%N] with elements of 4 bytes.
; CHECK-NEXT: ArrayRef[%call][{0,+,1}<nuw><nsw><%for.inc>]
; CHECK-EMPTY:
; CHECK-NEXT: Inst: %tmp5 = load float, ptr %arrayidx4, align 4
; CHECK-NEXT: In Loop with Header: for.inc
-; CHECK-NEXT: AccessFunction: {(4 * %call1),+,(4 * %N)}<%for.inc>
+; CHECK-NEXT: AccessFunction: {(4 * %call1),+,(4 * %N)}<nsw><%for.inc>
; CHECK-NEXT: Base offset: %B
; CHECK-NEXT: ArrayDecl[UnknownSize][%N] with elements of 4 bytes.
; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.inc>][%call1]
diff --git a/llvm/test/Analysis/Delinearization/himeno_1.ll b/llvm/test/Analysis/Delinearization/himeno_1.ll
index 292dca61d0592..1513eff8a6d76 100644
--- a/llvm/test/Analysis/Delinearization/himeno_1.ll
+++ b/llvm/test/Analysis/Delinearization/himeno_1.ll
@@ -33,7 +33,7 @@ define void @jacobi(i32 %nn, ptr nocapture %a, ptr nocapture %p) nounwind uwtabl
; CHECK-LABEL: 'jacobi'
; CHECK-NEXT: Inst: store float 1.000000e+00, ptr %arrayidx, align 4
; CHECK-NEXT: In Loop with Header: for.k
-; CHECK-NEXT: AccessFunction: {{\{\{\{}}(4 + (4 * (sext i32 %a.deps to i64) * (1 + (sext i32 %a.cols to i64))<nsw>)),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>,+,(4 * (sext i32 %a.deps to i64))<nsw>}<%for.j>,+,4}<%for.k>
+; CHECK-NEXT: AccessFunction: {{\{\{\{}}(4 + (4 * (sext i32 %a.deps to i64) * (1 + (sext i32 %a.cols to i64))<nsw>)),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>,+,(4 * (sext i32 %a.deps to i64))<nsw>}<%for.j>,+,4}<nsw><%for.k>
; CHECK-NEXT: Base offset: %a.base
; CHECK-NEXT: ArrayDecl[UnknownSize][(sext i32 %a.cols to i64)][(sext i32 %a.deps to i64)] with elements of 4 bytes.
; CHECK-NEXT: ArrayRef[{1,+,1}<nuw><nsw><%for.i>][{1,+,1}<nuw><nsw><%for.j>][{1,+,1}<nuw><nsw><%for.k>]
diff --git a/llvm/test/Analysis/Delinearization/himeno_2.ll b/llvm/test/Analysis/Delinearization/himeno_2.ll
index d210539d67d8b..158db4d1335e1 100644
--- a/llvm/test/Analysis/Delinearization/himeno_2.ll
+++ b/llvm/test/Analysis/Delinearization/himeno_2.ll
@@ -33,7 +33,7 @@ define void @jacobi(i32 %nn, ptr nocapture %a, ptr nocapture %p) nounwind uwtabl
; CHECK-LABEL: 'jacobi'
; CHECK-NEXT: Inst: store float 1.000000e+00, ptr %arrayidx, align 4
; CHECK-NEXT: In Loop with Header: for.k
-; CHECK-NEXT: AccessFunction: {{\{\{\{}}(4 + (4 * (sext i32 %a.deps to i64) * (1 + (sext i32 %a.cols to i64))<nsw>)),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>,+,(4 * (sext i32 %a.deps to i64))<nsw>}<%for.j>,+,4}<%for.k>
+; CHECK-NEXT: AccessFunction: {{\{\{\{}}(4 + (4 * (sext i32 %a.deps to i64) * (1 + (sext i32 %a.cols to i64))<nsw>)),+,(4 * (sext i32 %a.deps to i64) * (sext i32 %a.cols to i64))}<%for.i>,+,(4 * (sext i32 %a.deps to i64))<nsw>}<%for.j>,+,4}<nsw><%for.k>
; CHECK-NEXT: Base offset: %a.base
; CHECK-NEXT: ArrayDecl[UnknownSize][(sext i32 %a.cols to i64)][(sext i32 %a.deps to i64)] with elements of 4 bytes.
; CHECK-NEXT: ArrayRef[{1,+,1}<nuw><nsw><%for.i>][{1,+,1}<nuw><nsw><%for.j>][{1,+,1}<nuw><nsw><%for.k>]
diff --git a/llvm/test/Analysis/Delinearization/iv_times_constant_in_subscript.ll b/llvm/test/Analysis/Delinearization/iv_times_constant_in_subscript.ll
index cbe3ec8a19acd..dbb3b0eadb3fb 100644
--- a/llvm/test/Analysis/Delinearization/iv_times_constant_in_subscript.ll
+++ b/llvm/test/Analysis/Delinearization/iv_times_constant_in_subscript.ll
@@ -13,10 +13,10 @@ define void @foo(i64 %n, i64 %m, i64 %b, ptr %A) {
; CHECK-LABEL: 'foo'
; CHECK-NEXT: Inst: store double 1.000000e+00, ptr %arrayidx, align 8
; CHECK-NEXT: In Loop with Header: for.j
-; CHECK-NEXT: AccessFunction: {{\{\{}}(8 * %m * %b),+,(16 * %m)}<%for.i>,+,16}<%for.j>
+; CHECK-NEXT: AccessFunction: {{\{\{}}(8 * %m * %b),+,(16 * %m)}<%for.i>,+,16}<nsw><%for.j>
; CHECK-NEXT: Base offset: %A
; CHECK-NEXT: ArrayDecl[UnknownSize][%m] with elements of 8 bytes.
-; CHECK-NEXT: ArrayRef[{%b,+,2}<nsw><%for.i>][{0,+,2}<nuw><%for.j>]
+; CHECK-NEXT: ArrayRef[{%b,+,2}<nsw><%for.i>][{0,+,2}<nuw><nsw><%for.j>]
;
entry:
br label %for.i
diff --git a/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll b/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll
index 3d21d97438462..6e2ba3e8c8a20 100644
--- a/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll
+++ b/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_3d.ll
@@ -13,7 +13,7 @@ define void @foo(i64 %n, i64 %m, i64 %o, ptr %A) {
; CHECK-LABEL: 'foo'
; CHECK-NEXT: Inst: store double 1.000000e+00, ptr %idx, align 8
; CHECK-NEXT: In Loop with Header: for.k
-; CHECK-NEXT: AccessFunction: {{\{\{\{}}(56 + (8 * (-4 + (3 * %m)) * %o)),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
+; CHECK-NEXT: AccessFunction: {{\{\{\{}}(56 + (8 * (-4 + (3 * %m)) * %o)),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<nsw><%for.k>
; CHECK-NEXT: Base offset: %A
; CHECK-NEXT: ArrayDecl[UnknownSize][%m][%o] with elements of 8 bytes.
; CHECK-NEXT: ArrayRef[{3,+,1}<nuw><%for.i>][{-4,+,1}<nsw><%for.j>][{7,+,1}<nuw><nsw><%for.k>]
diff --git a/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll b/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll
index 3dbd71b1c9ac5..2dce3a9d55e2f 100644
--- a/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll
+++ b/llvm/test/Analysis/Delinearization/multidim_ivs_and_integer_offsets_nts_3d.ll
@@ -13,10 +13,10 @@ define void @foo(i64 %n, i64 %m, i64 %o, i64 %p, ptr nocapture %A) nounwind uwta
; CHECK-LABEL: 'foo'
; CHECK-NEXT: Inst: store double 1.000000e+00, ptr %arrayidx10.us.us, align 8
; CHECK-NEXT: In Loop with Header: for.body6.us.us
-; CHECK-NEXT: AccessFunction: {{\{\{\{}}(56 + (8 * (-4 + (3 * %m)) * (%o + %p))),+,(8 * (%o + %p) * %m)}<%for.cond4.preheader.lr.ph.us>,+,(8 * (%o + %p))}<%for.body6.lr.ph.us.us>,+,8}<%for.body6.us.us>
+; CHECK-NEXT: AccessFunction: {{\{\{\{}}(56 + (8 * (-4 + (3 * %m)) * (%o + %p))),+,(8 * (%o + %p) * %m)}<%for.cond4.preheader.lr.ph.us>,+,(8 * (%o + %p))}<%for.body6.lr.ph.us.us>,+,8}<nsw><%for.body6.us.us>
; CHECK-NEXT: Base offset: %A
; CHECK-NEXT: ArrayDecl[UnknownSize][%m][(%o + %p)] with elements of 8 bytes.
-; CHECK-NEXT: ArrayRef[{3,+,1}<nuw><%for.cond4.preheader.lr.ph.us>][{-4,+,1}<nw><%for.body6.lr.ph.us.us>][{7,+,1}<nw><%for.body6.us.us>]
+; CHECK-NEXT: ArrayRef[{3,+,1}<nuw><%for.cond4.preheader.lr.ph.us>][{-4,+,1}<nw><%for.body6.lr.ph.us.us>][{7,+,1}<nuw><nsw><%for.body6.us.us>]
;
entry:
%add = add nsw i64 %p, %o
diff --git a/llvm/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll b/llvm/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll
index a2d64a5b40bc9..1c397f5ea622a 100644
--- a/llvm/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll
+++ b/llvm/test/Analysis/Delinearization/multidim_ivs_and_parameteric_offsets_3d.ll
@@ -13,7 +13,7 @@ define void @foo(i64 %n, i64 %m, i64 %o, ptr %A, i64 %p, i64 %q, i64 %r) {
; CHECK-LABEL: 'foo'
; CHECK-NEXT: Inst: store double 1.000000e+00, ptr %idx, align 8
; CHECK-NEXT: In Loop with Header: for.k
-; CHECK-NEXT: AccessFunction: {{\{\{\{}}(8 * ((((%m * %p) + %q) * %o) + %r)),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
+; CHECK-NEXT: AccessFunction: {{\{\{\{}}(8 * ((((%m * %p) + %q) * %o) + %r)),+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<nsw><%for.k>
; CHECK-NEXT: Base offset: %A
; CHECK-NEXT: ArrayDecl[UnknownSize][%m][%o] with elements of 8 bytes.
; CHECK-NEXT: ArrayRef[{%p,+,1}<nw><%for.i>][{%q,+,1}<nsw><%for.j>][{%r,+,1}<nsw><%for.k>]
diff --git a/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll b/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll
index ac83ba19b252d..037cbd2c934f5 100644
--- a/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll
+++ b/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d.ll
@@ -13,14 +13,14 @@ define void @foo(i64 %n, i64 %m, ptr %A) {
; CHECK-LABEL: 'foo'
; CHECK-NEXT: Inst: %val = load double, ptr %arrayidx, align 8
; CHECK-NEXT: In Loop with Header: for.j
-; CHECK-NEXT: AccessFunction: {{\{\{}}0,+,(8 * %m)}<%for.i>,+,8}<%for.j>
+; CHECK-NEXT: AccessFunction: {{\{\{}}0,+,(8 * %m)}<%for.i>,+,8}<nsw><%for.j>
; CHECK-NEXT: Base offset: %A
; CHECK-NEXT: ArrayDecl[UnknownSize][%m] with elements of 8 bytes.
; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>]
; CHECK-EMPTY:
; CHECK-NEXT: Inst: store double %val, ptr %arrayidx, align 8
; CHECK-NEXT: In Loop with Header: for.j
-; CHECK-NEXT: AccessFunction: {{\{\{}}0,+,(8 * %m)}<%for.i>,+,8}<%for.j>
+; CHECK-NEXT: AccessFunction: {{\{\{}}0,+,(8 * %m)}<%for.i>,+,8}<nsw><%for.j>
; CHECK-NEXT: Base offset: %A
; CHECK-NEXT: ArrayDecl[UnknownSize][%m] with elements of 8 bytes.
; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>]
diff --git a/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d_nested.ll b/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d_nested.ll
index 262a092794cb1..f35101e58a04e 100644
--- a/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d_nested.ll
+++ b/llvm/test/Analysis/Delinearization/multidim_only_ivs_2d_nested.ll
@@ -19,7 +19,7 @@ define void @foo(i64 %a, i64 %b) nounwind uwtable {
; CHECK-LABEL: 'foo'
; CHECK-NEXT: Inst: store double 1.000000e+00, ptr %arrayidx10.us.us, align 8
; CHECK-NEXT: In Loop with Header: for.body9.us.us
-; CHECK-NEXT: AccessFunction: {{\{\{}}0,+,{8,+,8}<%for.cond7.preheader.lr.ph.split.us.us>}<%for.body9.lr.ph.us.us>,+,8}<%for.body9.us.us>
+; CHECK-NEXT: AccessFunction: {{\{\{}}0,+,{8,+,8}<%for.cond7.preheader.lr.ph.split.us.us>}<%for.body9.lr.ph.us.us>,+,8}<nsw><%for.body9.us.us>
; CHECK-NEXT: failed to delinearize
;
entry:
diff --git a/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d.ll b/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d.ll
index b1405db81a787..6f7689b405348 100644
--- a/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d.ll
+++ b/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d.ll
@@ -13,7 +13,7 @@ define void @foo(i64 %n, i64 %m, i64 %o, ptr %A) {
; CHECK-LABEL: 'foo'
; CHECK-NEXT: Inst: store double 1.000000e+00, ptr %idx, align 8
; CHECK-NEXT: In Loop with Header: for.k
-; CHECK-NEXT: AccessFunction: {{\{\{\{}}0,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
+; CHECK-NEXT: AccessFunction: {{\{\{\{}}0,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<nsw><%for.k>
; CHECK-NEXT: Base offset: %A
; CHECK-NEXT: ArrayDecl[UnknownSize][%m][%o] with elements of 8 bytes.
; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>][{0,+,1}<nuw><nsw><%for.k>]
diff --git a/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll b/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll
index 6de072ebaee13..a322c3fcfa383 100644
--- a/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll
+++ b/llvm/test/Analysis/Delinearization/multidim_only_ivs_3d_cast.ll
@@ -15,7 +15,7 @@ define void @foo(i32 %n, i32 %m, i32 %o, ptr %A) {
; CHECK-LABEL: 'foo'
; CHECK-NEXT: Inst: store double 1.000000e+00, ptr %idx, align 8
; CHECK-NEXT: In Loop with Header: for.k
-; CHECK-NEXT: AccessFunction: {{\{\{\{}}0,+,(8 * (zext i32 %m to i64) * (zext i32 %o to i64))}<%for.i>,+,(8 * (zext i32 %o to i64))<nuw><nsw>}<%for.j>,+,8}<%for.k>
+; CHECK-NEXT: AccessFunction: {{\{\{\{}}0,+,(8 * (zext i32 %m to i64) * (zext i32 %o to i64))}<%for.i>,+,(8 * (zext i32 %o to i64))<nuw><nsw>}<%for.j>,+,8}<nsw><%for.k>
; CHECK-NEXT: Base offset: %A
; CHECK-NEXT: ArrayDecl[UnknownSize][(zext i32 %m to i64)][(zext i32 %o to i64)] with elements of 8 bytes.
; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>][{0,+,1}<nuw><nsw><%for.k>]
diff --git a/llvm/test/Analysis/Delinearization/multidim_two_accesses_different_delinearization.ll b/llvm/test/Analysis/Delinearization/multidim_two_accesses_different_delinearization.ll
index d7148c5216462..65f9edf004cc5 100644
--- a/llvm/test/Analysis/Delinearization/multidim_two_accesses_different_delinearization.ll
+++ b/llvm/test/Analysis/Delinearization/multidim_two_accesses_different_delinearization.ll
@@ -16,14 +16,14 @@ define void @foo(i64 %n, i64 %m, ptr %A) {
; CHECK-LABEL: 'foo'
; CHECK-NEXT: Inst: store double 1.000000e+00, ptr %arrayidx, align 8
; CHECK-NEXT: In Loop with Header: for.j
-; CHECK-NEXT: AccessFunction: {{\{\{}}0,+,(8 * %m)}<%for.i>,+,8}<%for.j>
+; CHECK-NEXT: AccessFunction: {{\{\{}}0,+,(8 * %m)}<%for.i>,+,8}<nsw><%for.j>
; CHECK-NEXT: Base offset: %A
; CHECK-NEXT: ArrayDecl[UnknownSize][%m] with elements of 8 bytes.
; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i>][{0,+,1}<nuw><nsw><%for.j>]
; CHECK-EMPTY:
; CHECK-NEXT: Inst: store double 1.000000e+00, ptr %arrayidx1, align 8
; CHECK-NEXT: In Loop with Header: for.j
-; CHECK-NEXT: AccessFunction: {{\{\{}}0,+,8}<%for.i>,+,(8 * %n)}<%for.j>
+; CHECK-NEXT: AccessFunction: {{\{\{}}0,+,8}<%for.i>,+,(8 * %n)}<nsw><%for.j>
; CHECK-NEXT: Base offset: %A
; CHECK-NEXT: ArrayDecl[UnknownSize][%n] with elements of 8 bytes.
; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.j>][{0,+,1}<nuw><nsw><%for.i>]
diff --git a/llvm/test/Analysis/DependenceAnalysis/DADelin.ll b/llvm/test/Analysis/DependenceAnalysis/DADelin.ll
index 8f94a455d3724..af62e369b71a1 100644
--- a/llvm/test/Analysis/DependenceAnalysis/DADelin.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/DADelin.ll
@@ -598,11 +598,11 @@ for.end12: ; preds = %for.inc10, %entry
define void @nonnegative(ptr nocapture %A, i32 %N) {
; CHECK-LABEL: 'nonnegative'
; CHECK-NEXT: Src: store i32 1, ptr %arrayidx, align 4 --> Dst: store i32 1, ptr %arrayidx, align 4
-; CHECK-NEXT: da analyze - output [* *]!
+; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: Src: store i32 1, ptr %arrayidx, align 4 --> Dst: store i32 2, ptr %arrayidx, align 4
-; CHECK-NEXT: da analyze - output [* *|<]!
+; CHECK-NEXT: da analyze - consistent output [0 0|<]!
; CHECK-NEXT: Src: store i32 2, ptr %arrayidx, align 4 --> Dst: store i32 2, ptr %arrayidx, align 4
-; CHECK-NEXT: da analyze - output [* *]!
+; CHECK-NEXT: da analyze - none!
;
entry:
%cmp44 = icmp eq i32 %N, 0
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/different-access-types-rt-checks.ll b/llvm/test/Analysis/LoopAccessAnalysis/different-access-types-rt-checks.ll
index 809472cb543ac..40eedd49a61cc 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/different-access-types-rt-checks.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/different-access-types-rt-checks.ll
@@ -136,7 +136,7 @@ define void @loads_of_same_pointer_with_different_sizes_retry_with_runtime_check
; CHECK-NEXT: Member: {%B,+,4}<nuw><%loop>
; CHECK-NEXT: Group GRP1:
; CHECK-NEXT: (Low: ((4 * %off) + %B) High: ((4 * %N) + (4 * %off) + %B))
-; CHECK-NEXT: Member: {((4 * %off) + %B),+,4}<%loop>
+; CHECK-NEXT: Member: {((4 * %off) + %B),+,4}<nw><%loop>
; CHECK-NEXT: Group GRP2:
; CHECK-NEXT: (Low: %A High: (%N + %A))
; CHECK-NEXT: Member: {%A,+,1}<nuw><%loop>
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-symbolic-max-backedge-taken-count-may-wrap.ll b/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-symbolic-max-backedge-taken-count-may-wrap.ll
index e319c89577e24..85a3da234cc35 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-symbolic-max-backedge-taken-count-may-wrap.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/evaluate-at-symbolic-max-backedge-taken-count-may-wrap.ll
@@ -17,7 +17,7 @@ define void @runtime_checks_with_symbolic_max_btc_neg_1(ptr %P, ptr %S, i32 %x,
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
; CHECK-NEXT: (Low: ((4 * %y) + %P) High: inttoptr (i32 -1 to ptr))
-; CHECK-NEXT: Member: {((4 * %y) + %P),+,4}<%loop>
+; CHECK-NEXT: Member: {((4 * %y) + %P),+,4}<nw><%loop>
; CHECK-NEXT: Group GRP1:
; CHECK-NEXT: (Low: %S High: (4 + %S))
; CHECK-NEXT: Member: %S
@@ -57,7 +57,7 @@ define void @runtime_check_with_symbolic_max_btc_neg_2(ptr %P, ptr %S, i32 %x, i
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
; CHECK-NEXT: (Low: ((4 * %y) + %P) High: inttoptr (i32 -1 to ptr))
-; CHECK-NEXT: Member: {((4 * %y) + %P),+,4}<%loop>
+; CHECK-NEXT: Member: {((4 * %y) + %P),+,4}<nw><%loop>
; CHECK-NEXT: Group GRP1:
; CHECK-NEXT: (Low: %S High: (4 + %S))
; CHECK-NEXT: Member: %S
@@ -151,7 +151,7 @@ define void @runtime_check_with_symbolic_max_wraps_to_positive_offset(ptr %P, pt
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
; CHECK-NEXT: (Low: ((2 * %y) + %P) High: inttoptr (i32 -1 to ptr))
-; CHECK-NEXT: Member: {((2 * %y) + %P),+,2}<%loop>
+; CHECK-NEXT: Member: {((2 * %y) + %P),+,2}<nw><%loop>
; CHECK-NEXT: Group GRP1:
; CHECK-NEXT: (Low: %S High: (4 + %S))
; CHECK-NEXT: Member: %S
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/loops-with-indirect-reads-and-writes.ll b/llvm/test/Analysis/LoopAccessAnalysis/loops-with-indirect-reads-and-writes.ll
index 3518d92c3511f..ce64fcced2f3c 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/loops-with-indirect-reads-and-writes.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/loops-with-indirect-reads-and-writes.ll
@@ -99,7 +99,7 @@ define void @test_indirect_read_loop_also_modifies_pointer_array(ptr noundef %ar
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
; CHECK-NEXT: (Low: {(64 + %arr),+,64}<%loop.1> High: {(8064 + %arr),+,64}<%loop.1>)
-; CHECK-NEXT: Member: {{\{\{}}(64 + %arr),+,64}<%loop.1>,+,8}<%loop.2>
+; CHECK-NEXT: Member: {{\{\{}}(64 + %arr),+,64}<%loop.1>,+,8}<nw><%loop.2>
; CHECK-NEXT: Group GRP1:
; CHECK-NEXT: (Low: %arr High: (8000 + %arr))
; CHECK-NEXT: Member: {%arr,+,8}<nuw><%loop.2>
@@ -167,7 +167,7 @@ define void @test_indirect_write_loop_also_modifies_pointer_array(ptr noundef %a
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
; CHECK-NEXT: (Low: {(64 + %arr),+,64}<%loop.1> High: {(8064 + %arr),+,64}<%loop.1>)
-; CHECK-NEXT: Member: {{\{\{}}(64 + %arr),+,64}<%loop.1>,+,8}<%loop.2>
+; CHECK-NEXT: Member: {{\{\{}}(64 + %arr),+,64}<%loop.1>,+,8}<nw><%loop.2>
; CHECK-NEXT: Group GRP1:
; CHECK-NEXT: (Low: %arr High: (8000 + %arr))
; CHECK-NEXT: Member: {%arr,+,8}<nuw><%loop.2>
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll b/llvm/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll
index 44a4721c94c61..63b7a0475970f 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/number-of-memchecks.ll
@@ -247,7 +247,7 @@ for.end: ; preds = %for.body
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
; CHECK-NEXT: (Low: ((2 * %offset) + %a) High: (10000 + (2 * %offset) + %a))
-; CHECK-NEXT: Member: {((2 * %offset) + %a),+,2}<%for.body>
+; CHECK-NEXT: Member: {((2 * %offset) + %a),+,2}<nw><%for.body>
; CHECK-NEXT: Group GRP1:
; CHECK-NEXT: (Low: %a High: (10000 + %a))
; CHECK-NEXT: Member: {%a,+,2}<nw><%for.body>
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis-forked-pointers.ll b/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis-forked-pointers.ll
index d1d1ecb2af888..425e95470623e 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis-forked-pointers.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis-forked-pointers.ll
@@ -50,7 +50,7 @@ define void @dependency_check_and_runtime_checks_needed_select_of_invariant_ptrs
; CHECK-NEXT: Member: %c
; CHECK-NEXT: Group GRP3:
; CHECK-NEXT: (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a))
-; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<%loop>
+; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<nw><%loop>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
@@ -128,7 +128,7 @@ define void @dependency_check_and_runtime_checks_needed_select_of_ptr_add_recs(p
; CHECK-NEXT: Member: {%c,+,4}<%loop>
; CHECK-NEXT: Group GRP3:
; CHECK-NEXT: (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a))
-; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<%loop>
+; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<nw><%loop>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
@@ -169,7 +169,7 @@ define void @dependency_check_and_runtime_checks_needed_select_of_ptr_add_recs_m
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
; CHECK-NEXT: (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a))
-; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<%loop>
+; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<nw><%loop>
; CHECK-NEXT: Group GRP1:
; CHECK-NEXT: (Low: %a High: ((4 * %n) + %a))
; CHECK-NEXT: Member: {%a,+,4}<nuw><%loop>
@@ -218,7 +218,7 @@ define void @dependency_check_and_runtime_checks_needed_select_of_ptr_add_recs_m
; CHECK-NEXT: Grouped accesses:
; CHECK-NEXT: Group GRP0:
; CHECK-NEXT: (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a))
-; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<%loop>
+; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<nw><%loop>
; CHECK-NEXT: Group GRP1:
; CHECK-NEXT: (Low: %a High: ((4 * %n) + %a))
; CHECK-NEXT: Member: {%a,+,4}<nuw><%loop>
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll b/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll
index 63abd4ef70d63..64b445d9f9297 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll
@@ -28,10 +28,10 @@ define void @dependency_check_and_runtime_checks_needed_gepb_is_inbounds_iv2_ste
; CHECK-NEXT: Member: {%a,+,4}<nuw><%loop>
; CHECK-NEXT: Group GRP1:
; CHECK-NEXT: (Low: %b High: (-16 + (20 * %n) + %b))
-; CHECK-NEXT: Member: {%b,+,20}<%loop>
+; CHECK-NEXT: Member: {%b,+,20}<nuw><%loop>
; CHECK-NEXT: Group GRP2:
; CHECK-NEXT: (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a))
-; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<%loop>
+; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<nw><%loop>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
@@ -92,7 +92,7 @@ define void @dependency_check_and_runtime_checks_needed_gepb_not_inbounds_iv2_st
; CHECK-NEXT: Member: {%b,+,5}<%loop>
; CHECK-NEXT: Group GRP2:
; CHECK-NEXT: (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a))
-; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<%loop>
+; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<nw><%loop>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
@@ -154,7 +154,7 @@ define void @dependency_check_and_runtime_checks_needed_gepb_is_inbounds_iv2_ste
; CHECK-NEXT: Member: {%b,+,1}<%loop>
; CHECK-NEXT: Group GRP2:
; CHECK-NEXT: (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a))
-; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<%loop>
+; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<nw><%loop>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
@@ -220,7 +220,7 @@ define void @dependency_check_and_runtime_checks_needed_gepb_not_inbounds_iv2_st
; CHECK-NEXT: Member: {%b,+,1}<%loop>
; CHECK-NEXT: Group GRP2:
; CHECK-NEXT: (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a))
-; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<%loop>
+; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<nw><%loop>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
@@ -285,7 +285,7 @@ define void @dependency_check_and_runtime_checks_needed_gepb_may_wrap(ptr %a, pt
; CHECK-NEXT: Member: {%b,+,8}<%loop>
; CHECK-NEXT: Group GRP2:
; CHECK-NEXT: (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a))
-; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<%loop>
+; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<nw><%loop>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
index 1c48b0ed0f967..075625f4027c4 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll
@@ -24,8 +24,8 @@ define void @single_stride(ptr noalias %A, ptr noalias %B, i64 %N, i64 %stride)
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
; CHECK-NEXT: [PSE] %gep.A = getelementptr inbounds i32, ptr %A, i64 %mul:
-; CHECK-NEXT: {%A,+,(4 * %stride)}<%loop>
-; CHECK-NEXT: --> {%A,+,4}<%loop>
+; CHECK-NEXT: {%A,+,(4 * %stride)}<nw><%loop>
+; CHECK-NEXT: --> {%A,+,4}<nw><%loop>
;
entry:
br label %loop
@@ -69,8 +69,8 @@ define void @single_stride_nusw(ptr noalias %A, ptr noalias %B, i64 %N, i64 %str
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
; CHECK-NEXT: [PSE] %gep.A = getelementptr nusw i32, ptr %A, i64 %mul:
-; CHECK-NEXT: {%A,+,(4 * %stride)}<%loop>
-; CHECK-NEXT: --> {%A,+,4}<%loop>
+; CHECK-NEXT: {%A,+,(4 * %stride)}<nw><%loop>
+; CHECK-NEXT: --> {%A,+,4}<nw><%loop>
;
entry:
br label %loop
@@ -113,8 +113,8 @@ define void @single_stride_struct(ptr noalias %A, ptr noalias %B, i64 %N, i64 %s
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
; CHECK-NEXT: [PSE] %gep.A = getelementptr inbounds { i32, i8 }, ptr %A, i64 %mul:
-; CHECK-NEXT: {%A,+,(8 * %stride)}<%loop>
-; CHECK-NEXT: --> {%A,+,8}<%loop>
+; CHECK-NEXT: {%A,+,(8 * %stride)}<nw><%loop>
+; CHECK-NEXT: --> {%A,+,8}<nw><%loop>
;
entry:
br label %loop
@@ -160,8 +160,8 @@ define void @single_stride_array(ptr noalias %A, ptr noalias %B, i64 %N, i64 %st
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
; CHECK-NEXT: [PSE] %gep.A = getelementptr inbounds [2 x i32], ptr %A, i64 %mul, i64 1:
-; CHECK-NEXT: {(4 + %A),+,(8 * %stride)}<%loop>
-; CHECK-NEXT: --> {(4 + %A),+,8}<%loop>
+; CHECK-NEXT: {(4 + %A),+,(8 * %stride)}<nw><%loop>
+; CHECK-NEXT: --> {(4 + %A),+,8}<nw><%loop>
;
entry:
br label %loop
@@ -266,7 +266,7 @@ define void @single_stride_castexpr_multiuse(i32 %offset, ptr %src, ptr %dst, i1
; CHECK-NEXT: Member: {((4 * %iv.1) + %dst),+,4}<%inner.loop>
; CHECK-NEXT: Group GRP1:
; CHECK-NEXT: (Low: (4 + %src) High: (808 + (-4 * (zext i32 %offset to i64))<nsw> + %src))
-; CHECK-NEXT: Member: {(4 + %src),+,4}<%inner.loop>
+; CHECK-NEXT: Member: {(4 + %src),+,4}<nuw><%inner.loop>
; CHECK-EMPTY:
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop.
; CHECK-NEXT: SCEV assumptions:
@@ -274,8 +274,8 @@ define void @single_stride_castexpr_multiuse(i32 %offset, ptr %src, ptr %dst, i1
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
; CHECK-NEXT: [PSE] %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv.3:
-; CHECK-NEXT: {((4 * (zext i32 %offset to i64))<nuw><nsw> + %src),+,4}<%inner.loop>
-; CHECK-NEXT: --> {(4 + %src),+,4}<%inner.loop>
+; CHECK-NEXT: {((4 * (zext i32 %offset to i64))<nuw><nsw> + %src),+,4}<nuw><%inner.loop>
+; CHECK-NEXT: --> {(4 + %src),+,4}<nuw><%inner.loop>
; CHECK-NEXT: [PSE] %gep.dst = getelementptr i32, ptr %dst, i64 %iv.2:
; CHECK-NEXT: {((4 * %iv.1) + %dst),+,(4 * (sext i32 %offset to i64))<nsw>}<%inner.loop>
; CHECK-NEXT: --> {((4 * %iv.1) + %dst),+,4}<%inner.loop>
@@ -384,11 +384,11 @@ define void @two_strides(ptr noalias %A, ptr noalias %B, i64 %N, i64 %stride.1,
; CHECK-EMPTY:
; CHECK-NEXT: Expressions re-written:
; CHECK-NEXT: [PSE] %gep.A = getelementptr inbounds i32, ptr %A, i64 %mul:
-; CHECK-NEXT: {%A,+,(4 * %stride.1)}<%loop>
-; CHECK-NEXT: --> {%A,+,4}<%loop>
+; CHECK-NEXT: {%A,+,(4 * %stride.1)}<nw><%loop>
+; CHECK-NEXT: --> {%A,+,4}<nw><%loop>
; CHECK-NEXT: [PSE] %gep.A.next = getelementptr inbounds i32, ptr %A, i64 %mul.2:
-; CHECK-NEXT: {((4 * %stride.2) + %A),+,(4 * %stride.2)}<%loop>
-; CHECK-NEXT: --> {(4 + %A),+,4}<%loop>
+; CHECK-NEXT: {((4 * %stride.2) + %A),+,(4 * %stride.2)}<nw><%loop>
+; CHECK-NEXT: --> {(4 + %A),+,4}<nw><%loop>
;
entry:
br label %loop
diff --git a/llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll b/llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll
index 593888f5f7bd5..2b2a8e1e6f357 100644
--- a/llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll
+++ b/llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll
@@ -861,9 +861,9 @@ define void @test-add-mul-propagates(ptr %input, i32 %offset, i32 %numIterations
; CHECK-NEXT: %index32 = add nsw i32 %i, %offset
; CHECK-NEXT: --> {%offset,+,1}<nsw><%loop> U: full-set S: full-set Exits: (-1 + %offset + %numIterations) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %indexmul = mul nuw i32 %index32, 2
-; CHECK-NEXT: --> {(2 * %offset),+,2}<%loop> U: [0,-1) S: [-2147483648,2147483647) Exits: (-2 + (2 * %offset) + (2 * %numIterations)) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {(2 * %offset),+,2}<nuw><%loop> U: [0,-1) S: [-2147483648,2147483647) Exits: (-2 + (2 * %offset) + (2 * %numIterations)) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %ptr = getelementptr inbounds float, ptr %input, i32 %indexmul
-; CHECK-NEXT: --> ((4 * (sext i32 {(2 * %offset),+,2}<%loop> to i64))<nsw> + %input) U: full-set S: full-set Exits: ((4 * (sext i32 (-2 + (2 * %offset) + (2 * %numIterations)) to i64))<nsw> + %input) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> ((4 * (sext i32 {(2 * %offset),+,2}<nuw><%loop> to i64))<nsw> + %input) U: full-set S: full-set Exits: ((4 * (sext i32 (-2 + (2 * %offset) + (2 * %numIterations)) to i64))<nsw> + %input) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %nexti = add nsw i32 %i, 1
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%loop> U: [1,-2147483648) S: [1,-2147483648) Exits: %numIterations LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test-add-mul-propagates
@@ -898,9 +898,9 @@ define void @test-mul-propagates-poison(ptr %input, i32 %offset, i32 %numIterati
; CHECK-NEXT: %index32 = add nsw i32 %i, %offset
; CHECK-NEXT: --> {%offset,+,1}<nsw><%loop> U: full-set S: full-set Exits: (-1 + %offset + %numIterations) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %indexmul = mul nsw i32 %index32, %offset
-; CHECK-NEXT: --> {(%offset * %offset),+,%offset}<%loop> U: full-set S: full-set Exits: ((-1 + %offset + %numIterations) * %offset) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {(%offset * %offset),+,%offset}<nsw><%loop> U: full-set S: full-set Exits: ((-1 + %offset + %numIterations) * %offset) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %ptr = getelementptr inbounds float, ptr %input, i32 %indexmul
-; CHECK-NEXT: --> ((4 * (sext i32 {(%offset * %offset),+,%offset}<%loop> to i64))<nsw> + %input) U: full-set S: full-set Exits: ((4 * (sext i32 ((-1 + %offset + %numIterations) * %offset) to i64))<nsw> + %input) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {((4 * (sext i32 (%offset * %offset) to i64))<nsw> + %input),+,(4 * (sext i32 %offset to i64))<nsw>}<nw><%loop> U: full-set S: full-set Exits: ((4 * (sext i32 (%offset * %offset) to i64))<nsw> + (4 * (zext i32 (-1 + %numIterations) to i64) * (sext i32 %offset to i64)) + %input) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %nexti = add nsw i32 %i, 1
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%loop> U: [1,-2147483648) S: [1,-2147483648) Exits: %numIterations LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test-mul-propagates-poison
@@ -1190,11 +1190,11 @@ define void @test-shl-nsw(ptr %input, i32 %start, i32 %numIterations) {
; CHECK-NEXT: %i = phi i32 [ %nexti, %loop ], [ %start, %entry ]
; CHECK-NEXT: --> {%start,+,1}<nsw><%loop> U: full-set S: full-set Exits: (-1 + %numIterations) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %index32 = shl nsw i32 %i, 8
-; CHECK-NEXT: --> {(256 * %start),+,256}<%loop> U: [0,-255) S: [-2147483648,2147483393) Exits: (-256 + (256 * %numIterations)) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {(256 * %start),+,256}<nsw><%loop> U: [0,-255) S: [-2147483648,2147483393) Exits: (-256 + (256 * %numIterations)) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %index64 = sext i32 %index32 to i64
-; CHECK-NEXT: --> (sext i32 {(256 * %start),+,256}<%loop> to i64) U: [0,-255) S: [-2147483648,2147483393) Exits: (sext i32 (-256 + (256 * %numIterations)) to i64) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {(sext i32 (256 * %start) to i64),+,256}<nsw><%loop> U: [0,-255) S: [-2147483648,1101659110913) Exits: ((sext i32 (256 * %start) to i64) + (256 * (zext i32 (-1 + (-1 * %start) + %numIterations) to i64))<nuw><nsw>) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %ptr = getelementptr inbounds float, ptr %input, i64 %index64
-; CHECK-NEXT: --> ((4 * (sext i32 {(256 * %start),+,256}<%loop> to i64))<nsw> + %input) U: full-set S: full-set Exits: ((4 * (sext i32 (-256 + (256 * %numIterations)) to i64))<nsw> + %input) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {((4 * (sext i32 (256 * %start) to i64))<nsw> + %input),+,1024}<nw><%loop> U: full-set S: full-set Exits: ((4 * (sext i32 (256 * %start) to i64))<nsw> + (1024 * (zext i32 (-1 + (-1 * %start) + %numIterations) to i64))<nuw><nsw> + %input) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %nexti = add nsw i32 %i, 1
; CHECK-NEXT: --> {(1 + %start)<nsw>,+,1}<nsw><%loop> U: [-2147483647,-2147483648) S: [-2147483647,-2147483648) Exits: %numIterations LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test-shl-nsw
@@ -1266,11 +1266,11 @@ define void @test-shl-nuw-nsw(ptr %input, i32 %start, i32 %numIterations) {
; CHECK-NEXT: %i = phi i32 [ %nexti, %loop ], [ %start, %entry ]
; CHECK-NEXT: --> {%start,+,1}<nsw><%loop> U: full-set S: full-set Exits: (-1 + %numIterations) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %index32 = shl nuw nsw i32 %i, 31
-; CHECK-NEXT: --> {(-2147483648 * %start),+,-2147483648}<%loop> U: [0,-2147483647) S: [-2147483648,1) Exits: (-2147483648 + (-2147483648 * %numIterations)) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {(-2147483648 * %start),+,-2147483648}<nsw><%loop> U: [0,-2147483647) S: [-2147483648,1) Exits: (-2147483648 + (-2147483648 * %numIterations)) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %index64 = sext i32 %index32 to i64
-; CHECK-NEXT: --> (sext i32 {(-2147483648 * %start),+,-2147483648}<%loop> to i64) U: [0,-2147483647) S: [-2147483648,1) Exits: (sext i32 (-2147483648 + (-2147483648 * %numIterations)) to i64) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {(sext i32 (-2147483648 * %start) to i64),+,-2147483648}<nsw><%loop> U: [0,-2147483647) S: [-9223372036854775808,1) Exits: ((sext i32 (-2147483648 * %start) to i64) + (-2147483648 * (zext i32 (-1 + (-1 * %start) + %numIterations) to i64))<nsw>) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %ptr = getelementptr inbounds float, ptr %input, i64 %index64
-; CHECK-NEXT: --> ((4 * (sext i32 {(-2147483648 * %start),+,-2147483648}<%loop> to i64))<nsw> + %input) U: full-set S: full-set Exits: ((4 * (sext i32 (-2147483648 + (-2147483648 * %numIterations)) to i64))<nsw> + %input) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {((4 * (sext i32 (-2147483648 * %start) to i64))<nsw> + %input),+,-8589934592}<nw><%loop> U: full-set S: full-set Exits: ((4 * (sext i32 (-2147483648 * %start) to i64))<nsw> + (-8589934592 * (zext i32 (-1 + (-1 * %start) + %numIterations) to i64)) + %input) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %nexti = add nsw i32 %i, 1
; CHECK-NEXT: --> {(1 + %start)<nsw>,+,1}<nsw><%loop> U: [-2147483647,-2147483648) S: [-2147483647,-2147483648) Exits: %numIterations LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test-shl-nuw-nsw
@@ -1342,11 +1342,11 @@ define void @test-shl-nsw-edgecase(ptr %input, i32 %start, i32 %numIterations) {
; CHECK-NEXT: %i = phi i32 [ %nexti, %loop ], [ %start, %entry ]
; CHECK-NEXT: --> {%start,+,1}<nsw><%loop> U: full-set S: full-set Exits: (-1 + %numIterations) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %index32 = shl nsw i32 %i, 30
-; CHECK-NEXT: --> {(1073741824 * %start),+,1073741824}<%loop> U: [0,-1073741823) S: [-2147483648,1073741825) Exits: (-1073741824 + (1073741824 * %numIterations)) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {(1073741824 * %start),+,1073741824}<nsw><%loop> U: [0,-1073741823) S: [-2147483648,1073741825) Exits: (-1073741824 + (1073741824 * %numIterations)) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %index64 = sext i32 %index32 to i64
-; CHECK-NEXT: --> (sext i32 {(1073741824 * %start),+,1073741824}<%loop> to i64) U: [0,-1073741823) S: [-2147483648,1073741825) Exits: (sext i32 (-1073741824 + (1073741824 * %numIterations)) to i64) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {(sext i32 (1073741824 * %start) to i64),+,1073741824}<nsw><%loop> U: [0,-1073741823) S: [-2147483648,4611686018427387905) Exits: ((sext i32 (1073741824 * %start) to i64) + (1073741824 * (zext i32 (-1 + (-1 * %start) + %numIterations) to i64))<nuw><nsw>) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %ptr = getelementptr inbounds float, ptr %input, i64 %index64
-; CHECK-NEXT: --> ((4 * (sext i32 {(1073741824 * %start),+,1073741824}<%loop> to i64))<nsw> + %input) U: full-set S: full-set Exits: ((4 * (sext i32 (-1073741824 + (1073741824 * %numIterations)) to i64))<nsw> + %input) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {((4 * (sext i32 (1073741824 * %start) to i64))<nsw> + %input),+,4294967296}<nw><%loop> U: full-set S: full-set Exits: ((4 * (sext i32 (1073741824 * %start) to i64))<nsw> + (4294967296 * (zext i32 (-1 + (-1 * %start) + %numIterations) to i64))<nuw> + %input) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %nexti = add nsw i32 %i, 1
; CHECK-NEXT: --> {(1 + %start)<nsw>,+,1}<nsw><%loop> U: [-2147483647,-2147483648) S: [-2147483647,-2147483648) Exits: %numIterations LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test-shl-nsw-edgecase
diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
index 9bf2427eddb9c..db0059ad9290f 100644
--- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
+++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll
@@ -9,7 +9,7 @@ define void @test_guard_less_than_16(ptr nocapture %a, i64 %i) {
; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ %i, %entry ]
; CHECK-NEXT: --> {%i,+,1}<nuw><nsw><%loop> U: full-set S: full-set Exits: 15 LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %idx = getelementptr inbounds i32, ptr %a, i64 %iv
-; CHECK-NEXT: --> {((4 * %i) + %a),+,4}<%loop> U: full-set S: full-set Exits: (60 + %a) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {((4 * %i) + %a),+,4}<nw><%loop> U: full-set S: full-set Exits: (60 + %a) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1
; CHECK-NEXT: --> {(1 + %i),+,1}<nuw><nsw><%loop> U: full-set S: full-set Exits: 16 LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test_guard_less_than_16
@@ -40,7 +40,7 @@ define void @test_guard_less_than_16_operands_swapped(ptr nocapture %a, i64 %i)
; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ %i, %entry ]
; CHECK-NEXT: --> {%i,+,1}<nuw><nsw><%loop> U: full-set S: full-set Exits: 15 LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %idx = getelementptr inbounds i32, ptr %a, i64 %iv
-; CHECK-NEXT: --> {((4 * %i) + %a),+,4}<%loop> U: full-set S: full-set Exits: (60 + %a) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {((4 * %i) + %a),+,4}<nw><%loop> U: full-set S: full-set Exits: (60 + %a) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1
; CHECK-NEXT: --> {(1 + %i),+,1}<nuw><nsw><%loop> U: full-set S: full-set Exits: 16 LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test_guard_less_than_16_operands_swapped
@@ -71,7 +71,7 @@ define void @test_guard_less_than_16_branches_flipped(ptr nocapture %a, i64 %i)
; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ %i, %entry ]
; CHECK-NEXT: --> {%i,+,1}<nuw><nsw><%loop> U: full-set S: full-set Exits: 15 LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %idx = getelementptr inbounds i32, ptr %a, i64 %iv
-; CHECK-NEXT: --> {((4 * %i) + %a),+,4}<%loop> U: full-set S: full-set Exits: (60 + %a) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {((4 * %i) + %a),+,4}<nw><%loop> U: full-set S: full-set Exits: (60 + %a) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1
; CHECK-NEXT: --> {(1 + %i),+,1}<nuw><nsw><%loop> U: full-set S: full-set Exits: 16 LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test_guard_less_than_16_branches_flipped
@@ -102,7 +102,7 @@ define void @test_guard_uge_16_branches_flipped(ptr nocapture %a, i64 %i) {
; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ %i, %entry ]
; CHECK-NEXT: --> {%i,+,1}<nuw><nsw><%loop> U: full-set S: full-set Exits: 15 LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %idx = getelementptr inbounds i32, ptr %a, i64 %iv
-; CHECK-NEXT: --> {((4 * %i) + %a),+,4}<%loop> U: full-set S: full-set Exits: (60 + %a) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {((4 * %i) + %a),+,4}<nw><%loop> U: full-set S: full-set Exits: (60 + %a) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1
; CHECK-NEXT: --> {(1 + %i),+,1}<nuw><nsw><%loop> U: full-set S: full-set Exits: 16 LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test_guard_uge_16_branches_flipped
@@ -369,7 +369,7 @@ define void @test_guard_if_and_skip(ptr nocapture readonly %data, i64 %count) {
; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
; CHECK-NEXT: --> {0,+,1}<nuw><%loop> U: full-set S: full-set Exits: (-1 + %count) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %idx = getelementptr inbounds i32, ptr %data, i64 %iv
-; CHECK-NEXT: --> {%data,+,4}<%loop> U: full-set S: full-set Exits: (-4 + (4 * %count) + %data) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {%data,+,4}<nuw><%loop> U: full-set S: full-set Exits: (-4 + (4 * %count) + %data) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add nuw i64 %iv, 1
; CHECK-NEXT: --> {1,+,1}<nuw><%loop> U: [1,0) S: [1,0) Exits: %count LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test_guard_if_and_skip
@@ -444,7 +444,7 @@ define void @test_guard_if_and_or(ptr nocapture readonly %data, i64 %count, i1 %
; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
; CHECK-NEXT: --> {0,+,1}<nuw><%loop> U: full-set S: full-set Exits: (-1 + %count) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %idx = getelementptr inbounds i32, ptr %data, i64 %iv
-; CHECK-NEXT: --> {%data,+,4}<%loop> U: full-set S: full-set Exits: (-4 + (4 * %count) + %data) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {%data,+,4}<nuw><%loop> U: full-set S: full-set Exits: (-4 + (4 * %count) + %data) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add nuw i64 %iv, 1
; CHECK-NEXT: --> {1,+,1}<nuw><%loop> U: [1,0) S: [1,0) Exits: %count LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test_guard_if_and_or
@@ -515,7 +515,7 @@ define void @test_guard_if_or_enter(ptr nocapture readonly %data, i64 %count) {
; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
; CHECK-NEXT: --> {0,+,1}<nuw><%loop> U: full-set S: full-set Exits: (-1 + %count) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %idx = getelementptr inbounds i32, ptr %data, i64 %iv
-; CHECK-NEXT: --> {%data,+,4}<%loop> U: full-set S: full-set Exits: (-4 + (4 * %count) + %data) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {%data,+,4}<nuw><%loop> U: full-set S: full-set Exits: (-4 + (4 * %count) + %data) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add nuw i64 %iv, 1
; CHECK-NEXT: --> {1,+,1}<nuw><%loop> U: [1,0) S: [1,0) Exits: %count LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test_guard_if_or_enter
@@ -590,7 +590,7 @@ define void @test_guard_if_or_and(ptr nocapture readonly %data, i64 %count, i1 %
; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
; CHECK-NEXT: --> {0,+,1}<nuw><%loop> U: full-set S: full-set Exits: (-1 + %count) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %idx = getelementptr inbounds i32, ptr %data, i64 %iv
-; CHECK-NEXT: --> {%data,+,4}<%loop> U: full-set S: full-set Exits: (-4 + (4 * %count) + %data) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {%data,+,4}<nuw><%loop> U: full-set S: full-set Exits: (-4 + (4 * %count) + %data) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add nuw i64 %iv, 1
; CHECK-NEXT: --> {1,+,1}<nuw><%loop> U: [1,0) S: [1,0) Exits: %count LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test_guard_if_or_and
@@ -1013,7 +1013,7 @@ define void @test_guard_slt_sgt_2(ptr nocapture %a, i64 %i) {
; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ %i, %entry ]
; CHECK-NEXT: --> {%i,+,1}<nuw><nsw><%loop> U: full-set S: full-set Exits: 17 LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %idx = getelementptr inbounds i32, ptr %a, i64 %iv
-; CHECK-NEXT: --> {((4 * %i) + %a),+,4}<%loop> U: full-set S: full-set Exits: (68 + %a) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {((4 * %i) + %a),+,4}<nw><%loop> U: full-set S: full-set Exits: (68 + %a) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1
; CHECK-NEXT: --> {(1 + %i),+,1}<nuw><nsw><%loop> U: full-set S: full-set Exits: 18 LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test_guard_slt_sgt_2
@@ -1083,7 +1083,7 @@ define void @test_guard_sle_sge_2(ptr nocapture %a, i64 %i) {
; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ %i, %entry ]
; CHECK-NEXT: --> {%i,+,1}<nuw><nsw><%loop> U: full-set S: full-set Exits: 17 LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %idx = getelementptr inbounds i32, ptr %a, i64 %iv
-; CHECK-NEXT: --> {((4 * %i) + %a),+,4}<%loop> U: full-set S: full-set Exits: (68 + %a) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> {((4 * %i) + %a),+,4}<nw><%loop> U: full-set S: full-set Exits: (68 + %a) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 1
; CHECK-NEXT: --> {(1 + %i),+,1}<nuw><nsw><%loop> U: full-set S: full-set Exits: 18 LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test_guard_sle_sge_2
diff --git a/llvm/test/Analysis/ScalarEvolution/max-expr-cache.ll b/llvm/test/Analysis/ScalarEvolution/max-expr-cache.ll
index e1c05c4b431f3..4bf6f82837d89 100644
--- a/llvm/test/Analysis/ScalarEvolution/max-expr-cache.ll
+++ b/llvm/test/Analysis/ScalarEvolution/max-expr-cache.ll
@@ -77,11 +77,11 @@ define void @smax(i32 %tmp3) {
; CHECK-NEXT: %tmp55 = trunc i64 %tmp54 to i32
; CHECK-NEXT: --> {(trunc i64 undef to i32),+,1}<%bb53> U: full-set S: full-set Exits: (-1 + (0 smax %tmp49))<nsw> LoopDispositions: { %bb53: Computable, %bb4: Variant }
; CHECK-NEXT: %tmp56 = shl nsw i32 %tmp55, 3
-; CHECK-NEXT: --> {(8 * (trunc i64 undef to i32)),+,8}<%bb53> U: [0,-7) S: [-2147483648,2147483641) Exits: (-8 + (8 * (0 smax %tmp49))<nuw><nsw>)<nsw> LoopDispositions: { %bb53: Computable, %bb4: Variant }
+; CHECK-NEXT: --> {(8 * (trunc i64 undef to i32)),+,8}<nsw><%bb53> U: [0,-7) S: [-2147483648,2147483641) Exits: (-8 + (8 * (0 smax %tmp49))<nuw><nsw>)<nsw> LoopDispositions: { %bb53: Computable, %bb4: Variant }
; CHECK-NEXT: %tmp57 = sext i32 %tmp56 to i64
-; CHECK-NEXT: --> (sext i32 {(8 * (trunc i64 undef to i32)),+,8}<%bb53> to i64) U: [0,-7) S: [-2147483648,2147483641) Exits: (-8 + (8 * (zext i32 (0 smax %tmp49) to i64))<nuw><nsw>)<nsw> LoopDispositions: { %bb53: Computable, %bb4: Variant }
+; CHECK-NEXT: --> {(sext i32 (8 * (trunc i64 undef to i32)) to i64),+,8}<nsw><%bb53> U: [0,-7) S: [-2147483648,9223372036854775801) Exits: (-8 + (sext i32 (8 * (trunc i64 undef to i32)) to i64) + (8 * (zext i32 (0 smax %tmp49) to i64))<nuw><nsw> + (-8 * undef)) LoopDispositions: { %bb53: Computable, %bb4: Variant }
; CHECK-NEXT: %tmp58 = getelementptr inbounds i8, ptr null, i64 %tmp57
-; CHECK-NEXT: --> ((sext i32 {(8 * (trunc i64 undef to i32)),+,8}<%bb53> to i64) + null) U: [0,-7) S: [-2147483648,2147483641) Exits: (-8 + (8 * (zext i32 (0 smax %tmp49) to i64))<nuw><nsw> + null) LoopDispositions: { %bb53: Computable, %bb4: Variant }
+; CHECK-NEXT: --> {((sext i32 (8 * (trunc i64 undef to i32)) to i64) + null),+,8}<nw><%bb53> U: [0,-7) S: [-9223372036854775808,9223372036854775801) Exits: (-8 + (sext i32 (8 * (trunc i64 undef to i32)) to i64) + (8 * (zext i32 (0 smax %tmp49) to i64))<nuw><nsw> + (-8 * undef) + null) LoopDispositions: { %bb53: Computable, %bb4: Variant }
; CHECK-NEXT: %tmp59 = add nsw i64 %tmp54, 1
; CHECK-NEXT: --> {(1 + undef),+,1}<nsw><%bb53> U: full-set S: full-set Exits: (zext i32 (0 smax %tmp49) to i64) LoopDispositions: { %bb53: Computable, %bb4: Variant }
; CHECK-NEXT: %tmp62 = add nuw nsw i64 %tmp5, 1
@@ -236,11 +236,11 @@ define void @umax(i32 %tmp3) {
; CHECK-NEXT: %tmp55 = trunc i64 %tmp54 to i32
; CHECK-NEXT: --> {(trunc i64 undef to i32),+,1}<%bb53> U: full-set S: full-set Exits: (-1 + %tmp49)<nsw> LoopDispositions: { %bb53: Computable, %bb4: Variant }
; CHECK-NEXT: %tmp56 = shl nsw i32 %tmp55, 3
-; CHECK-NEXT: --> {(8 * (trunc i64 undef to i32)),+,8}<%bb53> U: [0,-7) S: [-2147483648,2147483641) Exits: (-8 + (8 * %tmp49)<nsw>) LoopDispositions: { %bb53: Computable, %bb4: Variant }
+; CHECK-NEXT: --> {(8 * (trunc i64 undef to i32)),+,8}<nsw><%bb53> U: [0,-7) S: [-2147483648,2147483641) Exits: (-8 + (8 * %tmp49)<nsw>) LoopDispositions: { %bb53: Computable, %bb4: Variant }
; CHECK-NEXT: %tmp57 = sext i32 %tmp56 to i64
-; CHECK-NEXT: --> (sext i32 {(8 * (trunc i64 undef to i32)),+,8}<%bb53> to i64) U: [0,-7) S: [-2147483648,2147483641) Exits: (sext i32 (-8 + (8 * %tmp49)<nsw>) to i64) LoopDispositions: { %bb53: Computable, %bb4: Variant }
+; CHECK-NEXT: --> {(sext i32 (8 * (trunc i64 undef to i32)) to i64),+,8}<nsw><%bb53> U: [0,-7) S: [-2147483648,9223372036854775801) Exits: (-8 + (sext i32 (8 * (trunc i64 undef to i32)) to i64) + (8 * (zext i32 %tmp49 to i64))<nuw><nsw> + (-8 * undef)) LoopDispositions: { %bb53: Computable, %bb4: Variant }
; CHECK-NEXT: %tmp58 = getelementptr inbounds i8, ptr null, i64 %tmp57
-; CHECK-NEXT: --> ((sext i32 {(8 * (trunc i64 undef to i32)),+,8}<%bb53> to i64) + null) U: [0,-7) S: [-2147483648,2147483641) Exits: ((sext i32 (-8 + (8 * %tmp49)<nsw>) to i64) + null) LoopDispositions: { %bb53: Computable, %bb4: Variant }
+; CHECK-NEXT: --> {((sext i32 (8 * (trunc i64 undef to i32)) to i64) + null),+,8}<nw><%bb53> U: [0,-7) S: [-9223372036854775808,9223372036854775801) Exits: (-8 + (sext i32 (8 * (trunc i64 undef to i32)) to i64) + (8 * (zext i32 %tmp49 to i64))<nuw><nsw> + (-8 * undef) + null) LoopDispositions: { %bb53: Computable, %bb4: Variant }
; CHECK-NEXT: %tmp59 = add nsw i64 %tmp54, 1
; CHECK-NEXT: --> {(1 + undef),+,1}<nsw><%bb53> U: full-set S: full-set Exits: (zext i32 %tmp49 to i64) LoopDispositions: { %bb53: Computable, %bb4: Variant }
; CHECK-NEXT: %tmp62 = add nuw nsw i64 %tmp5, 1
diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-scalable-stride.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-scalable-stride.ll
index 30a095fd144fa..9fe4b1ce5c0a9 100644
--- a/llvm/test/Analysis/ScalarEvolution/trip-count-scalable-stride.ll
+++ b/llvm/test/Analysis/ScalarEvolution/trip-count-scalable-stride.ll
@@ -375,7 +375,7 @@ define void @vscale_slt_noflags(ptr nocapture %A, i32 %n) mustprogress vscale_ra
; CHECK-NEXT: %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT: --> {0,+,vscale}<%for.body> U: full-set S: full-set Exits: (vscale * ((-1 + %n) /u vscale))<nuw> LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
-; CHECK-NEXT: --> {%A,+,(4 * vscale)<nuw><nsw>}<%for.body> U: full-set S: full-set Exits: ((4 * vscale * ((-1 + %n) /u vscale)) + %A) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: --> {%A,+,(4 * vscale)<nuw><nsw>}<nuw><%for.body> U: full-set S: full-set Exits: ((4 * vscale * ((-1 + %n) /u vscale)) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %add = add i32 %i.05, %vscale
; CHECK-NEXT: --> {vscale,+,vscale}<nuw><nsw><%for.body> U: [2,-2147483648) S: [2,-2147483648) Exits: (vscale * (1 + ((-1 + %n) /u vscale))<nuw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: Determining loop execution counts for: @vscale_slt_noflags
@@ -413,7 +413,7 @@ define void @vscalex4_ult_noflags(ptr nocapture %A, i32 %n) mustprogress vscale_
; CHECK-NEXT: %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT: --> {0,+,(4 * vscale)<nuw><nsw>}<%for.body> U: [0,-3) S: [-2147483648,2147483645) Exits: (4 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
-; CHECK-NEXT: --> {%A,+,(16 * vscale)<nuw><nsw>}<%for.body> U: full-set S: full-set Exits: ((16 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %A) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: --> {%A,+,(16 * vscale)<nuw><nsw>}<nuw><%for.body> U: full-set S: full-set Exits: ((16 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %add = add i32 %i.05, %VF
; CHECK-NEXT: --> {(4 * vscale)<nuw><nsw>,+,(4 * vscale)<nuw><nsw>}<nuw><%for.body> U: [8,-3) S: [-2147483648,2147483645) Exits: (vscale * (4 + (4 * ((-1 + %n) /u (4 * vscale)<nuw><nsw>))<nuw><nsw>)<nuw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: Determining loop execution counts for: @vscalex4_ult_noflags
@@ -455,7 +455,7 @@ define void @vscale_countdown_ne(ptr nocapture %A, i32 %n) mustprogress vscale_r
; CHECK-NEXT: %iv = phi i32 [ %sub, %for.body ], [ %start, %entry ]
; CHECK-NEXT: --> {((-1 * vscale)<nsw> + %n),+,(-1 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: ((vscale * (-1 + (-1 * (((-2 * vscale)<nsw> + %n) /u vscale))<nsw>)<nsw>) + %n) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %iv
-; CHECK-NEXT: --> {((4 * %n) + (-4 * vscale)<nsw> + %A),+,(-4 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: ((4 * %n) + (vscale * (-4 + (-4 * (((-2 * vscale)<nsw> + %n) /u vscale)))) + %A) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: --> {((4 * %n) + (-4 * vscale)<nsw> + %A),+,(-4 * vscale)<nsw>}<nw><%for.body> U: full-set S: full-set Exits: ((4 * %n) + (vscale * (-4 + (-4 * (((-2 * vscale)<nsw> + %n) /u vscale)))) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %sub = sub i32 %iv, %vscale
; CHECK-NEXT: --> {((-2 * vscale)<nsw> + %n),+,(-1 * vscale)<nsw>}<nw><%for.body> U: full-set S: full-set Exits: ((vscale * (-2 + (-1 * (((-2 * vscale)<nsw> + %n) /u vscale))<nsw>)) + %n) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: Determining loop execution counts for: @vscale_countdown_ne
@@ -496,7 +496,7 @@ define void @vscalex4_countdown_ne(ptr nocapture %A, i32 %n) mustprogress vscale
; CHECK-NEXT: %iv = phi i32 [ %sub, %for.body ], [ %start, %entry ]
; CHECK-NEXT: --> {((-4 * vscale)<nsw> + %n),+,(-4 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: ((vscale * (-4 + (-4 * (((-8 * vscale)<nsw> + %n) /u (4 * vscale)<nuw><nsw>))<nsw>)<nsw>) + %n) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %iv
-; CHECK-NEXT: --> {((4 * %n) + (-16 * vscale)<nsw> + %A),+,(-16 * vscale)<nsw>}<%for.body> U: full-set S: full-set Exits: ((4 * %n) + (vscale * (-16 + (-16 * (((-8 * vscale)<nsw> + %n) /u (4 * vscale)<nuw><nsw>)))) + %A) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: --> {((4 * %n) + (-16 * vscale)<nsw> + %A),+,(-16 * vscale)<nsw>}<nw><%for.body> U: full-set S: full-set Exits: ((4 * %n) + (vscale * (-16 + (-16 * (((-8 * vscale)<nsw> + %n) /u (4 * vscale)<nuw><nsw>)))) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %sub = sub i32 %iv, %VF
; CHECK-NEXT: --> {((-8 * vscale)<nsw> + %n),+,(-4 * vscale)<nsw>}<nw><%for.body> U: full-set S: full-set Exits: ((vscale * (-8 + (-4 * (((-8 * vscale)<nsw> + %n) /u (4 * vscale)<nuw><nsw>))<nsw>)) + %n) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: Determining loop execution counts for: @vscalex4_countdown_ne
diff --git a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll
index 89ddba3343ffa..d1a1636767d65 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll
@@ -282,7 +282,7 @@ define void @ebur128_calc_gating_block(ptr %st, ptr %optional_output) {
; CHECK-NEXT: br i1 [[CMP525_NOT]], label [[FOR_INC11]], label [[FOR_BODY7_LR_PH:%.*]]
; CHECK: for.body7.lr.ph:
; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[AUDIO_DATA]], align 8
-; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[TMP3]], i64 [[LSR_IV1]]
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr nuw i8, ptr [[TMP3]], i64 [[LSR_IV1]]
; CHECK-NEXT: br label [[FOR_BODY7:%.*]]
; CHECK: for.body7:
; CHECK-NEXT: [[LSR_IV3:%.*]] = phi ptr [ [[SCEVGEP4:%.*]], [[FOR_BODY7]] ], [ [[SCEVGEP]], [[FOR_BODY7_LR_PH]] ]
diff --git a/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll b/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll
index 89ce66767ccc9..bafdbd3d90300 100644
--- a/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll
+++ b/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam.ll
@@ -210,7 +210,7 @@ define void @test2(i32 %I, i32 %E, ptr noalias nocapture %A, ptr noalias nocaptu
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD9_2]]
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4, !tbaa [[TBAA0]]
; CHECK-NEXT: [[ADD9_3]] = add nuw i32 [[I]], 4
-; CHECK-NEXT: [[NITER_NEXT_3]] = add i32 [[NITER]], 4
+; CHECK-NEXT: [[NITER_NEXT_3]] = add nuw i32 [[NITER]], 4
; CHECK-NEXT: br label [[FOR_INNER:%.*]]
; CHECK: for.inner:
; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[INC:%.*]], [[FOR_INNER]] ]
@@ -727,10 +727,10 @@ define void @test7(i32 %I, i32 %E, ptr noalias nocapture %A, ptr noalias nocaptu
; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX2_2]], align 4, !tbaa [[TBAA0]]
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_2]]
; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_3]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[ADD_3]] = add nuw i32 [[I]], 4
+; CHECK-NEXT: [[ADD_3]] = add nuw nsw i32 [[I]], 4
; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_3]]
; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX2_3]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[NITER_NEXT_3]] = add i32 [[NITER]], 4
+; CHECK-NEXT: [[NITER_NEXT_3]] = add nuw nsw i32 [[NITER]], 4
; CHECK-NEXT: br label [[FOR_INNER:%.*]]
; CHECK: for.latch:
; CHECK-NEXT: [[ADD9_LCSSA:%.*]] = phi i32 [ [[ADD9:%.*]], [[FOR_INNER]] ]
@@ -925,10 +925,10 @@ define void @test8(i32 %I, i32 %E, ptr noalias nocapture %A, ptr noalias nocaptu
; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX6_2]], align 4, !tbaa [[TBAA0]]
; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_2]]
; CHECK-NEXT: store i32 0, ptr [[ARRAYIDX_3]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[ADD_3]] = add nuw i32 [[I]], 4
+; CHECK-NEXT: [[ADD_3]] = add nuw nsw i32 [[I]], 4
; CHECK-NEXT: [[ARRAYIDX6_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[ADD_3]]
; CHECK-NEXT: store i32 2, ptr [[ARRAYIDX6_3]], align 4, !tbaa [[TBAA0]]
-; CHECK-NEXT: [[NITER_NEXT_3]] = add i32 [[NITER]], 4
+; CHECK-NEXT: [[NITER_NEXT_3]] = add nuw nsw i32 [[NITER]], 4
; CHECK-NEXT: br label [[FOR_INNER:%.*]]
; CHECK: for.inner:
; CHECK-NEXT: [[SUM:%.*]] = phi i32 [ 0, [[FOR_OUTER]] ], [ [[ADD9:%.*]], [[FOR_INNER]] ]
>From c993c0b5ff454d94b233a0fb86a42d77b4100e57 Mon Sep 17 00:00:00 2001
From: Sebastian Pop <spop at nvidia.com>
Date: Sat, 23 Aug 2025 23:21:21 -0500
Subject: [PATCH 6/7] [SCEV] Fix NSW flag propagation in getGEPExpr
SCEV was losing NSW flags during AddRec operations, causing Dependence Analysis
to add unnecessary runtime assumptions for inbounds GEPs.
This patch fixes getGEPExpr: inherit flags from index expressions when GEP has
no explicit flags, allowing NSW flags from AddRec indices to propagate to the
final GEP result.
This eliminates spurious runtime assumptions in DA for expressions like
{0,+,(4 * %N * %M)} derived from inbounds GEPs, allowing proper dependence
analysis without conservative runtime checks.
---
llvm/lib/Analysis/ScalarEvolution.cpp | 25 +++++++++++++
.../Delinearization/fixed_size_array.ll | 18 ++++-----
.../scev-nsw-flags-enable-analysis.ll | 37 +++++++++++++++++++
...ter-dependence-analysis-forked-pointers.ll | 4 +-
.../ScalarEvolution/flags-from-poison.ll | 8 ++--
llvm/test/Analysis/ScalarEvolution/nsw.ll | 8 ++--
.../test/Analysis/ScalarEvolution/ptrtoint.ll | 8 ++--
.../trip-count-scalable-stride.ll | 4 +-
.../LoopIdiom/memset-runtime-debug.ll | 8 ++--
9 files changed, 91 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 3cd5b34d16838..5763ef0d1fefa 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -3798,6 +3798,31 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP,
if (NW.hasNoUnsignedWrap())
OffsetWrap = setFlags(OffsetWrap, SCEV::FlagNUW);
+ // Inherit flags from index expressions when GEP has no explicit flags.
+ if (OffsetWrap == SCEV::FlagAnyWrap) {
+ // Check if all index expressions have compatible no-wrap flags
+ bool AllHaveNSW = true, AllHaveNUW = true;
+ for (const SCEV *IndexExpr : IndexExprs) {
+ if (auto *AR = dyn_cast<SCEVAddRecExpr>(IndexExpr)) {
+ if (!AR->hasNoSignedWrap())
+ AllHaveNSW = false;
+ if (!AR->hasNoUnsignedWrap())
+ AllHaveNUW = false;
+ } else {
+ // Be conservative for non-AddRec expressions.
+ AllHaveNSW = false;
+ AllHaveNUW = false;
+ break;
+ }
+ }
+ // Inherit NSW if all have NSW.
+ if (AllHaveNSW)
+ OffsetWrap = setFlags(OffsetWrap, SCEV::FlagNSW);
+ // Inherit NUW if all have NUW.
+ if (AllHaveNUW)
+ OffsetWrap = setFlags(OffsetWrap, SCEV::FlagNUW);
+ }
+
Type *CurTy = GEP->getType();
bool FirstIter = true;
SmallVector<const SCEV *, 4> Offsets;
diff --git a/llvm/test/Analysis/Delinearization/fixed_size_array.ll b/llvm/test/Analysis/Delinearization/fixed_size_array.ll
index ffd0202e205ce..c7f35906037eb 100644
--- a/llvm/test/Analysis/Delinearization/fixed_size_array.ll
+++ b/llvm/test/Analysis/Delinearization/fixed_size_array.ll
@@ -12,7 +12,7 @@ define void @a_i_j_k(ptr %a) {
; CHECK-LABEL: 'a_i_j_k'
; CHECK-NEXT: Inst: store i32 1, ptr %idx, align 4
; CHECK-NEXT: In Loop with Header: for.k
-; CHECK-NEXT: AccessFunction: {{\{\{\{}}0,+,1024}<nuw><nsw><%for.i.header>,+,128}<nw><%for.j.header>,+,4}<nw><%for.k>
+; CHECK-NEXT: AccessFunction: {{\{\{\{}}0,+,1024}<nuw><nsw><%for.i.header>,+,128}<nuw><nsw><%for.j.header>,+,4}<nuw><nsw><%for.k>
; CHECK-NEXT: Base offset: %a
; CHECK-NEXT: ArrayDecl[UnknownSize][8][32] with elements of 4 bytes.
; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><nsw><%for.j.header>][{0,+,1}<nuw><nsw><%for.k>]
@@ -61,7 +61,7 @@ define void @a_i_nj_k(ptr %a) {
; CHECK-LABEL: 'a_i_nj_k'
; CHECK-NEXT: Inst: store i32 1, ptr %idx, align 4
; CHECK-NEXT: In Loop with Header: for.k
-; CHECK-NEXT: AccessFunction: {{\{\{\{}}896,+,1024}<nuw><nsw><%for.i.header>,+,-128}<nw><%for.j.header>,+,4}<nw><%for.k>
+; CHECK-NEXT: AccessFunction: {{\{\{\{}}896,+,1024}<nuw><nsw><%for.i.header>,+,-128}<nsw><%for.j.header>,+,4}<nuw><nsw><%for.k>
; CHECK-NEXT: Base offset: %a
; CHECK-NEXT: ArrayDecl[UnknownSize][8][32] with elements of 4 bytes.
; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{7,+,-1}<nsw><%for.j.header>][{0,+,1}<nuw><nsw><%for.k>]
@@ -117,14 +117,14 @@ define void @a_ijk_b_i2jk(ptr %a, ptr %b) {
; CHECK-LABEL: 'a_ijk_b_i2jk'
; CHECK-NEXT: Inst: store i32 1, ptr %a.idx, align 4
; CHECK-NEXT: In Loop with Header: for.k
-; CHECK-NEXT: AccessFunction: {{\{\{\{}}0,+,1024}<nuw><nsw><%for.i.header>,+,256}<nw><%for.j.header>,+,4}<nw><%for.k>
+; CHECK-NEXT: AccessFunction: {{\{\{\{}}0,+,1024}<nuw><nsw><%for.i.header>,+,256}<nuw><nsw><%for.j.header>,+,4}<nuw><nsw><%for.k>
; CHECK-NEXT: Base offset: %a
; CHECK-NEXT: ArrayDecl[UnknownSize][4][64] with elements of 4 bytes.
; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><nsw><%for.j.header>][{0,+,1}<nuw><nsw><%for.k>]
; CHECK-EMPTY:
; CHECK-NEXT: Inst: store i32 1, ptr %b.idx, align 4
; CHECK-NEXT: In Loop with Header: for.k
-; CHECK-NEXT: AccessFunction: {{\{\{\{}}0,+,1024}<nuw><nsw><%for.i.header>,+,256}<nw><%for.j.header>,+,4}<nw><%for.k>
+; CHECK-NEXT: AccessFunction: {{\{\{\{}}0,+,1024}<nuw><nsw><%for.i.header>,+,256}<nuw><nsw><%for.j.header>,+,4}<nuw><nsw><%for.k>
; CHECK-NEXT: Base offset: %b
; CHECK-NEXT: ArrayDecl[UnknownSize][4][64] with elements of 4 bytes.
; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><nsw><%for.j.header>][{0,+,1}<nuw><nsw><%for.k>]
@@ -181,10 +181,10 @@ define void @a_i_2j1_k(ptr %a) {
; CHECK-LABEL: 'a_i_2j1_k'
; CHECK-NEXT: Inst: store i32 1, ptr %idx, align 4
; CHECK-NEXT: In Loop with Header: for.k
-; CHECK-NEXT: AccessFunction: {{\{\{\{}}128,+,1024}<nuw><nsw><%for.i.header>,+,256}<nw><%for.j.header>,+,4}<nw><%for.k>
+; CHECK-NEXT: AccessFunction: {{\{\{\{}}128,+,1024}<nuw><nsw><%for.i.header>,+,256}<nuw><nsw><%for.j.header>,+,4}<nuw><nsw><%for.k>
; CHECK-NEXT: Base offset: %a
; CHECK-NEXT: ArrayDecl[UnknownSize][4][64] with elements of 4 bytes.
-; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><%for.j.header>][{32,+,1}<nw><%for.k>]
+; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><nsw><%for.j.header>][{32,+,1}<nuw><nsw><%for.k>]
;
entry:
br label %for.i.header
@@ -235,7 +235,7 @@ define void @a_i_3j_k(ptr %a) {
; CHECK-LABEL: 'a_i_3j_k'
; CHECK-NEXT: Inst: store i32 1, ptr %idx, align 4
; CHECK-NEXT: In Loop with Header: for.k
-; CHECK-NEXT: AccessFunction: {{\{\{\{}}0,+,1024}<nuw><nsw><%for.i.header>,+,384}<nw><%for.j.header>,+,4}<nw><%for.k>
+; CHECK-NEXT: AccessFunction: {{\{\{\{}}0,+,1024}<nuw><nsw><%for.i.header>,+,384}<nuw><nsw><%for.j.header>,+,4}<nuw><nsw><%for.k>
; CHECK-NEXT: failed to delinearize
;
entry:
@@ -287,7 +287,7 @@ define void @a_i_j_3k(ptr %a) {
; CHECK-LABEL: 'a_i_j_3k'
; CHECK-NEXT: Inst: store i32 1, ptr %idx, align 4
; CHECK-NEXT: In Loop with Header: for.k
-; CHECK-NEXT: AccessFunction: {{\{\{\{}}0,+,1024}<nuw><nsw><%for.i.header>,+,128}<nw><%for.j.header>,+,12}<nw><%for.k>
+; CHECK-NEXT: AccessFunction: {{\{\{\{}}0,+,1024}<nuw><nsw><%for.i.header>,+,128}<nuw><nsw><%for.j.header>,+,12}<nuw><nsw><%for.k>
; CHECK-NEXT: Base offset: %a
; CHECK-NEXT: ArrayDecl[UnknownSize][8][32] with elements of 4 bytes.
; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><nsw><%for.j.header>][{0,+,3}<nuw><nsw><%for.k>]
@@ -503,7 +503,7 @@ define void @non_divisible_by_element_size(ptr %a) {
; CHECK-LABEL: 'non_divisible_by_element_size'
; CHECK-NEXT: Inst: store i32 1, ptr %idx, align 4
; CHECK-NEXT: In Loop with Header: for.k
-; CHECK-NEXT: AccessFunction: {{\{\{\{}}0,+,256}<nuw><nsw><%for.i.header>,+,32}<nw><%for.j.header>,+,1}<nw><%for.k>
+; CHECK-NEXT: AccessFunction: {{\{\{\{}}0,+,256}<nuw><nsw><%for.i.header>,+,32}<nuw><nsw><%for.j.header>,+,1}<nuw><nsw><%for.k>
; CHECK-NEXT: failed to delinearize
;
entry:
diff --git a/llvm/test/Analysis/DependenceAnalysis/scev-nsw-flags-enable-analysis.ll b/llvm/test/Analysis/DependenceAnalysis/scev-nsw-flags-enable-analysis.ll
index b717ce9815341..0afbbf4a0a6f5 100644
--- a/llvm/test/Analysis/DependenceAnalysis/scev-nsw-flags-enable-analysis.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/scev-nsw-flags-enable-analysis.ll
@@ -43,3 +43,40 @@ loop:
exit:
ret void
}
+
+; Test showing different GEPs with same pattern work correctly
+define void @test_da_different_geps(ptr %A) {
+; CHECK-LABEL: 'test_da_different_geps'
+; CHECK-NEXT: Src: store i32 %conv, ptr %gep1, align 4 --> Dst: store i32 %conv, ptr %gep1, align 4
+; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: Src: store i32 %conv, ptr %gep1, align 4 --> Dst: %val = load i32, ptr %gep2, align 4
+; CHECK-NEXT: da analyze - flow [*|<]!
+; CHECK-NEXT: Src: %val = load i32, ptr %gep2, align 4 --> Dst: %val = load i32, ptr %gep2, align 4
+; CHECK-NEXT: da analyze - none!
+;
+
+entry:
+ br label %loop
+
+loop:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
+
+ ; NSW-flagged arithmetic
+ %mul = mul nsw i64 %i, 3
+ %sub = add nsw i64 %mul, -6
+
+ ; Two different access patterns that DA can now analyze correctly
+ %gep1 = getelementptr inbounds [100 x i32], ptr %A, i64 %sub, i64 %sub
+ %gep2 = getelementptr inbounds [100 x i32], ptr %A, i64 %i, i64 %i
+
+ %conv = trunc i64 %i to i32
+ store i32 %conv, ptr %gep1
+ %val = load i32, ptr %gep2
+
+ %i.next = add nsw i64 %i, 1
+ %cond = icmp ult i64 %i.next, 50
+ br i1 %cond, label %loop, label %exit
+
+exit:
+ ret void
+}
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis-forked-pointers.ll b/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis-forked-pointers.ll
index 425e95470623e..13b7967cb084c 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis-forked-pointers.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis-forked-pointers.ll
@@ -122,10 +122,10 @@ define void @dependency_check_and_runtime_checks_needed_select_of_ptr_add_recs(p
; CHECK-NEXT: Member: {%a,+,4}<nuw><%loop>
; CHECK-NEXT: Group GRP1:
; CHECK-NEXT: (Low: %b High: ((4 * %n) + %b))
-; CHECK-NEXT: Member: {%b,+,4}<%loop>
+; CHECK-NEXT: Member: {%b,+,4}<nw><%loop>
; CHECK-NEXT: Group GRP2:
; CHECK-NEXT: (Low: %c High: ((4 * %n) + %c))
-; CHECK-NEXT: Member: {%c,+,4}<%loop>
+; CHECK-NEXT: Member: {%c,+,4}<nw><%loop>
; CHECK-NEXT: Group GRP3:
; CHECK-NEXT: (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a))
; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<nw><%loop>
diff --git a/llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll b/llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll
index 2b2a8e1e6f357..34eb0dad11584 100644
--- a/llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll
+++ b/llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll
@@ -102,7 +102,7 @@ define void @test-add-scope-invariant(ptr %input, i32 %needle) {
; CHECK-NEXT: %of_interest = add nuw nsw i32 %i.next, %offset
; CHECK-NEXT: --> {(1 + %offset)<nuw><nsw>,+,1}<nuw><%loop> U: [1,0) S: [1,0) Exits: %needle LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %gep2 = getelementptr i32, ptr %input, i32 %of_interest
-; CHECK-NEXT: --> ((4 * (sext i32 {(1 + %offset)<nuw><nsw>,+,1}<nuw><%loop> to i64))<nsw> + %input) U: full-set S: full-set Exits: ((4 * (sext i32 %needle to i64))<nsw> + %input) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> ((4 * (sext i32 {(1 + %offset)<nuw><nsw>,+,1}<nuw><%loop> to i64))<nuw><nsw> + %input) U: full-set S: full-set Exits: ((4 * (sext i32 %needle to i64))<nuw><nsw> + %input) LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test-add-scope-invariant
; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + (-1 * %offset) + %needle)
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i32 -1
@@ -133,7 +133,7 @@ define void @test-add-scope-bound(ptr %input, i32 %needle) {
; CHECK-NEXT: %i = phi i32 [ %i.next, %loop ], [ 0, %entry ]
; CHECK-NEXT: --> {0,+,1}<nuw><%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %gep = getelementptr i32, ptr %input, i32 %i
-; CHECK-NEXT: --> ((4 * (sext i32 {0,+,1}<nuw><%loop> to i64))<nsw> + %input) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> ((4 * (sext i32 {0,+,1}<nuw><%loop> to i64))<nuw><nsw> + %input) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %offset = load i32, ptr %gep, align 4
; CHECK-NEXT: --> %offset U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
; CHECK-NEXT: %i.next = add nuw i32 %i, 1
@@ -174,7 +174,7 @@ define void @test-add-scope-bound-unkn-preheader(ptr %input, i32 %needle) {
; CHECK-NEXT: %i.next = add nuw i32 %i, %offset
; CHECK-NEXT: --> {%offset,+,%offset}<nuw><%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %gep2 = getelementptr i32, ptr %input, i32 %i.next
-; CHECK-NEXT: --> ((4 * (sext i32 {%offset,+,%offset}<nuw><%loop> to i64))<nsw> + %input) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> ((4 * (sext i32 {%offset,+,%offset}<nuw><%loop> to i64))<nuw><nsw> + %input) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test-add-scope-bound-unkn-preheader
; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count.
; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count.
@@ -205,7 +205,7 @@ define void @test-add-scope-bound-unkn-preheader-neg1(ptr %input, i32 %needle) {
; CHECK-NEXT: %i.next = add nuw i32 %i, %offset
; CHECK-NEXT: --> {%offset,+,%offset}<nuw><%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %gep2 = getelementptr i32, ptr %input, i32 %i.next
-; CHECK-NEXT: --> ((4 * (sext i32 {%offset,+,%offset}<nuw><%loop> to i64))<nsw> + %input) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> ((4 * (sext i32 {%offset,+,%offset}<nuw><%loop> to i64))<nuw><nsw> + %input) U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test-add-scope-bound-unkn-preheader-neg1
; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count.
; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count.
diff --git a/llvm/test/Analysis/ScalarEvolution/nsw.ll b/llvm/test/Analysis/ScalarEvolution/nsw.ll
index 4d668d1ffef11..1480ea223e34e 100644
--- a/llvm/test/Analysis/ScalarEvolution/nsw.ll
+++ b/llvm/test/Analysis/ScalarEvolution/nsw.ll
@@ -13,19 +13,19 @@ define void @test1(ptr %p) nounwind {
; CHECK-NEXT: %tmp2 = sext i32 %i.01 to i64
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%bb> U: [0,-9223372036854775808) S: [0,-9223372036854775808) Exits: <<Unknown>> LoopDispositions: { %bb: Computable }
; CHECK-NEXT: %tmp3 = getelementptr double, ptr %p, i64 %tmp2
-; CHECK-NEXT: --> {%p,+,8}<%bb> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb: Computable }
+; CHECK-NEXT: --> {%p,+,8}<nw><%bb> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb: Computable }
; CHECK-NEXT: %tmp6 = sext i32 %i.01 to i64
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%bb> U: [0,-9223372036854775808) S: [0,-9223372036854775808) Exits: <<Unknown>> LoopDispositions: { %bb: Computable }
; CHECK-NEXT: %tmp7 = getelementptr double, ptr %p, i64 %tmp6
-; CHECK-NEXT: --> {%p,+,8}<%bb> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb: Computable }
+; CHECK-NEXT: --> {%p,+,8}<nw><%bb> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb: Computable }
; CHECK-NEXT: %tmp8 = add nsw i32 %i.01, 1
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%bb> U: [1,-2147483648) S: [1,-2147483648) Exits: <<Unknown>> LoopDispositions: { %bb: Computable }
; CHECK-NEXT: %p.gep = getelementptr double, ptr %p, i32 %tmp8
-; CHECK-NEXT: --> {(8 + %p),+,8}<%bb> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb: Computable }
+; CHECK-NEXT: --> {(8 + %p),+,8}<nw><%bb> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb: Computable }
; CHECK-NEXT: %phitmp = sext i32 %tmp8 to i64
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%bb> U: [1,-9223372036854775808) S: [1,-9223372036854775808) Exits: <<Unknown>> LoopDispositions: { %bb: Computable }
; CHECK-NEXT: %tmp9 = getelementptr inbounds double, ptr %p, i64 %phitmp
-; CHECK-NEXT: --> {(8 + %p),+,8}<%bb> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb: Computable }
+; CHECK-NEXT: --> {(8 + %p),+,8}<nw><%bb> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb: Computable }
; CHECK-NEXT: Determining loop execution counts for: @test1
; CHECK-NEXT: Loop %bb: Unpredictable backedge-taken count.
; CHECK-NEXT: Loop %bb: Unpredictable constant max backedge-taken count.
diff --git a/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll b/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll
index e784d25385980..16a9ee3e3c47e 100644
--- a/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll
+++ b/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll
@@ -222,9 +222,9 @@ define void @ptrtoint_of_addrec(ptr %in, i32 %count) {
; X64-NEXT: %i6 = phi i64 [ 0, %entry ], [ %i9, %loop ]
; X64-NEXT: --> {0,+,1}<nuw><nsw><%loop> U: [0,-9223372036854775808) S: [0,-9223372036854775808) Exits: (-1 + (zext i32 %count to i64))<nsw> LoopDispositions: { %loop: Computable }
; X64-NEXT: %i7 = getelementptr inbounds i32, ptr %in, i64 %i6
-; X64-NEXT: --> {%in,+,4}<%loop> U: full-set S: full-set Exits: (-4 + (4 * (zext i32 %count to i64))<nuw><nsw> + %in) LoopDispositions: { %loop: Computable }
+; X64-NEXT: --> {%in,+,4}<nw><%loop> U: full-set S: full-set Exits: (-4 + (4 * (zext i32 %count to i64))<nuw><nsw> + %in) LoopDispositions: { %loop: Computable }
; X64-NEXT: %i8 = ptrtoint ptr %i7 to i64
-; X64-NEXT: --> {(ptrtoint ptr %in to i64),+,4}<%loop> U: full-set S: full-set Exits: (-4 + (4 * (zext i32 %count to i64))<nuw><nsw> + (ptrtoint ptr %in to i64)) LoopDispositions: { %loop: Computable }
+; X64-NEXT: --> {(ptrtoint ptr %in to i64),+,4}<nw><%loop> U: full-set S: full-set Exits: (-4 + (4 * (zext i32 %count to i64))<nuw><nsw> + (ptrtoint ptr %in to i64)) LoopDispositions: { %loop: Computable }
; X64-NEXT: %i9 = add nuw nsw i64 %i6, 1
; X64-NEXT: --> {1,+,1}<nuw><%loop> U: [1,0) S: [1,0) Exits: (zext i32 %count to i64) LoopDispositions: { %loop: Computable }
; X64-NEXT: Determining loop execution counts for: @ptrtoint_of_addrec
@@ -240,9 +240,9 @@ define void @ptrtoint_of_addrec(ptr %in, i32 %count) {
; X32-NEXT: %i6 = phi i64 [ 0, %entry ], [ %i9, %loop ]
; X32-NEXT: --> {0,+,1}<nuw><nsw><%loop> U: [0,-9223372036854775808) S: [0,-9223372036854775808) Exits: (-1 + (zext i32 %count to i64))<nsw> LoopDispositions: { %loop: Computable }
; X32-NEXT: %i7 = getelementptr inbounds i32, ptr %in, i64 %i6
-; X32-NEXT: --> {%in,+,4}<%loop> U: full-set S: full-set Exits: (-4 + (4 * %count) + %in) LoopDispositions: { %loop: Computable }
+; X32-NEXT: --> {%in,+,4}<nw><%loop> U: full-set S: full-set Exits: (-4 + (4 * %count) + %in) LoopDispositions: { %loop: Computable }
; X32-NEXT: %i8 = ptrtoint ptr %i7 to i64
-; X32-NEXT: --> (zext i32 {(ptrtoint ptr %in to i32),+,4}<%loop> to i64) U: [0,4294967296) S: [0,4294967296) Exits: (zext i32 (-4 + (4 * %count) + (ptrtoint ptr %in to i32)) to i64) LoopDispositions: { %loop: Computable }
+; X32-NEXT: --> (zext i32 {(ptrtoint ptr %in to i32),+,4}<nw><%loop> to i64) U: [0,4294967296) S: [0,4294967296) Exits: (zext i32 (-4 + (4 * %count) + (ptrtoint ptr %in to i32)) to i64) LoopDispositions: { %loop: Computable }
; X32-NEXT: %i9 = add nuw nsw i64 %i6, 1
; X32-NEXT: --> {1,+,1}<nuw><%loop> U: [1,0) S: [1,0) Exits: (zext i32 %count to i64) LoopDispositions: { %loop: Computable }
; X32-NEXT: Determining loop execution counts for: @ptrtoint_of_addrec
diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-scalable-stride.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-scalable-stride.ll
index 9fe4b1ce5c0a9..29c9caeb445ec 100644
--- a/llvm/test/Analysis/ScalarEvolution/trip-count-scalable-stride.ll
+++ b/llvm/test/Analysis/ScalarEvolution/trip-count-scalable-stride.ll
@@ -237,7 +237,7 @@ define void @vscale_slt_with_vp_plain(ptr nocapture %A, i32 %n) mustprogress vsc
; CHECK-NEXT: %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT: --> {0,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,2147483645) Exits: (4 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
-; CHECK-NEXT: --> {%A,+,(16 * vscale)<nuw><nsw>}<%for.body> U: full-set S: full-set Exits: ((16 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %A) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: --> {%A,+,(16 * vscale)<nuw><nsw>}<nw><%for.body> U: full-set S: full-set Exits: ((16 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %add = add nsw i32 %i.05, %VF
; CHECK-NEXT: --> {(4 * vscale)<nuw><nsw>,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [8,-2147483648) S: [8,2147483645) Exits: (vscale * (4 + (4 * ((-1 + %n) /u (4 * vscale)<nuw><nsw>))<nuw><nsw>)<nuw>) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: Determining loop execution counts for: @vscale_slt_with_vp_plain
@@ -278,7 +278,7 @@ define void @vscale_slt_with_vp_umin(ptr nocapture %A, i32 %n) mustprogress vsca
; CHECK-NEXT: %i.05 = phi i32 [ %add, %for.body ], [ 0, %entry ]
; CHECK-NEXT: --> {0,+,(4 * vscale)<nuw><nsw>}<nuw><nsw><%for.body> U: [0,-2147483648) S: [0,2147483645) Exits: (4 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, ptr %A, i32 %i.05
-; CHECK-NEXT: --> {%A,+,(16 * vscale)<nuw><nsw>}<%for.body> U: full-set S: full-set Exits: ((16 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %A) LoopDispositions: { %for.body: Computable }
+; CHECK-NEXT: --> {%A,+,(16 * vscale)<nuw><nsw>}<nw><%for.body> U: full-set S: full-set Exits: ((16 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %A) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %left = sub i32 %n, %i.05
; CHECK-NEXT: --> {%n,+,(-4 * vscale)<nsw>}<nw><%for.body> U: full-set S: full-set Exits: ((-4 * vscale * ((-1 + %n) /u (4 * vscale)<nuw><nsw>)) + %n) LoopDispositions: { %for.body: Computable }
; CHECK-NEXT: %VF.capped = call i32 @llvm.umin.i32(i32 %VF, i32 %left)
diff --git a/llvm/test/Transforms/LoopIdiom/memset-runtime-debug.ll b/llvm/test/Transforms/LoopIdiom/memset-runtime-debug.ll
index 6b952efb1e6de..bf68b559299a6 100644
--- a/llvm/test/Transforms/LoopIdiom/memset-runtime-debug.ll
+++ b/llvm/test/Transforms/LoopIdiom/memset-runtime-debug.ll
@@ -13,10 +13,10 @@
; Check on debug outputs...
; CHECK: loop-idiom Scanning: F[MemsetSize_LoopVariant] Countable Loop %for.body
-; CHECK-NEXT: memset size is non-constant
+; CHECK: memset size is non-constant
; CHECK-NEXT: memset size is not a loop-invariant, abort
; CHECK: loop-idiom Scanning: F[MemsetSize_Stride_Mismatch] Countable Loop %for.body
-; CHECK-NEXT: memset size is non-constant
+; CHECK: memset size is non-constant
; CHECK-NEXT: MemsetSizeSCEV: (4 * (sext i32 %m to i64))<nsw>
; CHECK-NEXT: PositiveStrideSCEV: (4 + (4 * (sext i32 %m to i64))<nsw>)<nsw>
; CHECK-NEXT: Try to fold SCEV based on loop guard
@@ -24,10 +24,10 @@
; CHECK-NEXT: FoldedPositiveStride: (4 + (4 * (sext i32 %m to i64))<nsw>)<nsw>
; CHECK-NEXT: SCEV don't match, abort
; CHECK: loop-idiom Scanning: F[NonZeroAddressSpace] Countable Loop %for.cond1.preheader
-; CHECK-NEXT: memset size is non-constant
+; CHECK: memset size is non-constant
; CHECK-NEXT: pointer is not in address space zero, abort
; CHECK: loop-idiom Scanning: F[NonAffinePointer] Countable Loop %for.body
-; CHECK-NEXT: Pointer is not affine, abort
+; CHECK: Pointer is not affine, abort
define void @MemsetSize_LoopVariant(ptr %ar, i32 %n, i32 %m) {
; CHECK-LABEL: @MemsetSize_LoopVariant(
>From 03954edb6f311ce420626d69d44a85edec95e3d0 Mon Sep 17 00:00:00 2001
From: Sebastian Pop <spop at nvidia.com>
Date: Thu, 21 Aug 2025 16:58:56 -0500
Subject: [PATCH 7/7] [DependenceAnalysis] Fix incorrect analysis of wrapping
AddRec expressions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Fixes GitHub issue #148435 where {false,+,true} patterns reported
"da analyze - none!" instead of correct "da analyze - output [*]!".
The issue occurs when AddRec expressions in narrow types create cyclic
patterns (e.g., {false,+,true} in i1 arithmetic: 0,1,0,1,0,1...) that
violate SIV analysis assumptions of linear, non-wrapping recurrences.
The fix detects potential wrapping by checking if step × iteration_count
exceeds the type's representable range, then classifies such expressions
as NonLinear for conservative analysis.
---
llvm/include/llvm/Analysis/ScalarEvolution.h | 6 +++
llvm/lib/Analysis/DependenceAnalysis.cpp | 28 ++++++++++++++
llvm/lib/Analysis/ScalarEvolution.cpp | 33 +++++++++++++++-
llvm/test/Analysis/DDG/basic-a.ll | 4 +-
llvm/test/Analysis/DDG/basic-b.ll | 8 ++--
.../Analysis/DependenceAnalysis/DADelin.ll | 34 ++++++++++++++---
.../DependenceAnalysis/MIVCheckConst.ll | 10 +++++
.../Analysis/DependenceAnalysis/PR148435.ll | 6 +--
.../SimpleSIVNoValidityCheck.ll | 12 ++++++
.../DependenceAnalysis/wrapping-addrec-1.ll | 38 +++++++++++++++++++
.../DependenceAnalysis/wrapping-addrec.ll | 33 ++++++++++++++++
.../DependenceAnalysis/wrapping-maxbtc.ll | 34 +++++++++++++++++
12 files changed, 228 insertions(+), 18 deletions(-)
create mode 100644 llvm/test/Analysis/DependenceAnalysis/wrapping-addrec-1.ll
create mode 100644 llvm/test/Analysis/DependenceAnalysis/wrapping-addrec.ll
create mode 100644 llvm/test/Analysis/DependenceAnalysis/wrapping-maxbtc.ll
diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index 167845ce646b9..9098186922de3 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1337,6 +1337,12 @@ class ScalarEvolution {
/// sharpen it.
LLVM_ABI void setNoWrapFlags(SCEVAddRecExpr *AddRec, SCEV::NoWrapFlags Flags);
+ /// Check if this AddRec expression may wrap.
+ /// Wrapping AddRecs create cyclic patterns that violate linearity
+ /// assumptions. Returns true if definitely wraps, false if definitely safe,
+ /// nullopt if unknown.
+ LLVM_ABI std::optional<bool> mayAddRecWrap(const SCEVAddRecExpr *AddRec);
+
class LoopGuards {
DenseMap<const SCEV *, const SCEV *> RewriteMap;
bool PreserveNUW = false;
diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp
index 8bc6ec699549d..8b84ebda0e3b7 100644
--- a/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -2256,6 +2256,34 @@ bool DependenceInfo::testSIV(const SCEV *Src, const SCEV *Dst, unsigned &Level,
assert(CurLoop == DstAddRec->getLoop() &&
"both loops in SIV should be same");
Level = mapSrcLoop(CurLoop);
+
+ // Check if either AddRec may wrap, which would invalidate SIV analysis
+ auto addWrapPredicate = [&](const SCEVAddRecExpr *AR, const char *Name) {
+ std::optional<bool> MayWrap = SE->mayAddRecWrap(AR);
+ if (MayWrap == true) {
+ LLVM_DEBUG(dbgs() << "\tSIV test skipped due to wrapping " << Name
+ << " AddRec\n");
+ return false;
+ }
+ if (!MayWrap.has_value()) {
+ // Unknown wrapping - add runtime predicate.
+ auto WrapFlags = static_cast<SCEVWrapPredicate::IncrementWrapFlags>(
+ SCEVWrapPredicate::IncrementNUSW |
+ SCEVWrapPredicate::IncrementNSSW);
+ const SCEVPredicate *WrapPred = SE->getWrapPredicate(AR, WrapFlags);
+ const_cast<DependenceInfo *>(this)->Assumptions.push_back(WrapPred);
+ LLVM_DEBUG(dbgs() << "\t Added runtime wrap assumption for " << Name
+ << ": " << *AR << "\n");
+ }
+ return true;
+ };
+
+ if (!addWrapPredicate(SrcAddRec, "Src") ||
+ !addWrapPredicate(DstAddRec, "Dst")) {
+ // Conservative - cannot prove independence.
+ return false;
+ }
+
bool disproven;
if (SrcCoeff == DstCoeff)
disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop, Level,
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 5763ef0d1fefa..bf8876a98066f 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -6502,8 +6502,37 @@ void ScalarEvolution::setNoWrapFlags(SCEVAddRecExpr *AddRec,
}
}
-ConstantRange ScalarEvolution::
-getRangeForUnknownRecurrence(const SCEVUnknown *U) {
+std::optional<bool>
+ScalarEvolution::mayAddRecWrap(const SCEVAddRecExpr *AddRec) {
+ if (AddRec->hasNoSelfWrap())
+ return false;
+
+ // For simple constant step cases, do explicit wrapping check.
+ const SCEV *Step = AddRec->getStepRecurrence(*this);
+ const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
+ if (ConstStep) {
+ const Loop *Loop = AddRec->getLoop();
+ if (hasLoopInvariantBackedgeTakenCount(Loop)) {
+ const SCEV *BTC = getBackedgeTakenCount(Loop);
+ const SCEVConstant *ConstBTC = dyn_cast<SCEVConstant>(BTC);
+ if (ConstBTC) {
+ // Check if step * iterations would exceed type range.
+ APInt StepVal = ConstStep->getAPInt();
+ APInt BTCVal = ConstBTC->getAPInt();
+ unsigned BitWidth = AddRec->getType()->getScalarSizeInBits();
+
+ bool Overflow = false;
+ APInt Product = StepVal.zext(64).umul_ov(BTCVal.zext(64), Overflow);
+
+ return Overflow || Product.getZExtValue() >= (1ULL << BitWidth);
+ }
+ }
+ }
+ return std::nullopt;
+}
+
+ConstantRange
+ScalarEvolution::getRangeForUnknownRecurrence(const SCEVUnknown *U) {
const DataLayout &DL = getDataLayout();
unsigned BitWidth = getTypeSizeInBits(U->getType());
diff --git a/llvm/test/Analysis/DDG/basic-a.ll b/llvm/test/Analysis/DDG/basic-a.ll
index 6e7e4034d2be2..ebc83841f07e7 100644
--- a/llvm/test/Analysis/DDG/basic-a.ll
+++ b/llvm/test/Analysis/DDG/basic-a.ll
@@ -98,7 +98,7 @@ for.end: ; preds = %test1.for.body, %en
; CHECK: Node Address:[[N2]]:single-instruction
; CHECK-NEXT: Instructions:
-; CHECK-NEXT: %inc = add i64 %i.02, 1
+; CHECK-NEXT: %inc = add nsw i64 %i.02, 1
; CHECK-NEXT: Edges:
; CHECK-NEXT: [def-use] to [[N1]]
; CHECK-NEXT: --- end of nodes in pi-block ---
@@ -168,7 +168,7 @@ test2.for.body: ; preds = %entry, %test2
%add = fadd float %0, %1
%arrayidx2 = getelementptr inbounds float, ptr %a, i64 %i.02
store float %add, ptr %arrayidx2, align 4
- %inc = add i64 %i.02, 1
+ %inc = add nsw i64 %i.02, 1
%exitcond = icmp ne i64 %inc, %n
br i1 %exitcond, label %test2.for.body, label %for.end
diff --git a/llvm/test/Analysis/DDG/basic-b.ll b/llvm/test/Analysis/DDG/basic-b.ll
index 66a6a83972406..6b8ecc5a4a8a3 100644
--- a/llvm/test/Analysis/DDG/basic-b.ll
+++ b/llvm/test/Analysis/DDG/basic-b.ll
@@ -12,7 +12,7 @@
; CHECK: Node Address:[[N3]]:single-instruction
; CHECK-NEXT: Instructions:
-; CHECK-NEXT: %inc = add i64 %i.02, 1
+; CHECK-NEXT: %inc = add nsw i64 %i.02, 1
; CHECK-NEXT: Edges:
; CHECK-NEXT: [def-use] to [[N2]]
; CHECK-NEXT:--- end of nodes in pi-block ---
@@ -95,7 +95,7 @@ test1.for.body: ; preds = %entry, %test1
%add = fadd float %0, %1
%arrayidx3 = getelementptr inbounds float, ptr %a, i64 %i.02
store float %add, ptr %arrayidx3, align 4
- %inc = add i64 %i.02, 1
+ %inc = add nsw i64 %i.02, 1
%cmp = icmp ult i64 %inc, %sub
br i1 %cmp, label %test1.for.body, label %for.end
@@ -116,7 +116,7 @@ for.end: ; preds = %test1.for.body, %en
; CHECK: Node Address:[[N3]]:single-instruction
; CHECK-NEXT: Instructions:
-; CHECK-NEXT: %inc = add i64 %i.02, 1
+; CHECK-NEXT: %inc = add nsw i64 %i.02, 1
; CHECK-NEXT: Edges:
; CHECK-NEXT: [def-use] to [[N2]]
; CHECK-NEXT:--- end of nodes in pi-block ---
@@ -188,7 +188,7 @@ test2.for.body: ; preds = %entry, %test2
%add = fadd float %0, %1
%arrayidx3 = getelementptr inbounds float, ptr %a, i64 %i.02
store float %add, ptr %arrayidx3, align 4
- %inc = add i64 %i.02, 1
+ %inc = add nsw i64 %i.02, 1
%cmp = icmp ult i64 %inc, %sub
br i1 %cmp, label %test2.for.body, label %for.end
diff --git a/llvm/test/Analysis/DependenceAnalysis/DADelin.ll b/llvm/test/Analysis/DependenceAnalysis/DADelin.ll
index af62e369b71a1..200dbe374b1d4 100644
--- a/llvm/test/Analysis/DependenceAnalysis/DADelin.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/DADelin.ll
@@ -417,8 +417,12 @@ define void @t7(i32 %n, i32 %m, i32 %o, ptr nocapture %A) {
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
; CHECK-NEXT: da analyze - consistent anti [1 0 0]!
+; CHECK-NEXT: Runtime Assumptions:
+; CHECK-NEXT: {-1,+,1}<%for.cond1.preheader> Added Flags: <nusw><nssw>
; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: Runtime Assumptions:
+; CHECK-NEXT: {-1,+,1}<%for.cond1.preheader> Added Flags: <nusw><nssw>
;
entry:
%cmp49 = icmp sgt i32 %n, 0
@@ -442,17 +446,18 @@ for.cond5.preheader: ; preds = %for.cond.cleanup7,
br i1 %cmp645, label %for.body8.lr.ph, label %for.cond.cleanup7
for.body8.lr.ph: ; preds = %for.cond5.preheader
- %mul944 = add i32 %j.048, %mul
- %add = mul i32 %mul944, %o
+ %mul944 = add nsw i32 %j.048, %mul
+ %add = mul nsw i32 %mul944, %o
br label %for.body8
for.body8: ; preds = %for.body8, %for.body8.lr.ph
%k.046 = phi i32 [ 0, %for.body8.lr.ph ], [ %inc, %for.body8 ]
- %add11 = add nsw i32 %k.046, %add
+
+%add11 = add nsw i32 %k.046, %add
%arrayidx = getelementptr inbounds i32, ptr %A, i32 %add11
%0 = load i32, ptr %arrayidx, align 4
%add12 = add nsw i32 %0, 1
- %mo = mul i32 %m, %o
+ %mo = mul nsw i32 %m, %o
%add111 = sub nsw i32 %add11, %mo
%arrayidx2 = getelementptr inbounds i32, ptr %A, i32 %add111
store i32 %add12, ptr %arrayidx2, align 4
@@ -649,8 +654,14 @@ define void @coeff_may_negative(ptr %a, i32 %k) {
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: Src: store i8 42, ptr %idx.0, align 1 --> Dst: store i8 42, ptr %idx.1, align 1
; CHECK-NEXT: da analyze - output [*|<]!
+; CHECK-NEXT: Runtime Assumptions:
+; CHECK-NEXT: {%a,+,%k}<%loop> Added Flags: <nusw><nssw>
+; CHECK-NEXT: {(%k + %a),+,%k}<%loop> Added Flags: <nusw><nssw>
; CHECK-NEXT: Src: store i8 42, ptr %idx.1, align 1 --> Dst: store i8 42, ptr %idx.1, align 1
; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: Runtime Assumptions:
+; CHECK-NEXT: {%a,+,%k}<%loop> Added Flags: <nusw><nssw>
+; CHECK-NEXT: {(%k + %a),+,%k}<%loop> Added Flags: <nusw><nssw>
;
entry:
br label %loop
@@ -688,11 +699,24 @@ define void @coeff_positive(ptr %a, i32 %k) {
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: Src: store i8 42, ptr %idx.0, align 1 --> Dst: store i8 42, ptr %idx.1, align 1
; CHECK-NEXT: da analyze - output [*|<]!
+; CHECK-NEXT: Runtime Assumptions:
+; CHECK-NEXT: {%a,+,%k}<%loop> Added Flags: <nusw><nssw>
+; CHECK-NEXT: {(%k + %a),+,%k}<%loop> Added Flags: <nusw><nssw>
; CHECK-NEXT: Src: store i8 42, ptr %idx.1, align 1 --> Dst: store i8 42, ptr %idx.1, align 1
; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: Runtime Assumptions:
+; CHECK-NEXT: {%a,+,%k}<%loop> Added Flags: <nusw><nssw>
+; CHECK-NEXT: {(%k + %a),+,%k}<%loop> Added Flags: <nusw><nssw>
;
+
entry:
- br label %loop
+
+
+
+
+
+
+br label %loop
loop:
%i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
diff --git a/llvm/test/Analysis/DependenceAnalysis/MIVCheckConst.ll b/llvm/test/Analysis/DependenceAnalysis/MIVCheckConst.ll
index b498d70648bad..6dd1b16fc16c6 100644
--- a/llvm/test/Analysis/DependenceAnalysis/MIVCheckConst.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/MIVCheckConst.ll
@@ -43,16 +43,26 @@ define void @test(ptr %A, ptr %B, i1 %arg, i32 %n, i32 %m) #0 align 2 {
; CHECK-NEXT: Runtime Assumptions:
; CHECK-NEXT: Equal predicate: (zext i7 (4 * (trunc i32 %v1 to i7) * (1 + (trunc i32 %n to i7))) to i32) == 0
; CHECK-NEXT: Equal predicate: (8 * (zext i4 (trunc i32 %v1 to i4) to i32))<nuw><nsw> == 0
+; CHECK-NEXT: {(80640 + (4 * (1 + %n) * %v1) + %A),+,(8 * %v1)}<%bb13> Added Flags: <nusw><nssw>
; CHECK-NEXT: Src: %v27 = load <32 x i32>, ptr %v25, align 256 --> Dst: %v32 = load <32 x i32>, ptr %v30, align 128
; CHECK-NEXT: da analyze - input [* S S|<]!
; CHECK-NEXT: Runtime Assumptions:
; CHECK-NEXT: Equal predicate: (zext i7 (4 * (trunc i32 %v1 to i7) * (1 + (trunc i32 %n to i7))) to i32) == 0
; CHECK-NEXT: Equal predicate: (8 * (zext i4 (trunc i32 %v1 to i4) to i32))<nuw><nsw> == 0
+; CHECK-NEXT: {(80640 + (4 * (1 + %n) * %v1) + %A),+,(8 * %v1)}<%bb13> Added Flags: <nusw><nssw>
+; CHECK-NEXT: {(126720 + (128 * %m) + %A),+,256}<%bb13> Added Flags: <nusw><nssw>
; CHECK-NEXT: Src: %v32 = load <32 x i32>, ptr %v30, align 128 --> Dst: %v32 = load <32 x i32>, ptr %v30, align 128
; CHECK-NEXT: da analyze - consistent input [0 S S]!
+; CHECK-NEXT: Runtime Assumptions:
+; CHECK-NEXT: Equal predicate: (zext i7 (4 * (trunc i32 %v1 to i7) * (1 + (trunc i32 %n to i7))) to i32) == 0
+; CHECK-NEXT: Equal predicate: (8 * (zext i4 (trunc i32 %v1 to i4) to i32))<nuw><nsw> == 0
+; CHECK-NEXT: {(80640 + (4 * (1 + %n) * %v1) + %A),+,(8 * %v1)}<%bb13> Added Flags: <nusw><nssw>
+; CHECK-NEXT: {(126720 + (128 * %m) + %A),+,256}<%bb13> Added Flags: <nusw><nssw>
; CHECK-NEXT: Runtime Assumptions:
; CHECK-NEXT: Equal predicate: (zext i7 (4 * (trunc i32 %v1 to i7) * (1 + (trunc i32 %n to i7))) to i32) == 0
; CHECK-NEXT: Equal predicate: (8 * (zext i4 (trunc i32 %v1 to i4) to i32))<nuw><nsw> == 0
+; CHECK-NEXT: {(80640 + (4 * (1 + %n) * %v1) + %A),+,(8 * %v1)}<%bb13> Added Flags: <nusw><nssw>
+; CHECK-NEXT: {(126720 + (128 * %m) + %A),+,256}<%bb13> Added Flags: <nusw><nssw>
;
entry:
%v1 = load i32, ptr %B, align 4
diff --git a/llvm/test/Analysis/DependenceAnalysis/PR148435.ll b/llvm/test/Analysis/DependenceAnalysis/PR148435.ll
index 1633a91336f68..762c997f3468c 100644
--- a/llvm/test/Analysis/DependenceAnalysis/PR148435.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/PR148435.ll
@@ -6,9 +6,6 @@
; expressions contain AddRec after propagation, which can happen when
; constraints simplify the expressions to non-AddRec forms.
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
-target triple = "aarch64-unknown-linux-gnu"
-
define void @_Z1cb(ptr %a) {
; CHECK-LABEL: '_Z1cb'
; CHECK-NEXT: Src: store i8 0, ptr %arrayidx9, align 1 --> Dst: store i8 0, ptr %arrayidx9, align 1
@@ -75,8 +72,7 @@ for.end10: ; preds = %for.cond
define void @f1(ptr %a) {
; CHECK-LABEL: 'f1'
; CHECK-NEXT: Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1
-; CHECK-NEXT: da analyze - none!
-; Note: the second patch for PR148435 modifies the above CHECK to correct "output [*]".
+; CHECK-NEXT: da analyze - consistent output [*]!
;
entry:
br label %loop
diff --git a/llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheck.ll b/llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheck.ll
index 4346507ba8f90..47d99b3b7183c 100644
--- a/llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheck.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/SimpleSIVNoValidityCheck.ll
@@ -81,8 +81,14 @@ define void @t2(i32 signext %n, i32 signext %m, ptr %a) {
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: Src: %21 = load i32, ptr %arrayidx28, align 4 --> Dst: store i32 %21, ptr %arrayidx38, align 4
; CHECK-NEXT: da analyze - consistent anti [1 -2 0 -3 2]!
+; CHECK-NEXT: Runtime Assumptions:
+; CHECK-NEXT: {0,+,1}<%for.body12> Added Flags: <nusw><nssw>
+; CHECK-NEXT: {3,+,1}<%for.body12> Added Flags: <nusw><nssw>
; CHECK-NEXT: Src: store i32 %21, ptr %arrayidx38, align 4 --> Dst: store i32 %21, ptr %arrayidx38, align 4
; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: Runtime Assumptions:
+; CHECK-NEXT: {0,+,1}<%for.body12> Added Flags: <nusw><nssw>
+; CHECK-NEXT: {3,+,1}<%for.body12> Added Flags: <nusw><nssw>
;
; LIN-LABEL: 't2'
; LIN-NEXT: Src: %21 = load i32, ptr %arrayidx28, align 4 --> Dst: %21 = load i32, ptr %arrayidx28, align 4
@@ -211,8 +217,14 @@ define void @t3(i64 %n, i64 %m, i64 %lb, ptr %a) {
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: Src: %2 = load i32, ptr %arrayidx6, align 4 --> Dst: store i32 %2, ptr %arrayidx8, align 4
; CHECK-NEXT: da analyze - consistent anti [1 -2]!
+; CHECK-NEXT: Runtime Assumptions:
+; CHECK-NEXT: {(-2 + %lb),+,1}<%for.body4> Added Flags: <nusw><nssw>
+; CHECK-NEXT: {%lb,+,1}<%for.body4> Added Flags: <nusw><nssw>
; CHECK-NEXT: Src: store i32 %2, ptr %arrayidx8, align 4 --> Dst: store i32 %2, ptr %arrayidx8, align 4
; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: Runtime Assumptions:
+; CHECK-NEXT: {(-2 + %lb),+,1}<%for.body4> Added Flags: <nusw><nssw>
+; CHECK-NEXT: {%lb,+,1}<%for.body4> Added Flags: <nusw><nssw>
;
; LIN-LABEL: 't3'
; LIN-NEXT: Src: %2 = load i32, ptr %arrayidx6, align 4 --> Dst: %2 = load i32, ptr %arrayidx6, align 4
diff --git a/llvm/test/Analysis/DependenceAnalysis/wrapping-addrec-1.ll b/llvm/test/Analysis/DependenceAnalysis/wrapping-addrec-1.ll
new file mode 100644
index 0000000000000..f932425332ce4
--- /dev/null
+++ b/llvm/test/Analysis/DependenceAnalysis/wrapping-addrec-1.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -disable-output "-passes=print<da>" 2>&1 | FileCheck %s
+
+; Test case for bug #148435 - SIV test assertion failure
+; This test ensures that testSIV handles the case where neither Src nor Dst
+; expressions contain AddRec after propagation, which can happen when
+; constraints simplify the expressions to non-AddRec forms.
+
+define void @f(ptr %a) {
+; CHECK-LABEL: 'f'
+; CHECK-NEXT: Src: store i8 42, ptr %idx, align 1 --> Dst: store i8 42, ptr %idx, align 1
+; CHECK-NEXT: da analyze - consistent output [* *]!
+;
+entry:
+ br label %loop.i.header
+
+loop.i.header:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %loop.i.latch ]
+ %and.i = and i64 %i, 1
+ br label %loop.j
+
+loop.j:
+ %j = phi i64 [ 0, %loop.i.header ], [ %j.next, %loop.j ]
+ %and.j = and i64 %j, 1
+ %idx = getelementptr [2 x [2 x i8]], ptr %a, i64 0, i64 %and.i, i64 %and.j
+ store i8 42, ptr %idx
+ %j.next = add i64 %j, 1
+ %exitcond.j = icmp eq i64 %j.next, 100
+ br i1 %exitcond.j, label %loop.i.latch, label %loop.j
+
+loop.i.latch:
+ %i.next = add i64 %i, 1
+ %exitcond.i = icmp eq i64 %i.next, 100
+ br i1 %exitcond.i, label %exit, label %loop.i.header
+
+exit:
+ ret void
+}
diff --git a/llvm/test/Analysis/DependenceAnalysis/wrapping-addrec.ll b/llvm/test/Analysis/DependenceAnalysis/wrapping-addrec.ll
new file mode 100644
index 0000000000000..4db71184ae168
--- /dev/null
+++ b/llvm/test/Analysis/DependenceAnalysis/wrapping-addrec.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -disable-output "-passes=print<da>" 2>&1 | FileCheck %s
+
+; Test case for wrapping AddRec detection in DependenceAnalysis.
+; This ensures that AddRec expressions that wrap (creating cyclic rather than
+; linear patterns) are rejected from SIV analysis and treated conservatively.
+
+; This test case has a clear dependence pattern that was incorrectly reported as "none!"
+; The issue: {false,+,true} in i1 arithmetic creates pattern (0,1,0,1,0,1,...).
+; - i=0: a[0][0][0], i=1: a[0][1][1], i=2: a[0][0][0], i=3: a[0][1][1], ...
+; - Clear dependencies at distances 2, 4, 6 between iterations accessing same locations.
+; - Strong SIV test was missing these due to treating wrapping pattern as linear.
+
+define void @test_wrapping_i1_addrec(ptr %a) {
+; CHECK-LABEL: 'test_wrapping_i1_addrec'
+; CHECK-NEXT: Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1
+; CHECK-NEXT: da analyze - consistent output [*]!
+;
+entry:
+ br label %loop
+
+loop:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
+ %and = and i64 %i, 1
+ %idx = getelementptr inbounds [4 x [4 x i8]], ptr %a, i64 0, i64 %and, i64 %and
+ store i8 0, ptr %idx
+ %i.next = add i64 %i, 1
+ %exitcond.not = icmp slt i64 %i.next, 8
+ br i1 %exitcond.not, label %loop, label %exit
+
+exit:
+ ret void
+}
diff --git a/llvm/test/Analysis/DependenceAnalysis/wrapping-maxbtc.ll b/llvm/test/Analysis/DependenceAnalysis/wrapping-maxbtc.ll
new file mode 100644
index 0000000000000..4033fb4672603
--- /dev/null
+++ b/llvm/test/Analysis/DependenceAnalysis/wrapping-maxbtc.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -disable-output "-passes=print<da>" 2>&1 | FileCheck %s
+
+; Test case for wrapping AddRec detection using constant max backedge taken count.
+; This ensures that wrapping detection works even when exact BTC is not available
+; but we can get a conservative upper bound.
+
+; Test case where loop has variable bound but SCEV can provide max BTC estimate.
+; The i2 type can only represent 0,1,2,3, so if we iterate more than 4 times
+; with step=1, we'll get wrapping: 0,1,2,3,0,1,2,3...
+
+define void @test_wrapping_with_maxbtc(ptr %a, i32 %n) {
+; CHECK-LABEL: 'test_wrapping_with_maxbtc'
+; CHECK-NEXT: Src: store i8 0, ptr %idx, align 1 --> Dst: store i8 0, ptr %idx, align 1
+; CHECK-NEXT: da analyze - output [*]!
+;
+entry:
+ %bound = and i32 %n, 1023 ; Limit n to at most 1024
+ %cmp = icmp sgt i32 %bound, 0
+ br i1 %cmp, label %loop, label %exit
+
+loop:
+ %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
+ %i.narrow = trunc i32 %i to i2 ; Only 2 bits: wraps after 4 iterations
+ %zext = zext i2 %i.narrow to i64
+ %idx = getelementptr inbounds [8 x i8], ptr %a, i64 0, i64 %zext
+ store i8 0, ptr %idx
+ %i.next = add i32 %i, 1
+ %exitcond = icmp slt i32 %i.next, %bound ; Variable upper bound
+ br i1 %exitcond, label %loop, label %exit
+
+exit:
+ ret void
+}
More information about the llvm-commits
mailing list