[llvm] [SCEVDivision] Prevent propagation of incorrect no-wrap flags (PR #154745)

Thu Aug 21 05:19:44 PDT 2025

https://github.com/kasuga-fj created https://github.com/llvm/llvm-project/pull/154745

In `SCEVDivision`, when the numerator is `SCEVAddRecExpr`, its no-wrap flags were propagated to the quotient and remainder. In general, it is incorrect. For example, consider dividing `{0,+,(%m + %n)}<nuw><nsw><%loop>` by `%m`. The quotient would be `{0,+,1}<%loop>` and the remainder would be `{0,+,%n}<%loop>`. If `%m` and `%n` have opposite signs, propagating the no-wrap flags from the numerator may be invalid. 
This patch prevents the incorrect propagation of no-wrap flags in such cases and introduces a small inference for the `<NW>` flags in the remainder, primarily to avoid changing existing test results.

Fix #152566

>From 1e2b6d4c5631b1d228fbe3fecc5134de76c7be7a Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Wed, 20 Aug 2025 10:11:43 +0000
Subject: [PATCH] [SCEVDivision] Prevent propagation of incorrect no-wrap flags

---
 llvm/lib/Analysis/ScalarEvolutionDivision.cpp |  22 ++-
 .../Delinearization/fixed_size_array.ll       |   2 +-
 llvm/test/Analysis/Delinearization/wraps.ll   | 130 ++++++++++++++++++
 .../Analysis/DependenceAnalysis/DADelin.ll    |   6 +-
 4 files changed, 154 insertions(+), 6 deletions(-)
 create mode 100644 llvm/test/Analysis/Delinearization/wraps.ll

diff --git a/llvm/lib/Analysis/ScalarEvolutionDivision.cpp b/llvm/lib/Analysis/ScalarEvolutionDivision.cpp
index d03930d9e2d99..c374d328c984a 100644
--- a/llvm/lib/Analysis/ScalarEvolutionDivision.cpp
+++ b/llvm/lib/Analysis/ScalarEvolutionDivision.cpp
@@ -141,10 +141,26 @@ void SCEVDivision::visitAddRecExpr(const SCEVAddRecExpr *Numerator) {
   if (Ty != StartQ->getType() || Ty != StartR->getType() ||
       Ty != StepQ->getType() || Ty != StepR->getType())
     return cannotDivide(Numerator);
+
+  // Infer no-wrap flags for Remainder.
+  // TODO: Catch more cases.
+  SCEV::NoWrapFlags NumFlags = Numerator->getNoWrapFlags();
+  SCEV::NoWrapFlags RemFlags = SCEV::NoWrapFlags::FlagAnyWrap;
+  const SCEV *StepNumAbs =
+      SE.getAbsExpr(Numerator->getStepRecurrence(SE), /*IsNSW=*/false);
+  const SCEV *StepRAbs = SE.getAbsExpr(StepR, /*IsNSW=*/false);
+  const Loop *L = Numerator->getLoop();
+
+  // If abs(StepR) <=u abs(StepNumAbs) and both are loop invariant, propagate
+  // the <NW> from Numerator to Remainder.
+  if (ScalarEvolution::hasFlags(NumFlags, SCEV::NoWrapFlags::FlagNW) &&
+      SE.isLoopInvariant(StepNumAbs, L) && SE.isLoopInvariant(StepRAbs, L) &&
+      SE.isKnownPredicate(ICmpInst::ICMP_ULE, StepRAbs, StepNumAbs))
+    RemFlags = ScalarEvolution::setFlags(RemFlags, SCEV::NoWrapFlags::FlagNW);
+
   Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(),
-                              Numerator->getNoWrapFlags());
-  Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(),
-                               Numerator->getNoWrapFlags());
+                              SCEV::NoWrapFlags::FlagAnyWrap);
+  Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(), RemFlags);
 }
 
 void SCEVDivision::visitAddExpr(const SCEVAddExpr *Numerator) {
diff --git a/llvm/test/Analysis/Delinearization/fixed_size_array.ll b/llvm/test/Analysis/Delinearization/fixed_size_array.ll
index 0512044990163..e77fd79a49b35 100644
--- a/llvm/test/Analysis/Delinearization/fixed_size_array.ll
+++ b/llvm/test/Analysis/Delinearization/fixed_size_array.ll
@@ -163,7 +163,7 @@ exit:
 ; CHECK:      Delinearization on function a_i_2j1_k:
 ; CHECK:      Base offset: %a
 ; CHECK-NEXT: ArrayDecl[UnknownSize][4][64] with elements of 4 bytes.
-; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><%for.j.header>][{32,+,1}<nw><%for.k>]
+; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<%for.j.header>][{32,+,1}<%for.k>]
 define void @a_i_2j1_k(ptr %a) {
 entry:
   br label %for.i.header
diff --git a/llvm/test/Analysis/Delinearization/wraps.ll b/llvm/test/Analysis/Delinearization/wraps.ll
new file mode 100644
index 0000000000000..fc4935bad9939
--- /dev/null
+++ b/llvm/test/Analysis/Delinearization/wraps.ll
@@ -0,0 +1,130 @@
+; RUN: opt < %s -passes='print<delinearization>' -disable-output 2>&1 | FileCheck %s
+
+; In the following case, we don't know the concret value of `m`, so we cannot
+; deduce no-wrap behavior for the quotient/remainder divided by `m`. However,
+; we can infer `{0,+,1}<%loop>` is nuw and nsw from the induction variable.
+;
+; for (int i = 0; i < btc; i++)
+;   a[i * (m + 42)] = 0;
+
+; CHECK:      AccessFunction: {0,+,(42 + %m)}<nuw><nsw><%loop>
+; CHECK-NEXT: Base offset: %a
+; CHECK-NEXT: ArrayDecl[UnknownSize][%m] with elements of 1 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%loop>][{0,+,42}<%loop>]
+define void @divide_by_m0(ptr %a, i64 %m, i64 %btc) {
+entry:
+  %stride = add nsw nuw i64 %m, 42
+  br label %loop
+
+loop:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
+  %offset = phi i64 [ 0, %entry ], [ %offset.next, %loop ]
+  %idx = getelementptr inbounds i8, ptr %a, i64 %offset
+  store i8 0, ptr %idx
+  %i.next = add nsw nuw i64 %i, 1
+  %offset.next = add nsw nuw i64 %offset, %stride
+  %cond = icmp eq i64 %i.next, %btc
+  br i1 %cond, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; In the following case, we don't know the concret value of `m`, so we cannot
+; deduce no-wrap behavior for the quotient/remainder divided by `m`. Also, we
+; don't infer nsw/nuw from the induction variable in this case.
+;
+; for (int i = 0; i < btc; i++)
+;   a[i * (2 * m + 42)] = 0;
+
+; CHECK:      AccessFunction: {0,+,(42 + (2 * %m))}<nuw><nsw><%loop>
+; CHECK-NEXT: Base offset: %a
+; CHECK-NEXT: ArrayDecl[UnknownSize][%m] with elements of 1 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,2}<%loop>][{0,+,42}<%loop>]
+define void @divide_by_m2(ptr %a, i64 %m, i64 %btc) {
+entry:
+  %m2 = add nsw nuw i64 %m, %m
+  %stride = add nsw nuw i64 %m2, 42
+  br label %loop
+
+loop:
+  %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
+  %offset = phi i64 [ 0, %entry ], [ %offset.next, %loop ]
+  %idx = getelementptr inbounds i8, ptr %a, i64 %offset
+  store i8 0, ptr %idx
+  %i.next = add nsw nuw i64 %i, 1
+  %offset.next = add nsw nuw i64 %offset, %stride
+  %cond = icmp eq i64 %i.next, %btc
+  br i1 %cond, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; In the following case, the `i * 2 * d` is always zero, so it's nsw and nuw.
+; However, the quotient divided by `d` is neither nsw nor nuw.
+;
+; if (d == 0)
+;   for (unsigned long long i = 0; i != UINT64_MAX; i++)
+;     a[i * 2 * d] = 42;
+
+; CHECK:      AccessFunction: {0,+,(2 * %d)}<nuw><nsw><%loop>
+; CHECK-NEXT: Base offset: %a
+; CHECK-NEXT: ArrayDecl[UnknownSize][%d] with elements of 1 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,2}<%loop>][0]
+define void @divide_by_zero(ptr %a, i64 %d) {
+entry:
+  %guard = icmp eq i64 %d, 0
+  br i1 %guard, label %loop.preheader, label %exit
+
+loop.preheader:
+  %stride = mul nsw nuw i64 %d, 2  ; since %d is 0, %stride is also 0
+  br label %loop
+
+loop:
+  %i = phi i64 [ 0, %loop.preheader ], [ %i.next, %loop ]
+  %offset = phi i64 [ 0, %loop.preheader ], [ %offset.next, %loop ]
+  %idx = getelementptr inbounds i8, ptr %a, i64 %offset
+  store i8 42, ptr %idx
+  %i.next = add nuw i64 %i, 1
+  %offset.next = add nsw nuw i64 %offset, %stride
+  %cond = icmp eq i64 %i.next, -1
+  br i1 %cond, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+; In the following case, the `i * (d + 1)` is always zero, so it's nsw and nuw.
+; However, the quotient/remainder divided by `d` is not nsw.
+;
+; if (d == UINT64_MAX)
+;   for (unsigned long long i = 0; i != d; i++)
+;     a[i * (d + 1)] = 42;
+
+; CHECK:      AccessFunction: {0,+,(1 + %d)}<nuw><nsw><%loop>
+; CHECK-NEXT: Base offset: %a
+; CHECK-NEXT: ArrayDecl[UnknownSize][%d] with elements of 1 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><%loop>][{0,+,1}<nuw><%loop>]
+define void @divide_by_minus_one(ptr %a, i64 %d) {
+entry:
+  %guard = icmp eq i64 %d, -1
+  br i1 %guard, label %loop.preheader, label %exit
+
+loop.preheader:
+  %stride = add nsw i64 %d, 1  ; since %d is -1, %stride is 0
+  br label %loop
+
+loop:
+  %i = phi i64 [ 0, %loop.preheader ], [ %i.next, %loop ]
+  %offset = phi i64 [ 0, %loop.preheader ], [ %offset.next, %loop ]
+  %idx = getelementptr inbounds i8, ptr %a, i64 %offset
+  store i8 42, ptr %idx
+  %i.next = add nuw i64 %i, 1
+  %offset.next = add nsw nuw i64 %offset, %stride
+  %cond = icmp eq i64 %i.next, %d
+  br i1 %cond, label %exit, label %loop
+
+exit:
+  ret void
+}
diff --git a/llvm/test/Analysis/DependenceAnalysis/DADelin.ll b/llvm/test/Analysis/DependenceAnalysis/DADelin.ll
index 8f94a455d3724..f670136aed750 100644
--- a/llvm/test/Analysis/DependenceAnalysis/DADelin.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/DADelin.ll
@@ -479,14 +479,16 @@ for.cond.cleanup:                                 ; preds = %for.cond.cleanup3,
 ;;    for (int k = 1; k < o; k++)
 ;;      = A[i*m*o + j*o + k]
 ;;     A[i*m*o + j*o + k - 1] =
+;;
+;; FIXME: Currently fails to infer nsw for the SCEV `{0,+,1}<for.body8>`
 define void @t8(i32 %n, i32 %m, i32 %o, ptr nocapture %A) {
 ; CHECK-LABEL: 't8'
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4
 ; CHECK-NEXT:    da analyze - none!
 ; CHECK-NEXT:  Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
-; CHECK-NEXT:    da analyze - consistent anti [0 0 1]!
+; CHECK-NEXT:    da analyze - anti [* * *|<]!
 ; CHECK-NEXT:  Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
-; CHECK-NEXT:    da analyze - none!
+; CHECK-NEXT:    da analyze - output [* * *]!
 ;
 entry:
   %cmp49 = icmp sgt i32 %n, 0