[llvm] [SCEVDivision] Prevent propagation of incorrect no-wrap flags (PR #154745)
Ryotaro Kasuga via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 21 05:19:44 PDT 2025
https://github.com/kasuga-fj created https://github.com/llvm/llvm-project/pull/154745
In `SCEVDivision`, when the numerator is a `SCEVAddRecExpr`, its no-wrap flags were propagated to both the quotient and the remainder. In general, this is incorrect. For example, consider dividing `{0,+,(%m + %n)}<nuw><nsw><%loop>` by `%m`. The quotient would be `{0,+,1}<%loop>` and the remainder would be `{0,+,%n}<%loop>`. If `%m` and `%n` have opposite signs, propagating the no-wrap flags from the numerator may be invalid.
This patch prevents the incorrect propagation of no-wrap flags in such cases and introduces a small inference for the `<NW>` flags in the remainder, primarily to avoid changing existing test results.
Fix #152566
>From 1e2b6d4c5631b1d228fbe3fecc5134de76c7be7a Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Wed, 20 Aug 2025 10:11:43 +0000
Subject: [PATCH] [SCEVDivision] Prevent propagation of incorrect no-wrap flags
---
llvm/lib/Analysis/ScalarEvolutionDivision.cpp | 22 ++-
.../Delinearization/fixed_size_array.ll | 2 +-
llvm/test/Analysis/Delinearization/wraps.ll | 130 ++++++++++++++++++
.../Analysis/DependenceAnalysis/DADelin.ll | 6 +-
4 files changed, 154 insertions(+), 6 deletions(-)
create mode 100644 llvm/test/Analysis/Delinearization/wraps.ll
diff --git a/llvm/lib/Analysis/ScalarEvolutionDivision.cpp b/llvm/lib/Analysis/ScalarEvolutionDivision.cpp
index d03930d9e2d99..c374d328c984a 100644
--- a/llvm/lib/Analysis/ScalarEvolutionDivision.cpp
+++ b/llvm/lib/Analysis/ScalarEvolutionDivision.cpp
@@ -141,10 +141,26 @@ void SCEVDivision::visitAddRecExpr(const SCEVAddRecExpr *Numerator) {
if (Ty != StartQ->getType() || Ty != StartR->getType() ||
Ty != StepQ->getType() || Ty != StepR->getType())
return cannotDivide(Numerator);
+
+ // Infer no-wrap flags for Remainder.
+ // TODO: Catch more cases.
+ SCEV::NoWrapFlags NumFlags = Numerator->getNoWrapFlags();
+ SCEV::NoWrapFlags RemFlags = SCEV::NoWrapFlags::FlagAnyWrap;
+ const SCEV *StepNumAbs =
+ SE.getAbsExpr(Numerator->getStepRecurrence(SE), /*IsNSW=*/false);
+ const SCEV *StepRAbs = SE.getAbsExpr(StepR, /*IsNSW=*/false);
+ const Loop *L = Numerator->getLoop();
+
+ // If abs(StepR) <=u abs(StepNum) and both are loop invariant, propagate
+ // the <NW> from Numerator to Remainder.
+ if (ScalarEvolution::hasFlags(NumFlags, SCEV::NoWrapFlags::FlagNW) &&
+ SE.isLoopInvariant(StepNumAbs, L) && SE.isLoopInvariant(StepRAbs, L) &&
+ SE.isKnownPredicate(ICmpInst::ICMP_ULE, StepRAbs, StepNumAbs))
+ RemFlags = ScalarEvolution::setFlags(RemFlags, SCEV::NoWrapFlags::FlagNW);
+
Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(),
- Numerator->getNoWrapFlags());
- Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(),
- Numerator->getNoWrapFlags());
+ SCEV::NoWrapFlags::FlagAnyWrap);
+ Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(), RemFlags);
}
void SCEVDivision::visitAddExpr(const SCEVAddExpr *Numerator) {
diff --git a/llvm/test/Analysis/Delinearization/fixed_size_array.ll b/llvm/test/Analysis/Delinearization/fixed_size_array.ll
index 0512044990163..e77fd79a49b35 100644
--- a/llvm/test/Analysis/Delinearization/fixed_size_array.ll
+++ b/llvm/test/Analysis/Delinearization/fixed_size_array.ll
@@ -163,7 +163,7 @@ exit:
; CHECK: Delinearization on function a_i_2j1_k:
; CHECK: Base offset: %a
; CHECK-NEXT: ArrayDecl[UnknownSize][4][64] with elements of 4 bytes.
-; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<nuw><%for.j.header>][{32,+,1}<nw><%for.k>]
+; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%for.i.header>][{0,+,1}<%for.j.header>][{32,+,1}<%for.k>]
define void @a_i_2j1_k(ptr %a) {
entry:
br label %for.i.header
diff --git a/llvm/test/Analysis/Delinearization/wraps.ll b/llvm/test/Analysis/Delinearization/wraps.ll
new file mode 100644
index 0000000000000..fc4935bad9939
--- /dev/null
+++ b/llvm/test/Analysis/Delinearization/wraps.ll
@@ -0,0 +1,130 @@
+; RUN: opt < %s -passes='print<delinearization>' -disable-output 2>&1 | FileCheck %s
+
+; In the following case, we don't know the concrete value of `m`, so we cannot
+; deduce no-wrap behavior for the quotient/remainder divided by `m`. However,
+; we can infer `{0,+,1}<%loop>` is nuw and nsw from the induction variable.
+;
+; for (int i = 0; i < btc; i++)
+; a[i * (m + 42)] = 0;
+
+; CHECK: AccessFunction: {0,+,(42 + %m)}<nuw><nsw><%loop>
+; CHECK-NEXT: Base offset: %a
+; CHECK-NEXT: ArrayDecl[UnknownSize][%m] with elements of 1 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><nsw><%loop>][{0,+,42}<%loop>]
+define void @divide_by_m0(ptr %a, i64 %m, i64 %btc) {
+entry:
+ %stride = add nsw nuw i64 %m, 42
+ br label %loop
+
+loop:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
+ %offset = phi i64 [ 0, %entry ], [ %offset.next, %loop ]
+ %idx = getelementptr inbounds i8, ptr %a, i64 %offset
+ store i8 0, ptr %idx
+ %i.next = add nsw nuw i64 %i, 1
+ %offset.next = add nsw nuw i64 %offset, %stride
+ %cond = icmp eq i64 %i.next, %btc
+ br i1 %cond, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; In the following case, we don't know the concrete value of `m`, so we cannot
+; deduce no-wrap behavior for the quotient/remainder divided by `m`. Also, we
+; don't infer nsw/nuw from the induction variable in this case.
+;
+; for (int i = 0; i < btc; i++)
+; a[i * (2 * m + 42)] = 0;
+
+; CHECK: AccessFunction: {0,+,(42 + (2 * %m))}<nuw><nsw><%loop>
+; CHECK-NEXT: Base offset: %a
+; CHECK-NEXT: ArrayDecl[UnknownSize][%m] with elements of 1 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,2}<%loop>][{0,+,42}<%loop>]
+define void @divide_by_m2(ptr %a, i64 %m, i64 %btc) {
+entry:
+ %m2 = add nsw nuw i64 %m, %m
+ %stride = add nsw nuw i64 %m2, 42
+ br label %loop
+
+loop:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
+ %offset = phi i64 [ 0, %entry ], [ %offset.next, %loop ]
+ %idx = getelementptr inbounds i8, ptr %a, i64 %offset
+ store i8 0, ptr %idx
+ %i.next = add nsw nuw i64 %i, 1
+ %offset.next = add nsw nuw i64 %offset, %stride
+ %cond = icmp eq i64 %i.next, %btc
+ br i1 %cond, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; In the following case, the `i * 2 * d` is always zero, so it's nsw and nuw.
+; However, the quotient divided by `d` is neither nsw nor nuw.
+;
+; if (d == 0)
+; for (unsigned long long i = 0; i != UINT64_MAX; i++)
+; a[i * 2 * d] = 42;
+
+; CHECK: AccessFunction: {0,+,(2 * %d)}<nuw><nsw><%loop>
+; CHECK-NEXT: Base offset: %a
+; CHECK-NEXT: ArrayDecl[UnknownSize][%d] with elements of 1 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,2}<%loop>][0]
+define void @divide_by_zero(ptr %a, i64 %d) {
+entry:
+ %guard = icmp eq i64 %d, 0
+ br i1 %guard, label %loop.preheader, label %exit
+
+loop.preheader:
+ %stride = mul nsw nuw i64 %d, 2 ; since %d is 0, %stride is also 0
+ br label %loop
+
+loop:
+ %i = phi i64 [ 0, %loop.preheader ], [ %i.next, %loop ]
+ %offset = phi i64 [ 0, %loop.preheader ], [ %offset.next, %loop ]
+ %idx = getelementptr inbounds i8, ptr %a, i64 %offset
+ store i8 42, ptr %idx
+ %i.next = add nuw i64 %i, 1
+ %offset.next = add nsw nuw i64 %offset, %stride
+ %cond = icmp eq i64 %i.next, -1
+ br i1 %cond, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; In the following case, the `i * (d + 1)` is always zero, so it's nsw and nuw.
+; However, the quotient/remainder divided by `d` is not nsw.
+;
+; if (d == UINT64_MAX)
+; for (unsigned long long i = 0; i != d; i++)
+; a[i * (d + 1)] = 42;
+
+; CHECK: AccessFunction: {0,+,(1 + %d)}<nuw><nsw><%loop>
+; CHECK-NEXT: Base offset: %a
+; CHECK-NEXT: ArrayDecl[UnknownSize][%d] with elements of 1 bytes.
+; CHECK-NEXT: ArrayRef[{0,+,1}<nuw><%loop>][{0,+,1}<nuw><%loop>]
+define void @divide_by_minus_one(ptr %a, i64 %d) {
+entry:
+ %guard = icmp eq i64 %d, -1
+ br i1 %guard, label %loop.preheader, label %exit
+
+loop.preheader:
+ %stride = add nsw i64 %d, 1 ; since %d is -1, %stride is 0
+ br label %loop
+
+loop:
+ %i = phi i64 [ 0, %loop.preheader ], [ %i.next, %loop ]
+ %offset = phi i64 [ 0, %loop.preheader ], [ %offset.next, %loop ]
+ %idx = getelementptr inbounds i8, ptr %a, i64 %offset
+ store i8 42, ptr %idx
+ %i.next = add nuw i64 %i, 1
+ %offset.next = add nsw nuw i64 %offset, %stride
+ %cond = icmp eq i64 %i.next, %d
+ br i1 %cond, label %exit, label %loop
+
+exit:
+ ret void
+}
diff --git a/llvm/test/Analysis/DependenceAnalysis/DADelin.ll b/llvm/test/Analysis/DependenceAnalysis/DADelin.ll
index 8f94a455d3724..f670136aed750 100644
--- a/llvm/test/Analysis/DependenceAnalysis/DADelin.ll
+++ b/llvm/test/Analysis/DependenceAnalysis/DADelin.ll
@@ -479,14 +479,16 @@ for.cond.cleanup: ; preds = %for.cond.cleanup3,
;; for (int k = 1; k < o; k++)
;; = A[i*m*o + j*o + k]
;; A[i*m*o + j*o + k - 1] =
+;;
+;; FIXME: Currently fails to infer nsw for the SCEV `{0,+,1}<for.body8>`
define void @t8(i32 %n, i32 %m, i32 %o, ptr nocapture %A) {
; CHECK-LABEL: 't8'
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx, align 4
; CHECK-NEXT: da analyze - none!
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
-; CHECK-NEXT: da analyze - consistent anti [0 0 1]!
+; CHECK-NEXT: da analyze - anti [* * *|<]!
; CHECK-NEXT: Src: store i32 %add12, ptr %arrayidx2, align 4 --> Dst: store i32 %add12, ptr %arrayidx2, align 4
-; CHECK-NEXT: da analyze - none!
+; CHECK-NEXT: da analyze - output [* * *]!
;
entry:
%cmp49 = icmp sgt i32 %n, 0
More information about the llvm-commits
mailing list