[llvm] [SCEV] Allow udiv canonicalization of potentially-wrapping AddRecs (PR #169576)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 1 07:42:44 PST 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/169576
>From 3e2f0d20b0ece6aa84e016ef26906055f80a4a93 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 25 Nov 2025 21:56:17 +0000
Subject: [PATCH] [SCEV] Allow udiv canonicalization of potentially-wrapping
AddRecs
Extend the {X,+,N}/C => {(X - X%N),+,N}/C canonicalization to handle
AddRecs that may wrap, when X < N <= C and both N,C are powers of 2.
The alignment and power-of-2 properties ensure division results remain
equivalent for all offsets [(X - X%N), X).
Alive2 Proof: https://alive2.llvm.org/ce/z/VGUZo3
Fixes https://github.com/llvm/llvm-project/issues/168709
---
llvm/lib/Analysis/ScalarEvolution.cpp | 28 +++++++++++++------
.../addrec-may-wrap-udiv-canonicalize.ll | 8 +++---
.../LoopVectorize/X86/uniformshift.ll | 28 ++++++++++++++++++-
3 files changed, 50 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index a31f17b1936d6..e6be22a8e4148 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -3492,17 +3492,27 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
/// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0.
// We can currently only fold X%N if X is constant.
const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
- if (StartC && !DivInt.urem(StepInt) &&
- getZeroExtendExpr(AR, ExtTy) ==
- getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
- getZeroExtendExpr(Step, ExtTy),
- AR->getLoop(), SCEV::FlagAnyWrap)) {
+ if (StartC && !DivInt.urem(StepInt)) {
const APInt &StartInt = StartC->getAPInt();
const APInt &StartRem = StartInt.urem(StepInt);
- if (StartRem != 0) {
- const SCEV *NewLHS =
- getAddRecExpr(getConstant(StartInt - StartRem), Step,
- AR->getLoop(), SCEV::FlagNW);
+ bool NoWrap =
+ getZeroExtendExpr(AR, ExtTy) ==
+ getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
+ getZeroExtendExpr(Step, ExtTy), AR->getLoop(),
+ SCEV::FlagAnyWrap);
+
+ // With X < N <= C and both N, C as powers-of-2, the transformation
+ // {X,+,N}/C => {(X - X%N),+,N}/C preserves division results even
+ // if wrapping occurs, as the division results remain equivalent for
+ // all offsets in [[(X - X%N), X).
+ bool CanFoldWithWrap = StepInt.isStrictlyPositive() &&
+ StartInt.ult(StepInt) && // X < N
+ StepInt.sle(DivInt) && // N <= C
+ StepInt.isPowerOf2() && DivInt.isPowerOf2();
+ if (StartRem != 0 && (NoWrap || CanFoldWithWrap)) {
+ const SCEV *NewLHS = getAddRecExpr(
+ getConstant(StartInt - StartRem), Step, AR->getLoop(),
+ NoWrap ? SCEV::FlagNW : SCEV::FlagAnyWrap);
if (LHS != NewLHS) {
LHS = NewLHS;
diff --git a/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll b/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll
index 0a6ef0dad4569..bb0744f8c4550 100644
--- a/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll
+++ b/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll
@@ -13,7 +13,7 @@ define void @test_step2_div4(i64 %n) {
; CHECK-NEXT: %iv.1 = add i64 %iv, 1
; CHECK-NEXT: --> {1,+,2}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %div.1 = udiv i64 %iv.1, 4
-; CHECK-NEXT: --> ({1,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> ({0,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.2 = add i64 %iv, 2
; CHECK-NEXT: --> {2,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %div.2 = udiv i64 %iv.2, 4
@@ -114,15 +114,15 @@ define void @test_step4_div4(i64 %n) {
; CHECK-NEXT: %iv.1 = add i64 %iv, 1
; CHECK-NEXT: --> {1,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %div.1 = udiv i64 %iv.1, 4
-; CHECK-NEXT: --> ({1,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> ({0,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.2 = add i64 %iv, 2
; CHECK-NEXT: --> {2,+,4}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %div.2 = udiv i64 %iv.2, 4
-; CHECK-NEXT: --> ({2,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> ({0,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.3 = add i64 %iv, 3
; CHECK-NEXT: --> {3,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %div.3 = udiv i64 %iv.3, 4
-; CHECK-NEXT: --> ({3,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT: --> ({0,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %iv.4 = add i64 %iv, 4
; CHECK-NEXT: --> {4,+,4}<%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
; CHECK-NEXT: %div.4 = udiv i64 %iv.4, 4
diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll b/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll
index 166875dd55aae..b612bfb88198e 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll
@@ -5,7 +5,7 @@
; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction: %shift = ashr i32 %val, %k
; CHECK: Cost of 2 for VF 2: WIDEN ir<%shift> = ashr ir<%val>, ir<%k>
; CHECK: Cost of 2 for VF 4: WIDEN ir<%shift> = ashr ir<%val>, ir<%k>
-define void @foo(ptr nocapture %p, i32 %k) local_unnamed_addr #0 {
+define void @foo(ptr nocapture %p, i32 %k) local_unnamed_addr {
entry:
br label %body
@@ -21,5 +21,31 @@ body:
exit:
ret void
+}
+
+; CHECK: 'shift_and_masked_load_store'
+; CHECK: Cost of 1 for VF 2: CLONE ir<%shifted> = lshr vp<{{.+}}>, ir<2>
+; CHECK: Cost of 1 for VF 4: CLONE ir<%shifted> = lshr vp<{{.+}}>, ir<2>
+; CHECK: Cost of 4 for VF 8: WIDEN ir<%shifted> = lshr ir<%iv>, ir<2>
+define void @shift_and_masked_load_store(i64 %trip.count) #0 {
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %shifted = lshr i64 %iv, 2
+ %masked.idx = and i64 %shifted, 1
+ %load.ptr = getelementptr i16, ptr poison, i64 %masked.idx
+ %val = load i16, ptr %load.ptr, align 2
+ %store.idx = shl nuw i64 %iv, 2
+ %store.ptr = getelementptr i8, ptr poison, i64 %store.idx
+ store i16 %val, ptr %store.ptr, align 2
+ %iv.next = add i64 %iv, 1
+ %cmp = icmp eq i64 %iv, %trip.count
+ br i1 %cmp, label %exit, label %loop
+exit:
+ ret void
}
+
+attributes #0 = { "target-features"="+avx2" "tune-cpu"="alderlake" }
More information about the llvm-commits
mailing list