[llvm] [SCEV] Allow udiv canonicalization of potentially-wrapping AddRecs (PR #169576)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Mon Dec 1 10:25:42 PST 2025


https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/169576

>From 5227c08b11fa3505830a1267032f7957ecf331f2 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Tue, 25 Nov 2025 21:56:17 +0000
Subject: [PATCH 1/2] [SCEV] Allow udiv canonicalization of
 potentially-wrapping AddRecs

Extend the {X,+,N}/C => {(X - X%N),+,N}/C canonicalization to handle
AddRecs that may wrap, when X < N <= C and both N,C are powers of 2.
The alignment and power-of-2 properties ensure division results remain
equivalent for all offsets in [(X - X%N), X).

Alive2 Proof: https://alive2.llvm.org/ce/z/VGUZo3

Fixes https://github.com/llvm/llvm-project/issues/168709
---
 llvm/lib/Analysis/ScalarEvolution.cpp         | 28 +++++++++++++------
 .../addrec-may-wrap-udiv-canonicalize.ll      |  8 +++---
 .../LoopVectorize/X86/uniformshift.ll         | 28 ++++++++++++++++++-
 3 files changed, 50 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index a31f17b1936d6..e6be22a8e4148 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -3492,17 +3492,27 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
           /// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0.
           // We can currently only fold X%N if X is constant.
           const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
-          if (StartC && !DivInt.urem(StepInt) &&
-              getZeroExtendExpr(AR, ExtTy) ==
-              getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
-                            getZeroExtendExpr(Step, ExtTy),
-                            AR->getLoop(), SCEV::FlagAnyWrap)) {
+          if (StartC && !DivInt.urem(StepInt)) {
             const APInt &StartInt = StartC->getAPInt();
             const APInt &StartRem = StartInt.urem(StepInt);
-            if (StartRem != 0) {
-              const SCEV *NewLHS =
-                  getAddRecExpr(getConstant(StartInt - StartRem), Step,
-                                AR->getLoop(), SCEV::FlagNW);
+            bool NoWrap =
+                getZeroExtendExpr(AR, ExtTy) ==
+                getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
+                              getZeroExtendExpr(Step, ExtTy), AR->getLoop(),
+                              SCEV::FlagAnyWrap);
+
+            // With X < N <= C and both N, C as powers-of-2, the transformation
+            // {X,+,N}/C => {(X - X%N),+,N}/C preserves division results even
+            // if wrapping occurs, as the division results remain equivalent for
+            // all offsets in [(X - X%N), X).
+            bool CanFoldWithWrap = StepInt.isStrictlyPositive() &&
+                                   StartInt.ult(StepInt) && // X < N
+                                   StepInt.sle(DivInt) &&   // N <= C
+                                   StepInt.isPowerOf2() && DivInt.isPowerOf2();
+            if (StartRem != 0 && (NoWrap || CanFoldWithWrap)) {
+              const SCEV *NewLHS = getAddRecExpr(
+                  getConstant(StartInt - StartRem), Step, AR->getLoop(),
+                  NoWrap ? SCEV::FlagNW : SCEV::FlagAnyWrap);
               if (LHS != NewLHS) {
                 LHS = NewLHS;
 
diff --git a/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll b/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll
index 0a6ef0dad4569..bb0744f8c4550 100644
--- a/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll
+++ b/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll
@@ -13,7 +13,7 @@ define void @test_step2_div4(i64 %n) {
 ; CHECK-NEXT:    %iv.1 = add i64 %iv, 1
 ; CHECK-NEXT:    --> {1,+,2}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %div.1 = udiv i64 %iv.1, 4
-; CHECK-NEXT:    --> ({1,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> ({0,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.2 = add i64 %iv, 2
 ; CHECK-NEXT:    --> {2,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %div.2 = udiv i64 %iv.2, 4
@@ -114,15 +114,15 @@ define void @test_step4_div4(i64 %n) {
 ; CHECK-NEXT:    %iv.1 = add i64 %iv, 1
 ; CHECK-NEXT:    --> {1,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %div.1 = udiv i64 %iv.1, 4
-; CHECK-NEXT:    --> ({1,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> ({0,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.2 = add i64 %iv, 2
 ; CHECK-NEXT:    --> {2,+,4}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %div.2 = udiv i64 %iv.2, 4
-; CHECK-NEXT:    --> ({2,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> ({0,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.3 = add i64 %iv, 3
 ; CHECK-NEXT:    --> {3,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %div.3 = udiv i64 %iv.3, 4
-; CHECK-NEXT:    --> ({3,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> ({0,+,4}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.4 = add i64 %iv, 4
 ; CHECK-NEXT:    --> {4,+,4}<%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %div.4 = udiv i64 %iv.4, 4
diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll b/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll
index 166875dd55aae..b612bfb88198e 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll
@@ -5,7 +5,7 @@
 ; CHECK: LV: Found an estimated cost of 1 for VF 1 For instruction:   %shift = ashr i32 %val, %k
 ; CHECK: Cost of 2 for VF 2: WIDEN ir<%shift> = ashr ir<%val>, ir<%k>
 ; CHECK: Cost of 2 for VF 4: WIDEN ir<%shift> = ashr ir<%val>, ir<%k>
-define void @foo(ptr nocapture %p, i32 %k) local_unnamed_addr #0 {
+define void @foo(ptr nocapture %p, i32 %k) local_unnamed_addr {
 entry:
   br label %body
 
@@ -21,5 +21,31 @@ body:
 
 exit:
   ret void
+}
+
+; CHECK: 'shift_and_masked_load_store'
+; CHECK: Cost of 1 for VF 2: CLONE ir<%shifted> = lshr vp<{{.+}}>, ir<2>
+; CHECK: Cost of 1 for VF 4: CLONE ir<%shifted> = lshr vp<{{.+}}>, ir<2>
+; CHECK: Cost of 4 for VF 8: WIDEN ir<%shifted> = lshr ir<%iv>, ir<2>
+define void @shift_and_masked_load_store(i64 %trip.count) #0 {
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %shifted = lshr i64 %iv, 2
+  %masked.idx = and i64 %shifted, 1
+  %load.ptr = getelementptr i16, ptr poison, i64 %masked.idx
+  %val = load i16, ptr %load.ptr, align 2
+  %store.idx = shl nuw i64 %iv, 2
+  %store.ptr = getelementptr i8, ptr poison, i64 %store.idx
+  store i16 %val, ptr %store.ptr, align 2
+  %iv.next = add i64 %iv, 1
+  %cmp = icmp eq i64 %iv, %trip.count
+  br i1 %cmp, label %exit, label %loop
 
+exit:
+  ret void
 }
+
+attributes #0 = { "target-features"="+avx2" "tune-cpu"="alderlake" }

>From 43d04f72aee9bdc0e55b93968c7e1b524c6e657d Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 1 Dec 2025 18:24:55 +0000
Subject: [PATCH 2/2] !fixup SLE -> ULE, thanks

---
 llvm/lib/Analysis/ScalarEvolution.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index e6be22a8e4148..515f330f9dcec 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -3505,9 +3505,8 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
             // {X,+,N}/C => {(X - X%N),+,N}/C preserves division results even
             // if wrapping occurs, as the division results remain equivalent for
             // all offsets in [(X - X%N), X).
-            bool CanFoldWithWrap = StepInt.isStrictlyPositive() &&
-                                   StartInt.ult(StepInt) && // X < N
-                                   StepInt.sle(DivInt) &&   // N <= C
+            bool CanFoldWithWrap = StartInt.ult(StepInt) && // X < N
+                                   StepInt.ule(DivInt) &&   // N <= C
                                    StepInt.isPowerOf2() && DivInt.isPowerOf2();
             if (StartRem != 0 && (NoWrap || CanFoldWithWrap)) {
               const SCEV *NewLHS = getAddRecExpr(



More information about the llvm-commits mailing list