[llvm] [SCEV] Handle non-constant start values in AddRec UDiv canonicalization. (PR #170474)

Wed Dec 3 04:51:18 PST 2025

https://github.com/fhahn created https://github.com/llvm/llvm-project/pull/170474

Follow-up to https://github.com/llvm/llvm-project/pull/169576 to enable UDiv canonicalization if the start of the AddRec is not constant.

The fold is not restricted to constant start values, as long as we are able to compute a constant remainder. The fold is only applied if the subtraction of the remainder can be folded into the start expression, but that is just to avoid creating more complex AddRecs.

For reference, the proof from #169576 is https://alive2.llvm.org/ce/z/iu2tav

>From 7e97483dbe4315e6c85f627d10a3049452093577 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 3 Dec 2025 12:49:46 +0000
Subject: [PATCH] [SCEV] Handle non-constant start values in AddRec UDiv
 canonicalization.

Follow-up to https://github.com/llvm/llvm-project/pull/169576 to enable
UDiv canonicalization if the start of the AddRec is not constant.

The fold is not restricted to constant start values, as long as we are
able to compute a constant remainder. The fold is only applied if the
subtraction of the remainder can be folded into the start expression, but
that is just to avoid creating more complex AddRecs.

For reference, the proof from #169576 is https://alive2.llvm.org/ce/z/iu2tav
---
 llvm/lib/Analysis/ScalarEvolution.cpp         | 20 ++++++-----
 .../addrec-may-wrap-udiv-canonicalize.ll      | 16 ++++-----
 .../LoopVectorize/X86/uniformshift.ll         | 33 +++++++++++++++++++
 3 files changed, 53 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 5f6718d6cbcd8..d744a3fcc0042 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -3491,10 +3491,9 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
           /// Get a canonical UDivExpr for a recurrence.
           /// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0.
           // We can currently only fold X%N if X is constant.
-          const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
-          if (StartC && !DivInt.urem(StepInt)) {
-            const APInt &StartInt = StartC->getAPInt();
-            const APInt &StartRem = StartInt.urem(StepInt);
+          const APInt *StartRem;
+          if (!DivInt.urem(StepInt) && match(getURemExpr(AR->getStart(), Step),
+                                             m_scev_APInt(StartRem))) {
             bool NoWrap =
                 getZeroExtendExpr(AR, ExtTy) ==
                 getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
@@ -3507,10 +3506,15 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
             // all offsets in [[(X - X%N), X).
             bool CanFoldWithWrap = StepInt.ule(DivInt) && // N <= C
                                    StepInt.isPowerOf2() && DivInt.isPowerOf2();
-            if (StartRem != 0 && (NoWrap || CanFoldWithWrap)) {
-              const SCEV *NewLHS = getAddRecExpr(
-                  getConstant(StartInt - StartRem), Step, AR->getLoop(),
-                  NoWrap ? SCEV::FlagNW : SCEV::FlagAnyWrap);
+            // Only fold if the subtraction can be folded in the start
+            // expression.
+            const SCEV *NewStart =
+                getMinusSCEV(AR->getStart(), getConstant(*StartRem));
+            if (*StartRem != 0 && (NoWrap || CanFoldWithWrap) &&
+                !isa<SCEVAddExpr>(NewStart)) {
+              const SCEV *NewLHS =
+                  getAddRecExpr(NewStart, Step, AR->getLoop(),
+                                NoWrap ? SCEV::FlagNW : SCEV::FlagAnyWrap);
               if (LHS != NewLHS) {
                 LHS = NewLHS;
 
diff --git a/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll b/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll
index 9a9a6a7d45931..e041c96371762 100644
--- a/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll
+++ b/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll
@@ -180,7 +180,7 @@ define void @test_step2_start_outer_add_rec_step_16(i64 %n, i64 %m) {
 ; CHECK-NEXT:    %iv.1 = add i64 %iv, 1
 ; CHECK-NEXT:    --> {{\{\{}}1,+,16}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %div.1 = udiv i64 %iv.1, 4
-; CHECK-NEXT:    --> ({{\{\{}}1,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
+; CHECK-NEXT:    --> ({{\{\{}}0,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %iv.2 = add i64 %iv, 2
 ; CHECK-NEXT:    --> {{\{\{}}2,+,16}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %div.2 = udiv i64 %iv.2, 4
@@ -188,7 +188,7 @@ define void @test_step2_start_outer_add_rec_step_16(i64 %n, i64 %m) {
 ; CHECK-NEXT:    %iv.3 = add i64 %iv, 3
 ; CHECK-NEXT:    --> {{\{\{}}3,+,16}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %div.3 = udiv i64 %iv.3, 4
-; CHECK-NEXT:    --> ({{\{\{}}3,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
+; CHECK-NEXT:    --> ({{\{\{}}2,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %iv.4 = add i64 %iv, 4
 ; CHECK-NEXT:    --> {{\{\{}}4,+,16}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %div.4 = udiv i64 %iv.4, 4
@@ -196,11 +196,11 @@ define void @test_step2_start_outer_add_rec_step_16(i64 %n, i64 %m) {
 ; CHECK-NEXT:    %iv.5 = add i64 %iv, 5
 ; CHECK-NEXT:    --> {{\{\{}}5,+,16}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %div.5 = udiv i64 %iv.5, 4
-; CHECK-NEXT:    --> ({{\{\{}}5,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
+; CHECK-NEXT:    --> ({{\{\{}}4,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %iv.neg.1 = add i64 %iv, -1
 ; CHECK-NEXT:    --> {{\{\{}}-1,+,16}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %div.neg.1 = udiv i64 %iv.neg.1, 4
-; CHECK-NEXT:    --> ({{\{\{}}-1,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
+; CHECK-NEXT:    --> ({{\{\{}}-2,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %div3.0 = udiv i64 %iv, 3
 ; CHECK-NEXT:    --> ({{\{\{}}0,+,16}<%outer.header>,+,2}<%loop> /u 3) U: [0,6148914691236517205) S: [0,6148914691236517206) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %div3.1 = udiv i64 %iv.1, 3
@@ -296,7 +296,7 @@ define void @test_step2_div4_start_outer_add_rec_step_2(i64 %n, i64 %m) {
 ; CHECK-NEXT:    %iv.1 = add i64 %iv, 1
 ; CHECK-NEXT:    --> {{\{\{}}1,+,2}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %div.1 = udiv i64 %iv.1, 4
-; CHECK-NEXT:    --> ({{\{\{}}1,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
+; CHECK-NEXT:    --> ({{\{\{}}0,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %iv.2 = add i64 %iv, 2
 ; CHECK-NEXT:    --> {{\{\{}}2,+,2}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %div.2 = udiv i64 %iv.2, 4
@@ -304,7 +304,7 @@ define void @test_step2_div4_start_outer_add_rec_step_2(i64 %n, i64 %m) {
 ; CHECK-NEXT:    %iv.3 = add i64 %iv, 3
 ; CHECK-NEXT:    --> {{\{\{}}3,+,2}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %div.3 = udiv i64 %iv.3, 4
-; CHECK-NEXT:    --> ({{\{\{}}3,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
+; CHECK-NEXT:    --> ({{\{\{}}2,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %iv.4 = add i64 %iv, 4
 ; CHECK-NEXT:    --> {{\{\{}}4,+,2}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %div.4 = udiv i64 %iv.4, 4
@@ -312,11 +312,11 @@ define void @test_step2_div4_start_outer_add_rec_step_2(i64 %n, i64 %m) {
 ; CHECK-NEXT:    %iv.5 = add i64 %iv, 5
 ; CHECK-NEXT:    --> {{\{\{}}5,+,2}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %div.5 = udiv i64 %iv.5, 4
-; CHECK-NEXT:    --> ({{\{\{}}5,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
+; CHECK-NEXT:    --> ({{\{\{}}4,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %iv.neg.1 = add i64 %iv, -1
 ; CHECK-NEXT:    --> {{\{\{}}-1,+,2}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %div.neg.1 = udiv i64 %iv.neg.1, 4
-; CHECK-NEXT:    --> ({{\{\{}}-1,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
+; CHECK-NEXT:    --> ({{\{\{}}-2,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %div3.0 = udiv i64 %iv, 3
 ; CHECK-NEXT:    --> ({{\{\{}}0,+,2}<%outer.header>,+,2}<%loop> /u 3) U: [0,6148914691236517205) S: [0,6148914691236517206) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %div3.1 = udiv i64 %iv.1, 3
diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll b/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll
index b612bfb88198e..02c0b676374f4 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll
@@ -48,4 +48,37 @@ exit:
   ret void
 }
 
+define i64 @sdiv_arg_outer_iv(ptr noalias %dst, ptr %src) {
+; CHECK: 'sdiv_arg_outer_iv'
+; CHECK: Cost of 0 for VF 2: CLONE ir<%div> = sdiv ir<%add.offset>, ir<8>
+; CHECK: Cost of 0 for VF 4: CLONE ir<%div> = sdiv ir<%add.offset>, ir<8>
+; CHECK: Cost of 0 for VF 8: CLONE ir<%div> = sdiv ir<%add.offset>, ir<8>
+; CHECK: Cost of 0 for VF 16: REPLICATE ir<%div> = sdiv ir<%add.offset>, ir<8>
+entry:
+  br label %outer.header
+
+outer.header:
+  %outer.iv = phi i32 [ 0, %entry ], [ %outer.iv.next, %outer.latch ]
+  %offset = shl nsw i32 %outer.iv, 7
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %outer.header ], [ %iv.next, %loop ]
+  %iv.trunc = trunc i64 %iv to i32
+  %add.offset = add i32 %offset, %iv.trunc
+  %div = sdiv i32 %add.offset, 8
+  %div.ext = sext i32 %div to i64
+  %gep.src = getelementptr i8, ptr %src, i64 %div.ext
+  %l = load i8, ptr %gep.src, align 1
+  %gep.dst = getelementptr i8, ptr %dst, i64 %iv
+  store i8 %l, ptr %gep.dst, align 1
+  %iv.next = add i64 %iv, 1
+  %ec = icmp eq i64 %iv, 64
+  br i1 %ec, label %outer.latch, label %loop
+
+outer.latch:
+  %outer.iv.next = add nsw i32 %outer.iv, 1
+  br label %outer.header
+}
+
 attributes #0 = { "target-features"="+avx2" "tune-cpu"="alderlake" }