[llvm] 1054a6e - [SCEV] Handle non-constant start values in AddRec UDiv canonicalization. (#170474)

via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 3 13:13:16 PST 2025


Author: Florian Hahn
Date: 2025-12-03T21:13:11Z
New Revision: 1054a6e9dee0198da0a3d234fd3254aa9e143319

URL: https://github.com/llvm/llvm-project/commit/1054a6e9dee0198da0a3d234fd3254aa9e143319
DIFF: https://github.com/llvm/llvm-project/commit/1054a6e9dee0198da0a3d234fd3254aa9e143319.diff

LOG: [SCEV] Handle non-constant start values in AddRec UDiv canonicalization. (#170474)

Follow-up to https://github.com/llvm/llvm-project/pull/169576 to enable
UDiv canonicalization if the start of the AddRec is not constant.

The fold is not restricted to constant start values, as long as we are
able to compute a constant remainder. The fold is only applied if the
subtraction of the remainder can be folded into to start expression, but
that is just to avoid creating more complex AddRecs.

For reference, the proof from #169576 is
https://alive2.llvm.org/ce/z/iu2tav

PR: https://github.com/llvm/llvm-project/pull/170474

Added: 
    

Modified: 
    llvm/lib/Analysis/ScalarEvolution.cpp
    llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll
    llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 0ac0ca7463131..1d7a8b981b5ee 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -3490,11 +3490,9 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
           }
           /// Get a canonical UDivExpr for a recurrence.
           /// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0.
-          // We can currently only fold X%N if X is constant.
-          const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
-          if (StartC && !DivInt.urem(StepInt)) {
-            const APInt &StartInt = StartC->getAPInt();
-            const APInt &StartRem = StartInt.urem(StepInt);
+          const APInt *StartRem;
+          if (!DivInt.urem(StepInt) && match(getURemExpr(AR->getStart(), Step),
+                                             m_scev_APInt(StartRem))) {
             bool NoWrap =
                 getZeroExtendExpr(AR, ExtTy) ==
                 getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
@@ -3507,10 +3505,15 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
             // all offsets in [[(X - X%N), X).
             bool CanFoldWithWrap = StepInt.ule(DivInt) && // N <= C
                                    StepInt.isPowerOf2() && DivInt.isPowerOf2();
-            if (StartRem != 0 && (NoWrap || CanFoldWithWrap)) {
-              const SCEV *NewLHS = getAddRecExpr(
-                  getConstant(StartInt - StartRem), Step, AR->getLoop(),
-                  NoWrap ? SCEV::FlagNW : SCEV::FlagAnyWrap);
+            // Only fold if the subtraction can be folded in the start
+            // expression.
+            const SCEV *NewStart =
+                getMinusSCEV(AR->getStart(), getConstant(*StartRem));
+            if (*StartRem != 0 && (NoWrap || CanFoldWithWrap) &&
+                !isa<SCEVAddExpr>(NewStart)) {
+              const SCEV *NewLHS =
+                  getAddRecExpr(NewStart, Step, AR->getLoop(),
+                                NoWrap ? SCEV::FlagNW : SCEV::FlagAnyWrap);
               if (LHS != NewLHS) {
                 LHS = NewLHS;
 

diff  --git a/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll b/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll
index 9a9a6a7d45931..e041c96371762 100644
--- a/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll
+++ b/llvm/test/Analysis/ScalarEvolution/addrec-may-wrap-udiv-canonicalize.ll
@@ -180,7 +180,7 @@ define void @test_step2_start_outer_add_rec_step_16(i64 %n, i64 %m) {
 ; CHECK-NEXT:    %iv.1 = add i64 %iv, 1
 ; CHECK-NEXT:    --> {{\{\{}}1,+,16}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %div.1 = udiv i64 %iv.1, 4
-; CHECK-NEXT:    --> ({{\{\{}}1,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
+; CHECK-NEXT:    --> ({{\{\{}}0,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %iv.2 = add i64 %iv, 2
 ; CHECK-NEXT:    --> {{\{\{}}2,+,16}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %div.2 = udiv i64 %iv.2, 4
@@ -188,7 +188,7 @@ define void @test_step2_start_outer_add_rec_step_16(i64 %n, i64 %m) {
 ; CHECK-NEXT:    %iv.3 = add i64 %iv, 3
 ; CHECK-NEXT:    --> {{\{\{}}3,+,16}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %div.3 = udiv i64 %iv.3, 4
-; CHECK-NEXT:    --> ({{\{\{}}3,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
+; CHECK-NEXT:    --> ({{\{\{}}2,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %iv.4 = add i64 %iv, 4
 ; CHECK-NEXT:    --> {{\{\{}}4,+,16}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %div.4 = udiv i64 %iv.4, 4
@@ -196,11 +196,11 @@ define void @test_step2_start_outer_add_rec_step_16(i64 %n, i64 %m) {
 ; CHECK-NEXT:    %iv.5 = add i64 %iv, 5
 ; CHECK-NEXT:    --> {{\{\{}}5,+,16}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %div.5 = udiv i64 %iv.5, 4
-; CHECK-NEXT:    --> ({{\{\{}}5,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
+; CHECK-NEXT:    --> ({{\{\{}}4,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %iv.neg.1 = add i64 %iv, -1
 ; CHECK-NEXT:    --> {{\{\{}}-1,+,16}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %div.neg.1 = udiv i64 %iv.neg.1, 4
-; CHECK-NEXT:    --> ({{\{\{}}-1,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
+; CHECK-NEXT:    --> ({{\{\{}}-2,+,16}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %div3.0 = udiv i64 %iv, 3
 ; CHECK-NEXT:    --> ({{\{\{}}0,+,16}<%outer.header>,+,2}<%loop> /u 3) U: [0,6148914691236517205) S: [0,6148914691236517206) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %div3.1 = udiv i64 %iv.1, 3
@@ -296,7 +296,7 @@ define void @test_step2_div4_start_outer_add_rec_step_2(i64 %n, i64 %m) {
 ; CHECK-NEXT:    %iv.1 = add i64 %iv, 1
 ; CHECK-NEXT:    --> {{\{\{}}1,+,2}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %div.1 = udiv i64 %iv.1, 4
-; CHECK-NEXT:    --> ({{\{\{}}1,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
+; CHECK-NEXT:    --> ({{\{\{}}0,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %iv.2 = add i64 %iv, 2
 ; CHECK-NEXT:    --> {{\{\{}}2,+,2}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %div.2 = udiv i64 %iv.2, 4
@@ -304,7 +304,7 @@ define void @test_step2_div4_start_outer_add_rec_step_2(i64 %n, i64 %m) {
 ; CHECK-NEXT:    %iv.3 = add i64 %iv, 3
 ; CHECK-NEXT:    --> {{\{\{}}3,+,2}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %div.3 = udiv i64 %iv.3, 4
-; CHECK-NEXT:    --> ({{\{\{}}3,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
+; CHECK-NEXT:    --> ({{\{\{}}2,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %iv.4 = add i64 %iv, 4
 ; CHECK-NEXT:    --> {{\{\{}}4,+,2}<%outer.header>,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %div.4 = udiv i64 %iv.4, 4
@@ -312,11 +312,11 @@ define void @test_step2_div4_start_outer_add_rec_step_2(i64 %n, i64 %m) {
 ; CHECK-NEXT:    %iv.5 = add i64 %iv, 5
 ; CHECK-NEXT:    --> {{\{\{}}5,+,2}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %div.5 = udiv i64 %iv.5, 4
-; CHECK-NEXT:    --> ({{\{\{}}5,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
+; CHECK-NEXT:    --> ({{\{\{}}4,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %iv.neg.1 = add i64 %iv, -1
 ; CHECK-NEXT:    --> {{\{\{}}-1,+,2}<%outer.header>,+,2}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %div.neg.1 = udiv i64 %iv.neg.1, 4
-; CHECK-NEXT:    --> ({{\{\{}}-1,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
+; CHECK-NEXT:    --> ({{\{\{}}-2,+,2}<%outer.header>,+,2}<%loop> /u 4) U: [0,4611686018427387904) S: [0,4611686018427387904) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %div3.0 = udiv i64 %iv, 3
 ; CHECK-NEXT:    --> ({{\{\{}}0,+,2}<%outer.header>,+,2}<%loop> /u 3) U: [0,6148914691236517205) S: [0,6148914691236517206) Exits: <<Unknown>> LoopDispositions: { %loop: Computable, %outer.header: Variant }
 ; CHECK-NEXT:    %div3.1 = udiv i64 %iv.1, 3

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll b/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll
index b612bfb88198e..02c0b676374f4 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll
@@ -48,4 +48,37 @@ exit:
   ret void
 }
 
+define i64 @sdiv_arg_outer_iv(ptr noalias %dst, ptr %src) {
+; CHECK: 'sdiv_arg_outer_iv'
+; CHECK: Cost of 0 for VF 2: CLONE ir<%div> = sdiv ir<%add.offset>, ir<8>
+; CHECK: Cost of 0 for VF 4: CLONE ir<%div> = sdiv ir<%add.offset>, ir<8>
+; CHECK: Cost of 0 for VF 8: CLONE ir<%div> = sdiv ir<%add.offset>, ir<8>
+; CHECK: Cost of 0 for VF 16: REPLICATE ir<%div> = sdiv ir<%add.offset>, ir<8>
+entry:
+  br label %outer.header
+
+outer.header:
+  %outer.iv = phi i32 [ 0, %entry ], [ %outer.iv.next, %outer.latch ]
+  %offset = shl nsw i32 %outer.iv, 7
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %outer.header ], [ %iv.next, %loop ]
+  %iv.trunc = trunc i64 %iv to i32
+  %add.offset = add i32 %offset, %iv.trunc
+  %div = sdiv i32 %add.offset, 8
+  %div.ext = sext i32 %div to i64
+  %gep.src = getelementptr i8, ptr %src, i64 %div.ext
+  %l = load i8, ptr %gep.src, align 1
+  %gep.dst = getelementptr i8, ptr %dst, i64 %iv
+  store i8 %l, ptr %gep.dst, align 1
+  %iv.next = add i64 %iv, 1
+  %ec = icmp eq i64 %iv, 64
+  br i1 %ec, label %outer.latch, label %loop
+
+outer.latch:
+  %outer.iv.next = add nsw i32 %outer.iv, 1
+  br label %outer.header
+}
+
 attributes #0 = { "target-features"="+avx2" "tune-cpu"="alderlake" }


        


More information about the llvm-commits mailing list