[llvm] [SCEV] Fold (C1 * A /u C2) -> A /u (C2 /u C1), if C2 > C1. (PR #157656)

Tue Sep 9 05:03:23 PDT 2025

https://github.com/fhahn created https://github.com/llvm/llvm-project/pull/157656

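If C2 >u C1 and C1 >u 1, fold (C1 * A /u C2) to A /u (C2 /u C1). The fold is guarded by the same checks as the existing (C1 * D /u C2) fold: C1 and C2 must both be powers of 2, and log2(C1) must not exceed the number of known trailing zero bits of A.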

Depends on https://github.com/llvm/llvm-project/pull/157555. (included in PR)

Alive2 Proof: https://alive2.llvm.org/ce/z/BWvQYN

>From b635047aab2a1a1fb1da1c573926fb3eaf78c104 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 8 Sep 2025 20:29:39 +0100
Subject: [PATCH 1/2] [SCEV] Fold ((-1 * C1) * D / C1) -> -1 * D.

Treat negative constants C1 as -1 * abs(C1) when folding multiplies and
udivs.

Alive2 Proof: https://alive2.llvm.org/ce/z/bdj9W2
---
 llvm/lib/Analysis/ScalarEvolution.cpp            | 16 ++++++++++------
 .../Analysis/ScalarEvolution/mul-udiv-folds.ll   |  2 +-
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 34e497d9ea3cb..30aa0856ae61d 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -3217,15 +3217,19 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
       }
 
       // Try to fold (C1 * D /u C2) -> C1/C2 * D, if C1 and C2 are powers-of-2,
-      // D is a multiple of C2, and C1 is a multiple of C1.
+      // D is a multiple of C2, and C1 is a multiple of C2.
       const SCEV *D;
+      APInt C1V = LHSC->getAPInt();
+      // If C1 is negative, try (-1 * abs(C1)) instead.
+      if (C1V.isNegative() && !C1V.isMinSignedValue())
+        C1V = C1V.abs();
       const SCEVConstant *C2;
-      const APInt &LHSV = LHSC->getAPInt();
-      if (LHSV.isPowerOf2() &&
+      if (C1V.isPowerOf2() &&
           match(Ops[1], m_scev_UDiv(m_SCEV(D), m_SCEVConstant(C2))) &&
-          C2->getAPInt().isPowerOf2() && LHSV.uge(C2->getAPInt()) &&
-          LHSV.logBase2() <= getMinTrailingZeros(D)) {
-        return getMulExpr(getUDivExpr(LHSC, C2), D);
+          C2->getAPInt().isPowerOf2() && C1V.uge(C2->getAPInt()) &&
+          C1V.logBase2() <= getMinTrailingZeros(D)) {
+        const SCEV *NewMul = getMulExpr(getUDivExpr(getConstant(C1V), C2), D);
+        return C1V == LHSC->getAPInt() ? NewMul : getNegativeSCEV(NewMul);
       }
     }
   }
diff --git a/llvm/test/Analysis/ScalarEvolution/mul-udiv-folds.ll b/llvm/test/Analysis/ScalarEvolution/mul-udiv-folds.ll
index dfaf0c95bc2f8..8dd8ec47e7090 100644
--- a/llvm/test/Analysis/ScalarEvolution/mul-udiv-folds.ll
+++ b/llvm/test/Analysis/ScalarEvolution/mul-udiv-folds.ll
@@ -23,7 +23,7 @@ define void @udiv4_and_udiv2(i1 %c, ptr %A) {
 ; CHECK-NEXT:    %gep.16 = getelementptr i16, ptr %A, i64 %iv
 ; CHECK-NEXT:    --> {((2 * ((zext i32 %start to i64) /u 4))<nuw><nsw> + %A),+,2}<%loop> U: full-set S: full-set Exits: ((zext i32 %start to i64) + %A) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %gep.32 = getelementptr i32, ptr %A, i64 %iv
-; CHECK-NEXT:    --> {((zext i32 %start to i64) + %A),+,4}<%loop> U: full-set S: full-set Exits: ((3 * (zext i32 %start to i64))<nuw><nsw> + (-4 * ((zext i32 %start to i64) /u 4))<nsw> + %A) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {((zext i32 %start to i64) + %A),+,4}<%loop> U: full-set S: full-set Exits: ((2 * (zext i32 %start to i64))<nuw><nsw> + %A) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %gep.40 = getelementptr <{ i32, i8 }>, ptr %A, i64 %iv
 ; CHECK-NEXT:    --> {((5 * ((zext i32 %start to i64) /u 4))<nuw><nsw> + %A),+,5}<%loop> U: full-set S: full-set Exits: ((5 * ((zext i32 %start to i64) /u 2))<nuw><nsw> + %A) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %gep.48 = getelementptr <{ i32, i16 }>, ptr %A, i64 %iv

>From 9881292e47f1e8f932f4c24f1b3fe05762725eff Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 8 Sep 2025 14:22:33 +0100
Subject: [PATCH 2/2] [SCEV] Fold (C1 * A /u C2) -> A /u (C2 /u C1), if C2 >
 C1.

If C2 >u C1 and C1 >u 1, fold to A /u (C2 /u C1).

Depends on https://github.com/llvm/llvm-project/pull/157555.

Alive2 Proof: https://alive2.llvm.org/ce/z/BWvQYN
---
 llvm/lib/Analysis/ScalarEvolution.cpp              | 14 ++++++++++----
 .../Analysis/ScalarEvolution/mul-udiv-folds.ll     |  2 +-
 .../LoopStrengthReduce/duplicated-phis.ll          |  3 +--
 3 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 30aa0856ae61d..9f5b33f50cc0b 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -3217,7 +3217,8 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
       }
 
       // Try to fold (C1 * D /u C2) -> C1/C2 * D, if C1 and C2 are powers-of-2,
-      // D is a multiple of C2, and C1 is a multiple of C2.
+      // D is a multiple of C2, and C1 is a multiple of C2. If C2 is a multiple
+      // of C1, fold to (D /u (C2 /u C1)).
       const SCEV *D;
       APInt C1V = LHSC->getAPInt();
       // If C1 is negative, try (-1 * abs(C1)) instead.
@@ -3226,10 +3227,15 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
       const SCEVConstant *C2;
       if (C1V.isPowerOf2() &&
           match(Ops[1], m_scev_UDiv(m_SCEV(D), m_SCEVConstant(C2))) &&
-          C2->getAPInt().isPowerOf2() && C1V.uge(C2->getAPInt()) &&
+          C2->getAPInt().isPowerOf2() &&
           C1V.logBase2() <= getMinTrailingZeros(D)) {
-        const SCEV *NewMul = getMulExpr(getUDivExpr(getConstant(C1V), C2), D);
-        return C1V == LHSC->getAPInt() ? NewMul : getNegativeSCEV(NewMul);
+        const SCEV *NewMul = nullptr;
+        if (C1V.uge(C2->getAPInt()))
+          NewMul = getMulExpr(getUDivExpr(getConstant(C1V), C2), D);
+        else if (C1V.ugt(1))
+          NewMul = getUDivExpr(D, getUDivExpr(C2, getConstant(C1V)));
+        if (NewMul)
+          return C1V == LHSC->getAPInt() ? NewMul : getNegativeSCEV(NewMul);
       }
     }
   }
diff --git a/llvm/test/Analysis/ScalarEvolution/mul-udiv-folds.ll b/llvm/test/Analysis/ScalarEvolution/mul-udiv-folds.ll
index 8dd8ec47e7090..1d34706baadeb 100644
--- a/llvm/test/Analysis/ScalarEvolution/mul-udiv-folds.ll
+++ b/llvm/test/Analysis/ScalarEvolution/mul-udiv-folds.ll
@@ -21,7 +21,7 @@ define void @udiv4_and_udiv2(i1 %c, ptr %A) {
 ; CHECK-NEXT:    %gep.8 = getelementptr i8, ptr %A, i64 %iv
 ; CHECK-NEXT:    --> {(((zext i32 %start to i64) /u 4) + %A),+,1}<%loop> U: full-set S: full-set Exits: (((zext i32 %start to i64) /u 2) + %A) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %gep.16 = getelementptr i16, ptr %A, i64 %iv
-; CHECK-NEXT:    --> {((2 * ((zext i32 %start to i64) /u 4))<nuw><nsw> + %A),+,2}<%loop> U: full-set S: full-set Exits: ((zext i32 %start to i64) + %A) LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {(((zext i32 %start to i64) /u 2) + %A),+,2}<%loop> U: full-set S: full-set Exits: ((zext i32 %start to i64) + %A) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %gep.32 = getelementptr i32, ptr %A, i64 %iv
 ; CHECK-NEXT:    --> {((zext i32 %start to i64) + %A),+,4}<%loop> U: full-set S: full-set Exits: ((2 * (zext i32 %start to i64))<nuw><nsw> + %A) LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %gep.40 = getelementptr <{ i32, i8 }>, ptr %A, i64 %iv
diff --git a/llvm/test/Transforms/LoopStrengthReduce/duplicated-phis.ll b/llvm/test/Transforms/LoopStrengthReduce/duplicated-phis.ll
index cee8c8abdb450..c59f7d9c2a41a 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/duplicated-phis.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/duplicated-phis.ll
@@ -18,8 +18,7 @@ define i64 @test_duplicated_phis(i64 noundef %N) {
 ; CHECK:       [[FOR_BODY_PREHEADER_NEW]]:
 ; CHECK-NEXT:    [[UNROLL_ITER:%.*]] = and i64 [[MUL]], -4
 ; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[UNROLL_ITER]], -4
-; CHECK-NEXT:    [[TMP5:%.*]] = lshr i64 [[TMP4]], 2
-; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw nsw i64 [[TMP5]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr i64 [[TMP4]], 1
 ; CHECK-NEXT:    [[LSR_IV_NEXT:%.*]] = sub i64 -3, [[TMP3]]
 ; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
 ; CHECK:       [[FOR_BODY]]: