[llvm] [SCCP] Fold y = f(x) = (Cx mod M) ∈ R into x ∈ R' (PR #186347)
Kunqiu Chen via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 26 01:48:21 PDT 2026
https://github.com/Camsyn updated https://github.com/llvm/llvm-project/pull/186347
>From 4b2a73fd025d96914d9358b30e86688a023885b2 Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Mon, 23 Mar 2026 17:47:30 +0800
Subject: [PATCH 1/9] Pre-commit tests
---
.../Transforms/SCCP/relax-range-checks.ll | 123 ++++++++++++++++++
1 file changed, 123 insertions(+)
diff --git a/llvm/test/Transforms/SCCP/relax-range-checks.ll b/llvm/test/Transforms/SCCP/relax-range-checks.ll
index 34e48136df37a..f7c4f6f468929 100644
--- a/llvm/test/Transforms/SCCP/relax-range-checks.ll
+++ b/llvm/test/Transforms/SCCP/relax-range-checks.ll
@@ -113,4 +113,127 @@ define i1 @range_check_to_icmp_eq2(i32 range(i32 -1, 2) %x) {
ret i1 %cmp
}
+define i1 @range_check_to_icmp_ult(i8 range(i8 2, 10) %x) {
+; CHECK-LABEL: define i1 @range_check_to_icmp_ult(
+; CHECK-SAME: i8 range(i8 2, 10) [[X:%.*]]) {
+; CHECK-NEXT: [[OFF:%.*]] = add nsw i8 [[X]], -2
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[OFF]], 4
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %off = add i8 %x, -2
+ %cmp = icmp ult i8 %off, 4
+ ret i1 %cmp
+}
+
+define i1 @range_check_to_icmp_uge(i8 range(i8 2, 6) %x) {
+; CHECK-LABEL: define i1 @range_check_to_icmp_uge(
+; CHECK-SAME: i8 range(i8 2, 6) [[X:%.*]]) {
+; CHECK-NEXT: [[OFF:%.*]] = add nsw i8 [[X]], -4
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[OFF]], 2
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %off = add nsw i8 %x, -4
+ %cmp = icmp ult i8 %off, 2
+ ret i1 %cmp
+}
+
+define i1 @range_check_to_icmp_slt(i8 range(i8 -56, 20) %x) {
+; CHECK-LABEL: define i1 @range_check_to_icmp_slt(
+; CHECK-SAME: i8 range(i8 -56, 20) [[X:%.*]]) {
+; CHECK-NEXT: [[OFF:%.*]] = add nsw i8 [[X]], 56
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[OFF]], 50
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %off = add nsw i8 %x, 56
+ %cmp = icmp ult i8 %off, 50
+ ret i1 %cmp
+}
+
+define i1 @range_check_to_icmp_sge(i8 range(i8 -56, 20) %x) {
+; CHECK-LABEL: define i1 @range_check_to_icmp_sge(
+; CHECK-SAME: i8 range(i8 -56, 20) [[X:%.*]]) {
+; CHECK-NEXT: [[OFF:%.*]] = add nsw i8 [[X]], 16
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[OFF]], 36
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %off = add nsw i8 %x, 16
+ %cmp = icmp ult i8 %off, 36
+ ret i1 %cmp
+}
+
+; Cover the early exit when ActiveCmpCR is already a one-icmp check.
+
+define i1 @range_check_intersection_to_icmp_eq(i32 range(i32 0, 4) %x) {
+; CHECK-LABEL: define i1 @range_check_intersection_to_icmp_eq(
+; CHECK-SAME: i32 range(i32 0, 4) [[X:%.*]]) {
+; CHECK-NEXT: [[OFF:%.*]] = add nsw i32 [[X]], -3
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 3
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %off = add nsw i32 %x, -3
+ %cmp = icmp ult i32 %off, 2
+ ret i1 %cmp
+}
+
+define i1 @range_check_intersection_to_icmp_ult(i8 range(i8 0, 10) %x) {
+; CHECK-LABEL: define i1 @range_check_intersection_to_icmp_ult(
+; CHECK-SAME: i8 range(i8 0, 10) [[X:%.*]]) {
+; CHECK-NEXT: [[OFF:%.*]] = add nuw nsw i8 [[X]], 2
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[X]], 4
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %off = add i8 %x, 2
+ %cmp = icmp ult i8 %off, 6
+ ret i1 %cmp
+}
+
+define i1 @range_check_intersection_to_icmp_slt(i8 range(i8 -128, -100) %x) {
+; CHECK-LABEL: define i1 @range_check_intersection_to_icmp_slt(
+; CHECK-SAME: i8 range(i8 -128, -100) [[X:%.*]]) {
+; CHECK-NEXT: [[OFF:%.*]] = add i8 [[X]], -120
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[X]], -118
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %off = add i8 %x, -120
+ %cmp = icmp ult i8 %off, 18
+ ret i1 %cmp
+}
+
+define i1 @range_check_intersection_to_icmp_uge(i8 range(i8 -6, 0) %x) {
+; CHECK-LABEL: define i1 @range_check_intersection_to_icmp_uge(
+; CHECK-SAME: i8 range(i8 -6, 0) [[X:%.*]]) {
+; CHECK-NEXT: [[OFF:%.*]] = add nsw i8 [[X]], 2
+; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[X]], -2
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %off = add i8 %x, 2
+ %cmp = icmp ult i8 %off, 6
+ ret i1 %cmp
+}
+
+define i1 @range_check_intersection_to_icmp_sge(i8 range(i8 120, -128) %x) {
+; CHECK-LABEL: define i1 @range_check_intersection_to_icmp_sge(
+; CHECK-SAME: i8 range(i8 120, -128) [[X:%.*]]) {
+; CHECK-NEXT: [[OFF:%.*]] = add nsw i8 [[X]], -122
+; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[X]], 122
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %off = add i8 %x, -122
+ %cmp = icmp ult i8 %off, 14
+ ret i1 %cmp
+}
+
+; Negative test: CmpCR relaxation cannot be performed when x's range is both wrapped and sign-wrapped.
+define i1 @range_check_nsw_nuw(i8 range(i8 -20, -56) %x) {
+; CHECK-LABEL: define i1 @range_check_nsw_nuw(
+; CHECK-SAME: i8 range(i8 -20, -56) [[X:%.*]]) {
+; CHECK-NEXT: [[OFF:%.*]] = add i8 [[X]], 20
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[OFF]], 14
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %off = add i8 %x, 20
+ %cmp = icmp ult i8 %off, 14
+ ret i1 %cmp
+}
+
declare void @use(i8)
>From 4be1c34e10a4c538047f7c64eff8a9e0e4f0fd1c Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Mon, 23 Mar 2026 18:06:10 +0800
Subject: [PATCH 2/9] Relax 2-insts range check to one-icmp check
---
llvm/lib/Transforms/Utils/SCCPSolver.cpp | 130 +++++++++++++++---
.../Transforms/SCCP/relax-range-checks.ll | 8 +-
2 files changed, 112 insertions(+), 26 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index fd315c14df866..c3414d63ada57 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/SCCPSolver.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -105,6 +106,96 @@ static ConstantRange getRange(Value *Op, SCCPSolver &Solver,
/*UndefAllowed=*/false);
}
+/// SCCP already proves x \in KnownCR, so only ActiveCmpCR = CmpCR ∩ KnownCR
+/// matters. Try to replace CmpCR with a simpler equivalent range NewCmpCR
+/// such that NewCmpCR ∩ KnownCR == ActiveCmpCR.
+///
+/// Prefer ranges that lower to a single canonical compare without an add:
+/// - [L, L+1) --> X eq L
+/// - [R+1, R) --> X ne R
+/// - [0, R) --> X ult R
+/// - [L, 0) --> X uge L
+/// - [SignMin, R) --> X slt R
+/// - [L, SignMin) --> X sge L
+///
+/// If no such range preserves the active semantics under KnownCR, keep CmpCR.
+static ConstantRange simplifyCmpRange(const ConstantRange &CmpCR,
+ const ConstantRange &KnownCR) {
+ assert(!KnownCR.inverse().contains(CmpCR) &&
+ "CmpCR ∩ KnowCR should not be ∅");
+ assert((!CmpCR.isFullSet() && !CmpCR.isEmptySet()) && "Unexpected CmpCR");
+ assert((!KnownCR.isFullSet() && !KnownCR.isEmptySet()) &&
+ "Unexpected KnownCR");
+
+ // If KnownCR is both nuw and nsw, we cannot relax CmpCR at all.
+ if (KnownCR.isWrappedSet() && KnownCR.isSignWrappedSet())
+ return CmpCR;
+
+ const unsigned BW = CmpCR.getBitWidth();
+ // All reachable value satisfy CmpCR --> always true.
+ if (CmpCR.contains(KnownCR))
+ return ConstantRange::getFull(BW);
+
+ std::optional<ConstantRange> ActCmpCR = CmpCR.exactIntersectWith(KnownCR);
+ if (!ActCmpCR)
+ return CmpCR;
+
+ const APInt &CmpLo = ActCmpCR->getLower(), &CmpHi = ActCmpCR->getUpper();
+
+ // If the intersection happens to be the ONE-icmp check, just return it.
+ if (/*eq*/ ActCmpCR->isSingleElement() ||
+ /*ne*/ ActCmpCR->inverse().isSingleElement() ||
+ /*ult*/ CmpLo.isZero() ||
+ /*slt*/ CmpLo.isMinSignedValue() ||
+ /*uge*/ CmpHi.isZero() ||
+ /*sge*/ CmpHi.isMinSignedValue())
+ return *ActCmpCR;
+
+ const APInt Zero = APInt::getZero(BW);
+ const APInt SignMin = APInt::getSignedMinValue(BW);
+
+ if (CmpLo == KnownCR.getLower()) {
+ // Tie to lower:
+
+ // Try ult
+ // 0
+ // | L------------R : KnownCR
+ // | L---R : ActiveCmpCR
+ // L------R : RelaxedCmpCR
+ if (!KnownCR.isWrappedSet())
+ return ConstantRange::getNonEmpty(Zero, CmpHi);
+
+ // Try slt
+ // smin smin
+ // -----R | L------- : KnownCR ----R | L------- : KnownCR
+ // | L--R : ActiveCmpCR --R | L------- : ActiveCmpCR
+ // L-----R : RelaxedCmpCR --R L---------- : RelaxedCmpCR
+ if (!KnownCR.isSignWrappedSet())
+ return ConstantRange::getNonEmpty(SignMin, CmpHi);
+
+ } else if (CmpHi == KnownCR.getUpper()) {
+ // Tie to upper:
+
+ // Try uge
+ // 0
+ // | L--------R : KnownCR
+ // | L---R : ActiveCmpCR
+ // R L---------- : RelaxedCmpCR
+ if (!KnownCR.isWrappedSet())
+ return ConstantRange::getNonEmpty(CmpLo, Zero);
+
+ // Try sge
+ // smin smin
+ // -----R | L------- : KnownCR -----R | L------- : KnownCR
+ // L--R | : ActiveCmpCR -----R | L--- : ActiveCmpCR
+ // L-----R : RelaxedCmpCR --------R L--- : RelaxedCmpCR
+ if (!KnownCR.isSignWrappedSet())
+ return ConstantRange::getNonEmpty(CmpLo, SignMin);
+ }
+
+ return CmpCR;
+}
+
/// Try to use \p Inst's value range from \p Solver to infer the NUW flag.
static bool refineInstruction(SCCPSolver &Solver,
const SmallPtrSetImpl<Value *> &InsertedValues,
@@ -318,29 +409,24 @@ static Value *simplifyInstruction(SCCPSolver &Solver,
// Early exit if we know nothing about X.
if (LRange.isFullSet())
return nullptr;
- auto ConvertCRToICmp =
- [&](const std::optional<ConstantRange> &NewCR) -> Value * {
- ICmpInst::Predicate Pred;
- APInt RHS;
- // Check if we can represent NewCR as an icmp predicate.
- if (NewCR && NewCR->getEquivalentICmp(Pred, RHS)) {
- IRBuilder<NoFolder> Builder(&Inst);
- Value *NewICmp =
- Builder.CreateICmp(Pred, X, ConstantInt::get(X->getType(), RHS));
- InsertedValues.insert(NewICmp);
- return NewICmp;
- }
- return nullptr;
- };
// We are allowed to refine the comparison to either true or false for out
- // of range inputs.
- // Here we refine the comparison to false, and check if we can narrow the
- // range check to a simpler test.
- if (auto *V = ConvertCRToICmp(CR->exactIntersectWith(LRange)))
- return V;
- // Here we refine the comparison to true, i.e. we relax the range check.
- if (auto *V = ConvertCRToICmp(CR->exactUnionWith(LRange.inverse())))
- return V;
+ // of range inputs. Based on this, try to simplify CmpCR as a single
+ // ult/uge/slt/sge/eq/ne.
+ // E.g., CmpCR = [3, 10), LRange = [5, 0) --> NewCmpCR = [0, 10) -> ult
+ ConstantRange NewCmpCR = simplifyCmpRange(*CR, LRange);
+
+ ICmpInst::Predicate Pred;
+ APInt RHS;
+ // If NewCmpCR is just the same as CR, no simplification happens.
+ if (NewCmpCR != *CR) {
+ bool Match [[maybe_unused]] = NewCmpCR.getEquivalentICmp(Pred, RHS);
+ assert(Match && "Incorrect simplifyCmpRange");
+ IRBuilder<NoFolder> Builder(&Inst);
+ Value *NewICmp =
+ Builder.CreateICmp(Pred, X, ConstantInt::get(X->getType(), RHS));
+ InsertedValues.insert(NewICmp);
+ return NewICmp;
+ }
}
}
diff --git a/llvm/test/Transforms/SCCP/relax-range-checks.ll b/llvm/test/Transforms/SCCP/relax-range-checks.ll
index f7c4f6f468929..998271b3b24d9 100644
--- a/llvm/test/Transforms/SCCP/relax-range-checks.ll
+++ b/llvm/test/Transforms/SCCP/relax-range-checks.ll
@@ -117,7 +117,7 @@ define i1 @range_check_to_icmp_ult(i8 range(i8 2, 10) %x) {
; CHECK-LABEL: define i1 @range_check_to_icmp_ult(
; CHECK-SAME: i8 range(i8 2, 10) [[X:%.*]]) {
; CHECK-NEXT: [[OFF:%.*]] = add nsw i8 [[X]], -2
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[OFF]], 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[X]], 6
; CHECK-NEXT: ret i1 [[CMP]]
;
%off = add i8 %x, -2
@@ -129,7 +129,7 @@ define i1 @range_check_to_icmp_uge(i8 range(i8 2, 6) %x) {
; CHECK-LABEL: define i1 @range_check_to_icmp_uge(
; CHECK-SAME: i8 range(i8 2, 6) [[X:%.*]]) {
; CHECK-NEXT: [[OFF:%.*]] = add nsw i8 [[X]], -4
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[OFF]], 2
+; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[X]], 4
; CHECK-NEXT: ret i1 [[CMP]]
;
%off = add nsw i8 %x, -4
@@ -141,7 +141,7 @@ define i1 @range_check_to_icmp_slt(i8 range(i8 -56, 20) %x) {
; CHECK-LABEL: define i1 @range_check_to_icmp_slt(
; CHECK-SAME: i8 range(i8 -56, 20) [[X:%.*]]) {
; CHECK-NEXT: [[OFF:%.*]] = add nsw i8 [[X]], 56
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[OFF]], 50
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[X]], -6
; CHECK-NEXT: ret i1 [[CMP]]
;
%off = add nsw i8 %x, 56
@@ -153,7 +153,7 @@ define i1 @range_check_to_icmp_sge(i8 range(i8 -56, 20) %x) {
; CHECK-LABEL: define i1 @range_check_to_icmp_sge(
; CHECK-SAME: i8 range(i8 -56, 20) [[X:%.*]]) {
; CHECK-NEXT: [[OFF:%.*]] = add nsw i8 [[X]], 16
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[OFF]], 36
+; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[X]], -16
; CHECK-NEXT: ret i1 [[CMP]]
;
%off = add nsw i8 %x, 16
>From 2c5b36424159b3dbdfe0bf0baf68626f3da6088c Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Tue, 24 Mar 2026 21:14:03 +0800
Subject: [PATCH 3/9] fix: CmpCR could be simple enough
---
llvm/lib/Transforms/Utils/SCCPSolver.cpp | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index c3414d63ada57..206b4dc0a7691 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/SCCPSolver.h"
-#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -417,10 +416,8 @@ static Value *simplifyInstruction(SCCPSolver &Solver,
ICmpInst::Predicate Pred;
APInt RHS;
- // If NewCmpCR is just the same as CR, no simplification happens.
- if (NewCmpCR != *CR) {
- bool Match [[maybe_unused]] = NewCmpCR.getEquivalentICmp(Pred, RHS);
- assert(Match && "Incorrect simplifyCmpRange");
+ // NewCmpCR might be CmpCR, i.e., no simplification happens.
+ if (NewCmpCR.getEquivalentICmp(Pred, RHS)) {
IRBuilder<NoFolder> Builder(&Inst);
Value *NewICmp =
Builder.CreateICmp(Pred, X, ConstantInt::get(X->getType(), RHS));
>From 6554b14e65496cfbf4f416cfe3ab646dae958912 Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Tue, 24 Mar 2026 23:44:23 +0800
Subject: [PATCH 4/9] fix: move a guard to its proper location
---
llvm/lib/Transforms/Utils/SCCPSolver.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index 206b4dc0a7691..44a11fb2d745f 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -126,10 +126,6 @@ static ConstantRange simplifyCmpRange(const ConstantRange &CmpCR,
assert((!KnownCR.isFullSet() && !KnownCR.isEmptySet()) &&
"Unexpected KnownCR");
- // If KnownCR is both nuw and nsw, we cannot relax CmpCR at all.
- if (KnownCR.isWrappedSet() && KnownCR.isSignWrappedSet())
- return CmpCR;
-
const unsigned BW = CmpCR.getBitWidth();
// All reachable value satisfy CmpCR --> always true.
if (CmpCR.contains(KnownCR))
@@ -150,6 +146,10 @@ static ConstantRange simplifyCmpRange(const ConstantRange &CmpCR,
/*sge*/ CmpHi.isMinSignedValue())
return *ActCmpCR;
+ // If KnownCR is both nuw and nsw, we cannot relax CmpCR at all.
+ if (KnownCR.isWrappedSet() && KnownCR.isSignWrappedSet())
+ return CmpCR;
+
const APInt Zero = APInt::getZero(BW);
const APInt SignMin = APInt::getSignedMinValue(BW);
>From 42f845f62ff5ce8578613ddb64678fc740c03fe7 Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Wed, 25 Mar 2026 00:41:38 +0800
Subject: [PATCH 5/9] fix: support ne relaxing
---
llvm/lib/Transforms/Utils/SCCPSolver.cpp | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index 44a11fb2d745f..62f49dd7f6a2a 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -146,16 +146,19 @@ static ConstantRange simplifyCmpRange(const ConstantRange &CmpCR,
/*sge*/ CmpHi.isMinSignedValue())
return *ActCmpCR;
- // If KnownCR is both nuw and nsw, we cannot relax CmpCR at all.
- if (KnownCR.isWrappedSet() && KnownCR.isSignWrappedSet())
- return CmpCR;
-
const APInt Zero = APInt::getZero(BW);
const APInt SignMin = APInt::getSignedMinValue(BW);
if (CmpLo == KnownCR.getLower()) {
// Tie to lower:
+ // Try ne
+ // L------------R : KnownCR
+ // L-----------R : ActiveCmpCR
+ // ---------------RL-- : RelaxedCmpCR
+ if (CmpHi + 1 == KnownCR.getUpper())
+ return ConstantRange::getNonEmpty(KnownCR.getUpper(), CmpHi);
+
// Try ult
// 0
// | L------------R : KnownCR
@@ -175,6 +178,14 @@ static ConstantRange simplifyCmpRange(const ConstantRange &CmpCR,
} else if (CmpHi == KnownCR.getUpper()) {
// Tie to upper:
+ // Try ne
+ //
+ // L--------R : KnownCR
+ // L-------R : ActiveCmpCR
+ // ---RL-------------- : RelaxedCmpCR
+ if (KnownCR.getLower() + 1 == CmpLo)
+ return ConstantRange::getNonEmpty(CmpLo, KnownCR.getLower());
+
// Try uge
// 0
// | L--------R : KnownCR
>From 756d74fcf107c5df3f7d02885d3b6a62a32c7acc Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Wed, 25 Mar 2026 20:42:11 +0800
Subject: [PATCH 6/9] fix: prefer ne rather than ge/lt
---
llvm/lib/Transforms/Utils/SCCPSolver.cpp | 45 ++++++++++++------------
1 file changed, 23 insertions(+), 22 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index 62f49dd7f6a2a..cfbf733eb52c8 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -134,13 +134,29 @@ static ConstantRange simplifyCmpRange(const ConstantRange &CmpCR,
std::optional<ConstantRange> ActCmpCR = CmpCR.exactIntersectWith(KnownCR);
if (!ActCmpCR)
return CmpCR;
+ // Proof that ActCmpCR cannot be ne:
+ // 1. ActCmpCR = ne ∧ ActCmpCR ⊆ KnownCR -> KnownCR = ActCmpCR/fullset
+ // 2. KnownCR = fullset contradicts KnownCR != fullset
+ // 3. KnownCR = ActCmpCR = KnownCR ∩ CmpCR -> KnownCR ⊆ CmpCR
+ // 4. KnownCR ⊆ CmpCR contradicts KnownCR ⊈ CmpCR
+ assert(/*ne*/ !ActCmpCR->inverse().isSingleElement() && "Unexpected ne");
+
+ // We prefer eq rather than ne.
+ if (/*eq*/ ActCmpCR->isSingleElement())
+ return *ActCmpCR;
+
+ // We prefer ne rather than lt/ge.
+ // L--------R : KnownCR or L------------R : KnownCR
+ // L-------R : ActiveCmpCR L-----------R : ActiveCmpCR
+ // ---RL-------------- : RelaxedCmpCR ---------------RL-- : RelaxedCmpCR
+ if (const ConstantRange FalseCR = KnownCR.intersectWith(ActCmpCR->inverse());
+ FalseCR.isSingleElement())
+ return FalseCR.inverse();
const APInt &CmpLo = ActCmpCR->getLower(), &CmpHi = ActCmpCR->getUpper();
// If the intersection happens to be the ONE-icmp check, just return it.
- if (/*eq*/ ActCmpCR->isSingleElement() ||
- /*ne*/ ActCmpCR->inverse().isSingleElement() ||
- /*ult*/ CmpLo.isZero() ||
+ if (/*ult*/ CmpLo.isZero() ||
/*slt*/ CmpLo.isMinSignedValue() ||
/*uge*/ CmpHi.isZero() ||
/*sge*/ CmpHi.isMinSignedValue())
@@ -152,14 +168,7 @@ static ConstantRange simplifyCmpRange(const ConstantRange &CmpCR,
if (CmpLo == KnownCR.getLower()) {
// Tie to lower:
- // Try ne
- // L------------R : KnownCR
- // L-----------R : ActiveCmpCR
- // ---------------RL-- : RelaxedCmpCR
- if (CmpHi + 1 == KnownCR.getUpper())
- return ConstantRange::getNonEmpty(KnownCR.getUpper(), CmpHi);
-
- // Try ult
+ // Try ult.
// 0
// | L------------R : KnownCR
// | L---R : ActiveCmpCR
@@ -167,7 +176,7 @@ static ConstantRange simplifyCmpRange(const ConstantRange &CmpCR,
if (!KnownCR.isWrappedSet())
return ConstantRange::getNonEmpty(Zero, CmpHi);
- // Try slt
+ // Try slt.
// smin smin
// -----R | L------- : KnownCR ----R | L------- : KnownCR
// | L--R : ActiveCmpCR --R | L------- : ActiveCmpCR
@@ -178,15 +187,7 @@ static ConstantRange simplifyCmpRange(const ConstantRange &CmpCR,
} else if (CmpHi == KnownCR.getUpper()) {
// Tie to upper:
- // Try ne
- //
- // L--------R : KnownCR
- // L-------R : ActiveCmpCR
- // ---RL-------------- : RelaxedCmpCR
- if (KnownCR.getLower() + 1 == CmpLo)
- return ConstantRange::getNonEmpty(CmpLo, KnownCR.getLower());
-
- // Try uge
+ // Try uge.
// 0
// | L--------R : KnownCR
// | L---R : ActiveCmpCR
@@ -194,7 +195,7 @@ static ConstantRange simplifyCmpRange(const ConstantRange &CmpCR,
if (!KnownCR.isWrappedSet())
return ConstantRange::getNonEmpty(CmpLo, Zero);
- // Try sge
+ // Try sge.
// smin smin
// -----R | L------- : KnownCR -----R | L------- : KnownCR
// L--R | : ActiveCmpCR -----R | L--- : ActiveCmpCR
>From 80911f3d6b43ece71c03717431a9753d9fb3fb70 Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Thu, 26 Mar 2026 01:32:59 +0800
Subject: [PATCH 7/9] Pre-commit tests
---
.../SCCP/eager-invertible-periodic-mapping.ll | 21 +
.../invertible-periodic-linear-mapping.ll | 573 ++++++++++++++++++
2 files changed, 594 insertions(+)
create mode 100644 llvm/test/Transforms/SCCP/eager-invertible-periodic-mapping.ll
create mode 100644 llvm/test/Transforms/SCCP/invertible-periodic-linear-mapping.ll
diff --git a/llvm/test/Transforms/SCCP/eager-invertible-periodic-mapping.ll b/llvm/test/Transforms/SCCP/eager-invertible-periodic-mapping.ll
new file mode 100644
index 0000000000000..ef59b3d5f448e
--- /dev/null
+++ b/llvm/test/Transforms/SCCP/eager-invertible-periodic-mapping.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=sccp -S | FileCheck %s --check-prefix=SCCP
+; RUN: opt < %s -passes=ipsccp -S | FileCheck %s --check-prefix=IPSCCP
+
+define i1 @mul_preimage_only_in_late_sccp(i8 range(i8 0, 5) %x) {
+; SCCP-LABEL: define i1 @mul_preimage_only_in_late_sccp(
+; SCCP-SAME: i8 range(i8 0, 5) [[X:%.*]]) {
+; SCCP-NEXT: [[M:%.*]] = mul nuw nsw i8 [[X]], 17
+; SCCP-NEXT: [[CMP:%.*]] = icmp slt i8 [[M]], 17
+; SCCP-NEXT: ret i1 [[CMP]]
+;
+; IPSCCP-LABEL: define i1 @mul_preimage_only_in_late_sccp(
+; IPSCCP-SAME: i8 range(i8 0, 5) [[X:%.*]]) {
+; IPSCCP-NEXT: [[M:%.*]] = mul nuw nsw i8 [[X]], 17
+; IPSCCP-NEXT: [[CMP:%.*]] = icmp slt i8 [[M]], 17
+; IPSCCP-NEXT: ret i1 [[CMP]]
+;
+ %m = mul i8 %x, 17
+ %cmp = icmp slt i8 %m, 17
+ ret i1 %cmp
+}
diff --git a/llvm/test/Transforms/SCCP/invertible-periodic-linear-mapping.ll b/llvm/test/Transforms/SCCP/invertible-periodic-linear-mapping.ll
new file mode 100644
index 0000000000000..0ce7b25de517a
--- /dev/null
+++ b/llvm/test/Transforms/SCCP/invertible-periodic-linear-mapping.ll
@@ -0,0 +1,573 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=sccp,dce -S | FileCheck %s
+
+; Test for icmp (mul (zext x), C) to icmp x,
+; if mul is invertible on given predicate constraint
+; Refer to https://github.com/llvm/llvm-project/pull/186347 to understand
+; the mathematical model.
+
+; Comes from https://github.com/llvm/llvm-project/pull/185907#discussion_r2919506475
+; N = 9, M = 27
+; n = 2^9 = 512, m = 2^27 = 134217728, C = 262657
+; k = floor((n - 1) * C / m) = floor(511 * 262657 / 134217728) = 0
+; CR = [-2^26, 262657), Y = [0, 134217728)
+; Invertible: yes
+define i1 @slt_invertible_zext_mul_full_image(<2 x i9> %v) {
+; CHECK-LABEL: define i1 @slt_invertible_zext_mul_full_image(
+; CHECK-SAME: <2 x i9> [[V:%.*]]) {
+; CHECK-NEXT: [[E:%.*]] = extractelement <2 x i9> [[V]], i64 0
+; CHECK-NEXT: [[Z:%.*]] = zext i9 [[E]] to i27
+; CHECK-NEXT: [[M:%.*]] = mul nuw i27 [[Z]], 262657
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i27 [[M]], 262657
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %e = extractelement <2 x i9> %v, i64 0
+ %z = zext i9 %e to i27
+ %m = mul i27 %z, 262657
+ %cmp = icmp slt i27 %m, 262657
+ ret i1 %cmp
+}
+
+; N = 8, M = 16
+; n = 2^8 = 256, m = 2^16 = 65536, C = 257
+; k = floor((n - 1) * C / m) = floor(255 * 257 / 65536) = 0
+; CR = [-2^15, 257), Y = [0, 65536)
+; Invertible: yes
+define i1 @slt_invertible_zext_mul_full_image_i16(i8 %v) {
+; CHECK-LABEL: define i1 @slt_invertible_zext_mul_full_image_i16(
+; CHECK-SAME: i8 [[V:%.*]]) {
+; CHECK-NEXT: [[Z:%.*]] = zext i8 [[V]] to i16
+; CHECK-NEXT: [[M:%.*]] = mul nuw i16 [[Z]], 257
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i16 [[M]], 257
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %z = zext i8 %v to i16
+ %m = mul nuw i16 %z, 257
+ %cmp = icmp slt i16 %m, 257
+ ret i1 %cmp
+}
+
+; N = 4, M = 8
+; n = 2^4 = 16, m = 2^8 = 256, C = 17
+; k = floor((n - 1) * C / m) = floor(15 * 17 / 256) = 0
+; CR = [-2^7, 17), Y = [0, 256)
+; Invertible: yes
+define i1 @slt_invertible_zext_mul_full_image_i8(i4 %v) {
+; CHECK-LABEL: define i1 @slt_invertible_zext_mul_full_image_i8(
+; CHECK-SAME: i4 [[V:%.*]]) {
+; CHECK-NEXT: [[Z:%.*]] = zext i4 [[V]] to i8
+; CHECK-NEXT: [[M:%.*]] = mul nuw i8 [[Z]], 17
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i8 [[M]], 17
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %z = zext i4 %v to i8
+ %m = mul nuw i8 %z, 17
+ %cmp = icmp slt i8 %m, 17
+ ret i1 %cmp
+}
+
+; N = 4, M = 8
+; n = 2^4 = 16, m = 2^8 = 256, C = 10
+; k = floor((n - 1) * C / m) = floor(15 * 10 / 256) = 0
+; CR = [0, 50), Y = [0, 151)
+; Invertible: yes, because CR ⊆ Y
+define i1 @ult_invertible_zext_mul_partial_image(i4 %x) {
+; CHECK-LABEL: define i1 @ult_invertible_zext_mul_partial_image(
+; CHECK-SAME: i4 [[X:%.*]]) {
+; CHECK-NEXT: [[Z:%.*]] = zext i4 [[X]] to i8
+; CHECK-NEXT: [[M:%.*]] = mul nuw i8 [[Z]], 10
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[M]], 50
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %z = zext i4 %x to i8
+ %m = mul i8 %z, 10
+ %cmp = icmp ult i8 %m, 50
+ ret i1 %cmp
+}
+
+; N = 4, M = 8
+; n = 2^4 = 16, m = 2^8 = 256, C = 10
+; k = floor((n - 1) * C / m) = floor(15 * 10 / 256) = 0
+; CR = [0, 200), Y = [0, 151)
+; Invertible: yes
+define i1 @ult_invertible_zext_mul_all_true(i4 %x) {
+; CHECK-LABEL: define i1 @ult_invertible_zext_mul_all_true(
+; CHECK-SAME: i4 [[X:%.*]]) {
+; CHECK-NEXT: ret i1 true
+;
+ %z = zext i4 %x to i8
+ %m = mul i8 %z, 10
+ %cmp = icmp ult i8 %m, 200
+ ret i1 %cmp
+}
+
+; N = 4, M = 8
+; n = 2^4 = 16, m = 2^8 = 256, C = 10
+; k = floor((n - 1) * C / m) = floor(15 * 10 / 256) = 0
+; CR = [200, 256), Y = [0, 151)
+; Invertible: yes
+define i1 @uge_invertible_zext_mul_all_false(i4 %x) {
+; CHECK-LABEL: define i1 @uge_invertible_zext_mul_all_false(
+; CHECK-SAME: i4 [[X:%.*]]) {
+; CHECK-NEXT: ret i1 false
+;
+ %z = zext i4 %x to i8
+ %m = mul i8 %z, 10
+ %cmp = icmp uge i8 %m, 200
+ ret i1 %cmp
+}
+
+; N = 4, M = 8
+; n = 2^4 = 16, m = 2^8 = 256, C = 20
+; k = floor((n - 1) * C / m) = floor(15 * 20 / 256) = 1
+; CR = [60, 128), Y = [45, 256)
+; Invertible: yes
+define i1 @sge_invertible_tail_of_zext_mul(i4 %x) {
+; CHECK-LABEL: define i1 @sge_invertible_tail_of_zext_mul(
+; CHECK-SAME: i4 [[X:%.*]]) {
+; CHECK-NEXT: [[Z:%.*]] = zext i4 [[X]] to i8
+; CHECK-NEXT: [[M:%.*]] = mul i8 [[Z]], 20
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sge i8 [[M]], 60
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %z = zext i4 %x to i8
+ %m = mul i8 %z, 20
+ %cmp = icmp sge i8 %m, 60
+ ret i1 %cmp
+}
+
+; N = 4, M = 8
+; n = 2^4 = 16, m = 2^8 = 256, C = 20
+; k = floor((n - 1) * C / m) = floor(15 * 20 / 256) = 1
+; CR = [60, 256), Y = [45, 256)
+; Invertible: yes
+define i1 @uge_invertible_tail_of_zext_mul(i4 %x) {
+; CHECK-LABEL: define i1 @uge_invertible_tail_of_zext_mul(
+; CHECK-SAME: i4 [[X:%.*]]) {
+; CHECK-NEXT: [[Z:%.*]] = zext i4 [[X]] to i8
+; CHECK-NEXT: [[M:%.*]] = mul i8 [[Z]], 20
+; CHECK-NEXT: [[TMP2:%.*]] = icmp uge i8 [[M]], 60
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %z = zext i4 %x to i8
+ %m = mul i8 %z, 20
+ %cmp = icmp uge i8 %m, 60
+ ret i1 %cmp
+}
+
+; N = 4, M = 8
+; n = 2^4 = 16, m = 2^8 = 256, C = 18
+; k = floor((n - 1) * C / m) = floor(15 * 18 / 256) = 1
+; CR = [-2^7, 16), Y = [15, 256)
+; Invertible: yes on Y.inverse() = [16, 2^7)
+define i1 @slt_noninvertible_signed_range_before_tail(i4 %v) {
+; CHECK-LABEL: define i1 @slt_noninvertible_signed_range_before_tail(
+; CHECK-SAME: i4 [[V:%.*]]) {
+; CHECK-NEXT: [[Z:%.*]] = zext i4 [[V]] to i8
+; CHECK-NEXT: [[CAST:%.*]] = mul nuw i8 [[Z]], 18
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[CAST]], 16
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %z = zext i4 %v to i8
+ %cast = mul nuw i8 %z, 18
+ %cmp = icmp slt i8 %cast, 16
+ ret i1 %cmp
+}
+
+; N = 4, M = 8
+; n = 2^4 = 16, m = 2^8 = 256, C = 20
+; k = floor((n - 1) * C / m) = floor(15 * 20 / 256) = 1
+; CR = [0, 45), Y = [45, 256)
+; Invertible: yes on Y.inverse() = [45, 256)
+define i1 @ult_noninvertible_zext_mul_range(i4 %x) {
+; CHECK-LABEL: define i1 @ult_noninvertible_zext_mul_range(
+; CHECK-SAME: i4 [[X:%.*]]) {
+; CHECK-NEXT: [[Z:%.*]] = zext i4 [[X]] to i8
+; CHECK-NEXT: [[M:%.*]] = mul i8 [[Z]], 20
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[M]], 45
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %z = zext i4 %x to i8
+ %m = mul i8 %z, 20
+ %cmp = icmp ult i8 %m, 45
+ ret i1 %cmp
+}
+
+; N = 4, M = 8
+; n = 2^4 = 16, m = 2^8 = 256, C = 18
+; k = floor((n - 1) * C / m) = floor(15 * 18 / 256) = 1
+; CR = [0, 16), Y = [15, 256)
+; Invertible: yes on Y.inverse() = [16, 256)
+define i1 @ult_noninvertible_zext_mul_before_tail(i4 %v) {
+; CHECK-LABEL: define i1 @ult_noninvertible_zext_mul_before_tail(
+; CHECK-SAME: i4 [[V:%.*]]) {
+; CHECK-NEXT: [[Z:%.*]] = zext i4 [[V]] to i8
+; CHECK-NEXT: [[CAST:%.*]] = mul i8 [[Z]], 18
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[CAST]], 16
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %z = zext i4 %v to i8
+ %cast = mul i8 %z, 18
+ %cmp = icmp ult i8 %cast, 16
+ ret i1 %cmp
+}
+
+; N = 4, M = 8
+; n = 2^4 = 16, m = 2^8 = 256, C = 20
+; k = floor((n - 1) * C / m) = floor(15 * 20 / 256) = 1
+; CR = [-2^7, 60), Y = [45, 256)
+; Invertible: yes on Y.inverse() = [60, 2^7)
+define i1 @slt_noninvertible_crosses_wrap(i4 %v) {
+; CHECK-LABEL: define i1 @slt_noninvertible_crosses_wrap(
+; CHECK-SAME: i4 [[V:%.*]]) {
+; CHECK-NEXT: [[Z:%.*]] = zext i4 [[V]] to i8
+; CHECK-NEXT: [[CAST:%.*]] = mul i8 [[Z]], 20
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[CAST]], 60
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %z = zext i4 %v to i8
+ %cast = mul i8 %z, 20
+ %cmp = icmp slt i8 %cast, 60
+ ret i1 %cmp
+}
+
+; Negative test
+; N = 5, M = 8
+; n = 2^5 = 32, m = 2^8 = 256, C = 20
+; k = floor((n - 1) * C / m) = floor(31 * 20 / 256) = 2
+; CR = [60, 256), Y = none
+; Invertible: no
+define i1 @uge_noninvertible_multiple_wraps(i5 %x) {
+; CHECK-LABEL: define i1 @uge_noninvertible_multiple_wraps(
+; CHECK-SAME: i5 [[X:%.*]]) {
+; CHECK-NEXT: [[Z:%.*]] = zext i5 [[X]] to i8
+; CHECK-NEXT: [[M:%.*]] = mul i8 [[Z]], 20
+; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[M]], 60
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %z = zext i5 %x to i8
+ %m = mul i8 %z, 20
+ %cmp = icmp uge i8 %m, 60
+ ret i1 %cmp
+}
+
+; Tests for CmpCR built through add.
+
+; N = 4, M = 8
+; n = 2^4 = 16, m = 2^8 = 256, C = 20
+; k = floor((n - 1) * C / m) = floor(15 * 20 / 256) = 1
+; CmpCR = [110, 228), Y = [45, 256)
+; Invertible: yes
+define i1 @sge_invertible_tail_of_zext_mul_plus_offset(i4 %x) {
+; CHECK-LABEL: define i1 @sge_invertible_tail_of_zext_mul_plus_offset(
+; CHECK-SAME: i4 [[X:%.*]]) {
+; CHECK-NEXT: [[Z:%.*]] = zext i4 [[X]] to i8
+; CHECK-NEXT: [[M:%.*]] = mul i8 [[Z]], 20
+; CHECK-NEXT: [[A:%.*]] = sub i8 [[M]], 100
+; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[A]], 10
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %z = zext i4 %x to i8
+ %m = mul i8 %z, 20
+ %a = sub i8 %m, 100
+ %cmp = icmp sge i8 %a, 10
+ ret i1 %cmp
+}
+
+; N = 4, M = 8
+; n = 2^4 = 16, m = 2^8 = 256, C = 20
+; k = floor((n - 1) * C / m) = floor(15 * 20 / 256) = 1
+; CmpCR = [100, 200), Y = [45, 256)
+; Invertible: yes
+define i1 @ult_invertible_zext_mul_plus_offset(i4 %x) {
+; CHECK-LABEL: define i1 @ult_invertible_zext_mul_plus_offset(
+; CHECK-SAME: i4 [[X:%.*]]) {
+; CHECK-NEXT: [[Z:%.*]] = zext i4 [[X]] to i8
+; CHECK-NEXT: [[M:%.*]] = mul i8 [[Z]], 20
+; CHECK-NEXT: [[A:%.*]] = add i8 [[M]], -100
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[A]], 100
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %z = zext i4 %x to i8
+ %m = mul i8 %z, 20
+ %a = add i8 %m, -100
+ %cmp = icmp ult i8 %a, 100
+ ret i1 %cmp
+}
+
+; N = 4, M = 8
+; n = 2^4 = 16, m = 2^8 = 256, C = 20
+; k = floor((n - 1) * C / m) = floor(15 * 20 / 256) = 1
+; CmpCR = [122, 60), Y = [45, 256)
+; Invertible: yes on CmpCR.inverse() = [60, 122)
+define i1 @slt_inverse_invertible_zext_mul_plus_offset(i4 %x) {
+; CHECK-LABEL: define i1 @slt_inverse_invertible_zext_mul_plus_offset(
+; CHECK-SAME: i4 [[X:%.*]]) {
+; CHECK-NEXT: [[Z:%.*]] = zext i4 [[X]] to i8
+; CHECK-NEXT: [[M:%.*]] = mul i8 [[Z]], 20
+; CHECK-NEXT: [[A:%.*]] = add i8 [[M]], 6
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[A]], 66
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %z = zext i4 %x to i8
+ %m = mul i8 %z, 20
+ %a = add i8 %m, 6
+ %cmp = icmp slt i8 %a, 66
+ ret i1 %cmp
+}
+
+
+; TODO: support sext
+; Test for icmp (mul (sext x), C) to icmp x.
+
+; Use plain i4 -> i8 sext instead of extra range metadata so the tests cover
+; the extension pattern directly.
+define i1 @sccp_sext_mul_shrinks_to_prefix(i4 %x) {
+; CHECK-LABEL: define i1 @sccp_sext_mul_shrinks_to_prefix(
+; CHECK-SAME: i4 [[X:%.*]]) {
+; CHECK-NEXT: [[S:%.*]] = sext i4 [[X]] to i8
+; CHECK-NEXT: [[M:%.*]] = mul i8 [[S]], 20
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[M]], 60
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %s = sext i4 %x to i8
+ %m = mul i8 %s, 20
+ %cmp = icmp ult i8 %m, 60
+ ret i1 %cmp
+}
+
+
+define i1 @sccp_sext_mul_shrinks_to_negative_suffix(i4 %x) {
+; CHECK-LABEL: define i1 @sccp_sext_mul_shrinks_to_negative_suffix(
+; CHECK-SAME: i4 [[X:%.*]]) {
+; CHECK-NEXT: [[S:%.*]] = sext i4 [[X]] to i8
+; CHECK-NEXT: [[M:%.*]] = mul i8 [[S]], 20
+; CHECK-NEXT: [[TMP1:%.*]] = icmp uge i8 [[M]], -60
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %s = sext i4 %x to i8
+ %m = mul i8 %s, 20
+ %cmp = icmp uge i8 %m, 196
+ ret i1 %cmp
+}
+
+
+; Test for icmp (f(x), C) to icmp x
+
+define i1 @sccp_mul_wraps_once_shrinks_to_middle_window(i8 range(i8 0, 18) %x) {
+; CHECK-LABEL: define i1 @sccp_mul_wraps_once_shrinks_to_middle_window(
+; CHECK-SAME: i8 range(i8 0, 18) [[X:%.*]]) {
+; CHECK-NEXT: [[M:%.*]] = mul i8 [[X]], 20
+; CHECK-NEXT: [[TMP2:%.*]] = icmp uge i8 [[M]], 100
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %m = mul i8 %x, 20
+ %cmp = icmp uge i8 %m, 100
+ ret i1 %cmp
+}
+
+define i1 @sccp_urem_wraps_once_shrinks_to_middle_window(i8 range(i8 8, 19) %x) {
+; CHECK-LABEL: define i1 @sccp_urem_wraps_once_shrinks_to_middle_window(
+; CHECK-SAME: i8 range(i8 8, 19) [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = urem i8 [[X]], 10
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 3
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %r = urem i8 %x, 10
+ %cmp = icmp ult i8 %r, 3
+ ret i1 %cmp
+}
+
+define i1 @sccp_negative_mul_wraps_once_shrinks_to_middle_window(i8 range(i8 3, 17) %x) {
+; CHECK-LABEL: define i1 @sccp_negative_mul_wraps_once_shrinks_to_middle_window(
+; CHECK-SAME: i8 range(i8 3, 17) [[X:%.*]]) {
+; CHECK-NEXT: [[M:%.*]] = mul i8 [[X]], -20
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[M]], 60
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %m = mul i8 %x, -20
+ %cmp = icmp ult i8 %m, 60
+ ret i1 %cmp
+}
+
+define i1 @sccp_shl_wraps_once_shrinks_to_upper_window(i8 range(i8 5, 23) %x) {
+; CHECK-LABEL: define i1 @sccp_shl_wraps_once_shrinks_to_upper_window(
+; CHECK-SAME: i8 range(i8 5, 23) [[X:%.*]]) {
+; CHECK-NEXT: [[Y:%.*]] = shl i8 [[X]], 4
+; CHECK-NEXT: [[TMP2:%.*]] = icmp uge i8 [[Y]], -96
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %y = shl i8 %x, 4
+ %cmp = icmp uge i8 %y, 160
+ ret i1 %cmp
+}
+
+define i1 @sccp_mul_wraps_once_shrinks_to_two_value_window1(i8 range(i8 2, 6) %x) {
+; CHECK-LABEL: define i1 @sccp_mul_wraps_once_shrinks_to_two_value_window1(
+; CHECK-SAME: i8 range(i8 2, 6) [[X:%.*]]) {
+; CHECK-NEXT: [[M:%.*]] = mul i8 [[X]], 100
+; CHECK-NEXT: [[A:%.*]] = add i8 [[M]], 56
+; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[A]], 100
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %m = mul i8 %x, 100
+ %a = add i8 %m, -200
+ %cmp = icmp uge i8 %a, 100
+ ret i1 %cmp
+}
+
+; TODO: support wrapped-range
+define i1 @sccp_mul_wraps_once_shrinks_to_two_value_window2(i8 range(i8 -2, 2) %x) {
+; CHECK-LABEL: define i1 @sccp_mul_wraps_once_shrinks_to_two_value_window2(
+; CHECK-SAME: i8 range(i8 -2, 2) [[X:%.*]]) {
+; CHECK-NEXT: [[M:%.*]] = mul i8 [[X]], 100
+; CHECK-NEXT: [[A:%.*]] = add i8 [[M]], 56
+; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[A]], 100
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %m = mul i8 %x, 100
+ %a = add i8 %m, -200
+ %cmp = icmp uge i8 %a, 100
+ ret i1 %cmp
+}
+
+define i1 @sccp_mul_wraps_once_shrinks_to_singleton(i8 range(i8 0, 18) %x) {
+; CHECK-LABEL: define i1 @sccp_mul_wraps_once_shrinks_to_singleton(
+; CHECK-SAME: i8 range(i8 0, 18) [[X:%.*]]) {
+; CHECK-NEXT: [[M:%.*]] = mul i8 [[X]], 20
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[M]], -116
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %m = mul i8 %x, 20
+ %cmp = icmp eq i8 %m, 140
+ ret i1 %cmp
+}
+
+; TODO: support wrapped-range
+; Wrapped-range counterparts for the generic f(x) tests above.
+
+define i1 @sccp_mul_wraps_once_shrinks_to_wrapped_window(i8 range(i8 250, 6) %x) {
+; CHECK-LABEL: define i1 @sccp_mul_wraps_once_shrinks_to_wrapped_window(
+; CHECK-SAME: i8 range(i8 -6, 6) [[X:%.*]]) {
+; CHECK-NEXT: [[M:%.*]] = mul nsw i8 [[X]], 20
+; CHECK-NEXT: [[TMP2:%.*]] = icmp uge i8 [[M]], 100
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %m = mul i8 %x, 20
+ %cmp = icmp uge i8 %m, 100
+ ret i1 %cmp
+}
+
+define i1 @sccp_urem_wraps_once_shrinks_to_wrapped_window(i8 range(i8 250, 10) %x) {
+; CHECK-LABEL: define i1 @sccp_urem_wraps_once_shrinks_to_wrapped_window(
+; CHECK-SAME: i8 range(i8 -6, 10) [[X:%.*]]) {
+; CHECK-NEXT: [[R:%.*]] = urem i8 [[X]], 10
+; CHECK-NEXT: [[TMP2:%.*]] = icmp uge i8 [[R]], 6
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %r = urem i8 %x, 10
+ %cmp = icmp uge i8 %r, 6
+ ret i1 %cmp
+}
+
+define i1 @sccp_negative_mul_wraps_once_shrinks_to_wrapped_window(i8 range(i8 250, 6) %x) {
+; CHECK-LABEL: define i1 @sccp_negative_mul_wraps_once_shrinks_to_wrapped_window(
+; CHECK-SAME: i8 range(i8 -6, 6) [[X:%.*]]) {
+; CHECK-NEXT: [[M:%.*]] = mul nsw i8 [[X]], -20
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[M]], 60
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %m = mul i8 %x, -20
+ %cmp = icmp ult i8 %m, 60
+ ret i1 %cmp
+}
+
+define i1 @sccp_shl_wraps_once_shrinks_to_wrapped_suffix(i8 range(i8 250, 6) %x) {
+; CHECK-LABEL: define i1 @sccp_shl_wraps_once_shrinks_to_wrapped_suffix(
+; CHECK-SAME: i8 range(i8 -6, 6) [[X:%.*]]) {
+; CHECK-NEXT: [[Y:%.*]] = shl nsw i8 [[X]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = icmp uge i8 [[Y]], -96
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %y = shl i8 %x, 4
+ %cmp = icmp uge i8 %y, 160
+ ret i1 %cmp
+}
+
+define i1 @sccp_mul_with_offset_wraps_once_shrinks_to_wrapped_window(i8 range(i8 250, 6) %x) {
+; CHECK-LABEL: define i1 @sccp_mul_with_offset_wraps_once_shrinks_to_wrapped_window(
+; CHECK-SAME: i8 range(i8 -6, 6) [[X:%.*]]) {
+; CHECK-NEXT: [[M:%.*]] = mul nsw i8 [[X]], 20
+; CHECK-NEXT: [[A:%.*]] = add i8 [[M]], 56
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[A]], 37
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %m = mul i8 %x, 20
+ %a = add i8 %m, -200
+ %cmp = icmp ult i8 %a, 37
+ ret i1 %cmp
+}
+
+define i1 @sccp_mul_wraps_once_shrinks_to_wrapped_domain_singleton(i8 range(i8 252, 8) %x) {
+; CHECK-LABEL: define i1 @sccp_mul_wraps_once_shrinks_to_wrapped_domain_singleton(
+; CHECK-SAME: i8 range(i8 -4, 8) [[X:%.*]]) {
+; CHECK-NEXT: [[M:%.*]] = mul i8 [[X]], 20
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[M]], -116
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %m = mul i8 %x, 20
+ %cmp = icmp eq i8 %m, 140
+ ret i1 %cmp
+}
+
+; Test for vector : f(vec) = C * vec, C is a splat constant.
+
+define <4 x i1> @vec_splat_and(<4 x i32> range(i32 100, 456) %x) {
+; CHECK-LABEL: define <4 x i1> @vec_splat_and(
+; CHECK-SAME: <4 x i32> range(i32 100, 456) [[X:%.*]]) {
+; CHECK-NEXT: [[M:%.*]] = and <4 x i32> [[X]], splat (i32 255)
+; CHECK-NEXT: [[CMP:%.*]] = icmp uge <4 x i32> [[M]], splat (i32 230)
+; CHECK-NEXT: ret <4 x i1> [[CMP]]
+;
+ %m = and <4 x i32> %x, splat (i32 255)
+ %cmp = icmp uge <4 x i32> %m, splat (i32 230)
+ ret <4 x i1> %cmp
+}
+
+define <4 x i1> @vec_splat_mul(<4 x i8> range(i8 10, 17) %x) {
+; CHECK-LABEL: define <4 x i1> @vec_splat_mul(
+; CHECK-SAME: <4 x i8> range(i8 10, 17) [[X:%.*]]) {
+; CHECK-NEXT: [[M:%.*]] = mul <4 x i8> [[X]], splat (i8 50)
+; CHECK-NEXT: [[ADD:%.*]] = add <4 x i8> [[M]], splat (i8 -40)
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult <4 x i8> [[ADD]], splat (i8 110)
+; CHECK-NEXT: ret <4 x i1> [[CMP]]
+;
+ %m = mul <4 x i8> %x, splat (i8 50)
+ %add = add <4 x i8> %m, splat (i8 -40)
+ %cmp = icmp ult <4 x i8> %add, splat (i8 110)
+ ret <4 x i1> %cmp
+}
+
+define <4 x i1> @vec_splat_urem(<4 x i32> range(i32 1, 10) %x) {
+; CHECK-LABEL: define <4 x i1> @vec_splat_urem(
+; CHECK-SAME: <4 x i32> range(i32 1, 10) [[X:%.*]]) {
+; CHECK-NEXT: [[M:%.*]] = urem <4 x i32> [[X]], splat (i32 8)
+; CHECK-NEXT: [[CMP:%.*]] = icmp uge <4 x i32> [[M]], splat (i32 2)
+; CHECK-NEXT: ret <4 x i1> [[CMP]]
+;
+ %m = urem <4 x i32> %x, splat (i32 8)
+ %cmp = icmp uge <4 x i32> %m, splat (i32 2)
+ ret <4 x i1> %cmp
+}
+
+define <4 x i1> @vec_splat_shl(<4 x i8> range(i8 1, 4) %x) {
+; CHECK-LABEL: define <4 x i1> @vec_splat_shl(
+; CHECK-SAME: <4 x i8> range(i8 1, 4) [[X:%.*]]) {
+; CHECK-NEXT: [[M:%.*]] = shl <4 x i8> [[X]], splat (i8 7)
+; CHECK-NEXT: [[CMP:%.*]] = icmp uge <4 x i8> [[M]], splat (i8 100)
+; CHECK-NEXT: ret <4 x i1> [[CMP]]
+;
+ %m = shl <4 x i8> %x, splat (i8 7)
+ %cmp = icmp uge <4 x i8> %m, splat (i8 100)
+ ret <4 x i1> %cmp
+}
>From 68ce5f59f05bdba916a3b601922647959e839595 Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Thu, 26 Mar 2026 01:49:27 +0800
Subject: [PATCH 8/9] =?UTF-8?q?Fold=20y=20=3D=20f(x)=20=3D=20(Cx=20mod=20M?=
=?UTF-8?q?)=20=E2=88=88=20R=20into=20x=20=E2=88=88=20R'?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../llvm/Transforms/Utils/SCCPSolver.h | 8 +-
llvm/lib/Transforms/Scalar/SCCP.cpp | 3 +-
llvm/lib/Transforms/Utils/SCCPSolver.cpp | 401 +++++++++++++++++-
.../SCCP/eager-invertible-periodic-mapping.ll | 2 +-
.../invertible-periodic-linear-mapping.ll | 92 ++--
5 files changed, 433 insertions(+), 73 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h
index 5aac7c2ac5d3e..f9fcd17662f90 100644
--- a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h
+++ b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h
@@ -194,10 +194,16 @@ class SCCPSolver {
LLVM_ABI void visit(Instruction *I);
LLVM_ABI void visitCall(CallInst &I);
+ /// Simplify instructions in \p BB using the solver's lattice information.
+ /// When \p Eager is true, also apply more aggressive folds that may rewrite
+ /// IR into forms less friendly to earlier canonicalization passes. Keep eager
+ /// mode for later optimization points where exposing extra range-based folds
+ /// outweighs the risk of hiding canonical patterns.
LLVM_ABI bool simplifyInstsInBlock(BasicBlock &BB,
SmallPtrSetImpl<Value *> &InsertedValues,
Statistic &InstRemovedStat,
- Statistic &InstReplacedStat);
+ Statistic &InstReplacedStat,
+ bool Eager = false);
LLVM_ABI bool removeNonFeasibleEdges(BasicBlock *BB, DomTreeUpdater &DTU,
BasicBlock *&NewUnreachableBB) const;
diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp
index feee794ffeae1..eada8efddd8ff 100644
--- a/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -102,7 +102,8 @@ static bool runSCCP(Function &F, const DataLayout &DL,
}
MadeChanges |= Solver.simplifyInstsInBlock(BB, InsertedValues,
- NumInstRemoved, NumInstReplaced);
+ NumInstRemoved, NumInstReplaced,
+ /*Eager=*/true);
}
// Remove unreachable blocks and non-feasible edges.
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index cfbf733eb52c8..4930ce01d13b9 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/SCCPSolver.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -29,9 +30,9 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
+#include <optional>
#include <utility>
#include <vector>
@@ -105,6 +106,310 @@ static ConstantRange getRange(Value *Op, SCCPSolver &Solver,
/*UndefAllowed=*/false);
}
+namespace {
+
+// Sibling of ConstantRange::getNonEmptyRange
+ConstantRange getMightEmptyRange(const APInt &L, const APInt &R) {
+ return L == R ? ConstantRange::getEmpty(L.getBitWidth())
+ : ConstantRange(L, R);
+}
+
+/// Represents periodic mapping f(x) = Cx mod M
+class ModularMulMapping {
+public:
+ ModularMulMapping(const APInt &C, const APInt &M)
+ : MulC(C), Modulus(M), IsFullMod(M.isZero()), IsURem(C.isOne()),
+ WideBits(C.getBitWidth() * 2), WideMulC(C.zext(WideBits)),
+ WideModulus(M.zext(WideBits)) {
+ assert(C.isStrictlyPositive() && "Expected a positive multiplier C");
+ assert((!IsURem || !M.isZero()) && "Expected a valid modulus M if C = 1");
+ }
+
+ // Mapping: f(x) = Cx mod M = ((C mod M) * (x mod M)) mod M
+ APInt operator()(const APInt &x) const {
+ assert(x.getBitWidth() == MulC.getBitWidth() &&
+ "The bit width of x and C should be equal for f(x) = Cx");
+ // Fast path for f(x) = Cx
+ if (IsFullMod)
+ return MulC * x;
+ // Fast path for f(x) = x mod M
+ if (IsURem)
+ return x.urem(Modulus);
+ return (WideMulC * x.zext(WideBits))
+ .urem(WideModulus)
+ .trunc(MulC.getBitWidth());
+ }
+
+ ConstantRange getInvertibleImage(const ConstantRange &SrcCR,
+ bool &IsDomainReturned) const {
+ assert(!SrcCR.isEmptySet() && "Expected non-empty SrcCR");
+ return SrcCR.isWrappedSet()
+ ? getInvertibleImageOnWrappedX(SrcCR, IsDomainReturned)
+ : getInvertibleImageOnUnwrappedX(SrcCR, IsDomainReturned);
+ }
+
+private:
+ ConstantRange getInvertibleImageOnUnwrappedX(const ConstantRange &SrcCR,
+ bool &IsDomainReturned) const {
+ assert(!SrcCR.isWrappedSet() && "Expected nuw SrcCR");
+ const unsigned BW = MulC.getBitWidth();
+
+ const APInt &XLo =
+ SrcCR.isFullSet() ? APInt::getZero(BW) : SrcCR.getLower(),
+ &XHi =
+ SrcCR.isFullSet() ? APInt::getZero(BW) : SrcCR.getUpper();
+ // [Lo, Hi) = [Lo, Hi-1]
+ const APInt RangeSize = XHi - XLo - 1;
+
+ // Periods k = floor(|Range| / T)
+ // = floor((SrcCR.size() - 1) / (M / C))
+ // = floor((SrcCR.size() - 1) * C / M)
+ const APInt SizeMulC = RangeSize.zext(WideBits) * WideMulC;
+ const APInt Periods =
+ IsFullMod ? SizeMulC.lshr(BW) : SizeMulC.udiv(WideModulus);
+
+ if (Periods.isZero()) {
+ // k = 0: the walk never wraps, so the mapping is invertible over the full
+ // result space. Here we return the reachable image/domain
+ // Y = f(CR) = [f(Lo), f(Hi)).
+ // Consider discreteness, Y = [ f(Lo) , f(Hi - 1) + 1 )
+ // f(x) = Cx: Y | /
+ // Y | /
+ // |
+ // Y | /
+ // └-----------
+ // XXX
+ IsDomainReturned = true;
+ // SrcCR is not emptyset -> Y cannot be emptyset but might be fullset.
+ return ConstantRange::getNonEmpty((*this)(XLo),
+ modularAdd((*this)(XHi - 1), 1));
+ }
+
+ if (Periods.isOne()) {
+ // k = 1: only the unique part invertible.
+ // As f(x) walks along f(Lo) --> ⊤ --> ⊥ --> f(Hi),
+ // the repeated part is [f(Lo), f(Hi)) and the invertiable part
+ // Y = [ f(Hi), f(Lo) )
+ // Consider discreteness, Y = [ f(Hi - 1) + 1 , f(Lo) )
+ // f(x) = Cx: Y | /
+ // Y | /
+ // | / /
+ // Y | /
+ // └-----------
+ // ~XXX~
+ IsDomainReturned = false;
+ // [f(Lo), f(Hi)) repeated -> Y cannot be fullset but might be emptyset.
+ return getMightEmptyRange(modularAdd((*this)(XHi - 1), 1), (*this)(XLo));
+ }
+ // k >= 2: the walk overlaps itself too much to have a unique inverse.
+ // f(x) = Cx: | / /
+ // | / /
+ // Y does not exist. | / / /
+ // | / /
+ // └-------------
+ return ConstantRange::getEmpty(BW);
+ }
+ /// If the range of x is wrapped, the linearity of `f(x) = Cx mod M` breaks at
+ /// the end. E.g., as follows, the invertible image Y is continuous, but the
+ /// related pre-image X is not.
+ /// f(x) = Cx: Y| /
+ /// | / /
+ /// | / /
+ /// Y|/
+ /// └----------
+ /// X~~X ~~
+ ConstantRange getInvertibleImageOnWrappedX(const ConstantRange &SrcCR,
+ bool &IsDomainReturned) const {
+ assert(SrcCR.isWrappedSet() && "Expected wrapped SrcCR");
+ // FIXME: support wrapped SrcCR.
+ return ConstantRange::getEmpty(MulC.getBitWidth());
+ }
+ APInt modularAdd(const APInt &LHS, int RHS) const {
+ return Modulus.isZero() ? LHS + RHS : (RHS + LHS).urem(Modulus);
+ };
+
+ const APInt &MulC;
+ const APInt &Modulus;
+ const bool IsFullMod;
+ const bool IsURem;
+ const unsigned WideBits;
+ const APInt WideMulC;
+ const APInt WideModulus;
+};
+} // namespace
+
+/// Refer to https://github.com/llvm/llvm-project/pull/186347 for the
+/// underlying math model.
+///
+/// Given a result constraint CR on y = f(x) = Step * x mod Modulus and a
+/// source domain X = SrcCR, try to compute a single interval CR' = f^{-1}(CR)
+/// on x.
+///
+/// We first compute the invertible result interval Y for x \in SrcCR, then try
+/// to apply f^{-1} on CR or CR.inverse(). This is valid iff CR ⊆ Y, or iff CR
+/// intersects the reachable image when Y itself is the domain. Modulus == 0
+/// denotes the full iBW ring, i.e. mod 2^BW.
+static std::optional<ConstantRange>
+getPreImageOfModularMul(const ConstantRange &CmpCR, const ConstantRange &SrcCR,
+ const APInt &C, const APInt &Modulus) {
+ assert(!C.isZero() && "Expected a non-zero periodic coefficient");
+ assert(!CmpCR.isEmptySet() && "Unexpected empty constraint set");
+ assert(!CmpCR.isFullSet() && "Unexpected full constraint set");
+ assert(!SrcCR.isEmptySet() && "Unexpected empty input set");
+
+ const unsigned BW = C.getBitWidth();
+
+ ConstantRange Domain =
+ ConstantRange::getNonEmpty(APInt::getZero(BW), Modulus);
+
+ auto NegateRange = [](const ConstantRange &CR) -> ConstantRange {
+ // negate([L,R)) = - [L, R) = [1 - R, 1 - L)
+ return ConstantRange::getNonEmpty(1 - CR.getUpper(), 1 - CR.getLower());
+ };
+ // y = Cx \in CR --> -y = -Cx \in negate(CR)
+ const ConstantRange &ActiveCmpCR =
+ C.isNegative() ? NegateRange(CmpCR) : CmpCR;
+ const APInt &Step = C.isNegative() ? -C : C;
+
+ const ModularMulMapping Mapping{Step, Modulus};
+
+ // ==================================================================== //
+ // 1. Calculate the invertible interval Y for f(x) = Cx mod M.
+ // ==================================================================== //
+
+ bool IsDomainReturned = false;
+ const ConstantRange Y = Mapping.getInvertibleImage(SrcCR, IsDomainReturned);
+
+ if (Y.isEmptySet())
+ return std::nullopt;
+
+ if (IsDomainReturned)
+ Domain = Y;
+
+ // ==================================================================== //
+ // 2. Calculate the equivalent range X via f^{-1} on CmpCR.
+ // ==================================================================== //
+ auto ModularSub = [&Modulus](const APInt &LHS, const APInt &RHS) {
+ return (Modulus.isZero() || LHS.uge(RHS)) ? LHS - RHS : Modulus - RHS + LHS;
+ };
+ // Try to map CmpRange to its pre-image, i.e., f^{-1}(CR).
+ auto TryGetPreImage =
+ [&](const ConstantRange &CR) -> std::optional<ConstantRange> {
+ if (CR.contains(Domain))
+ return /* Domain ⊆ CR*/ ConstantRange::getFull(BW);
+ if (CR.inverse().contains(Domain))
+ return /* Domain ∩ CR = ∅ */ ConstantRange::getEmpty(BW);
+
+ // ActiveCR is the reachable part of CR.
+ // ActiveCR = null if
+ // L-------U : Domain or --U L----- : Domain
+ // --U L----- : CR L-------U : CR
+ std::optional<ConstantRange> ActiveCR = Domain.exactIntersectWith(CR);
+ // If ActiveCR = null or ActiveCR ⊈ the invertible image Y,
+ // there are >1 separate intervals of x, making Cx ∈ CR.
+ // I.e., we cannot derive a single X.
+ if (!ActiveCR || !Y.contains(*ActiveCR))
+ return std::nullopt;
+
+ // ActiveCR.Hi does not belong to Y, thus we use Y1 = ActiveCR.Hi - 1
+ const APInt &Y0 = ActiveCR->getLower(),
+ &Y1 = ModularSub(ActiveCR->getUpper(), APInt(BW, 1));
+ // Fast path for Lo = 0: X = [ y0 / C , y1 / C )
+ if (SrcCR.getLower().isZero()) {
+ // f(x) = Cx: Y | / X must be Y / C without mod directly.
+ // Y | /
+ // | / /
+ // |/ /
+ // └---------
+ // ~~XX~~
+ const APInt X0 = APIntOps::RoundingUDiv(Y0, Step, APInt::Rounding::UP),
+ X1 = APIntOps::RoundingUDiv(Y1, Step, APInt::Rounding::DOWN) +
+ 1;
+ return getMightEmptyRange(X0, X1);
+ }
+
+ // Given SrcCR = [Lo, Hi) and invertible interval CR = [y0, y1],
+ // we need to find X = [x0, x1] ⊆ SrcCR, s.t., f(X) = CR.
+ // I.e., y0 = f(x0)
+ // DeltaY = y0 - f(Lo) = f(x0) - f(Lo) = f(x0 - Lo) = C * DeltaX
+ // DeltaX = DeltaY / C = (y0 - f(Lo)) / C
+ // x0 = Lo + DeltaX
+ // As y0, f(Lo) ∈ [0, M), we do need to consider modulus.
+ // x1 shares the same derivation.
+ // Considering discreteness, we need to adjust X = [A,B) properly as
+ // follows.
+ // X = [ceil(x0), ceil(x1))
+ const APInt LoY = Mapping(SrcCR.getLower());
+ const APInt DeltaY0 = ModularSub(Y0, LoY), DeltaY1 = ModularSub(Y1, LoY);
+ const APInt DeltaX0 =
+ APIntOps::RoundingUDiv(DeltaY0, Step, APInt::Rounding::UP);
+ const APInt DeltaX1 =
+ APIntOps::RoundingUDiv(DeltaY1, Step, APInt::Rounding::DOWN);
+ const APInt X0 = SrcCR.getLower() + DeltaX0,
+ X1 = SrcCR.getLower() + DeltaX1 + 1;
+ const ConstantRange X = getMightEmptyRange(X0, X1);
+ assert(SrcCR.contains(X) && "X should be subset of SrcCR");
+ return X;
+ };
+
+ // Try to get single X = f^{-1}(CmpCR) to make Cx ∈ CmpCR.
+ if (auto X = TryGetPreImage(ActiveCmpCR))
+ return *X;
+
+ // Try to get single X = f^{-1}(CmpCR.inverse()).inverse() to make Cx ∈ CmpCR.
+ if (auto X = TryGetPreImage(ActiveCmpCR.inverse()))
+ return X->inverse();
+
+ return std::nullopt;
+}
+
+/// Given CmpCR constraining y = f(x) and SCCP's known range SrcCR for x, try to
+/// rewrite the constraint as a single ConstantRange on x.
+///
+/// This only handles mappings that the current solver models via
+/// getPreImageOfModularMul():
+/// - mul x, C : y = C * x mod 2^BW
+/// - shl x, C : y = (2^C) * x mod 2^BW
+/// - urem x, C : y = x mod C
+/// - and x, C : y = x mod C+1 if C is low-bit mask
+///
+/// Returns nullopt if the reachable image from SrcCR does not admit one
+/// invertible interval, or if the preimage of CmpCR cannot be expressed as one
+/// ConstantRange.
+static std::optional<ConstantRange> getPreImageOfInvertiblePeriodicMapping(
+ unsigned Opcode, Value *X, const APInt &C, const ConstantRange &SrcCR,
+ const ConstantRange &CmpCR) {
+ // We support integer vector/scalar.
+ // For vector, the mapping must be fixed, i.e., splat C.
+ assert(X->getType()->getScalarType()->isIntegerTy() &&
+ "Only support integer mapping");
+
+ // TODO: Support srem and other more complex periodic mappings.
+ std::optional<ConstantRange> SrcCmpCR;
+ switch (Opcode) {
+ case Instruction::Mul:
+ // y = C*x = C*x mod (MAX + 1)
+ return getPreImageOfModularMul(CmpCR, SrcCR, C, APInt(C.getBitWidth(), 0));
+ case Instruction::Shl:
+ // y = x << C = 2^C * x mod (MAX + 1)
+ return getPreImageOfModularMul(
+ CmpCR, SrcCR, APInt::getOneBitSet(C.getBitWidth(), C.getZExtValue()),
+ APInt(C.getBitWidth(), 0));
+ case Instruction::And:
+ assert(C.isMask() && "Expected a low-bit mask C");
+ // y = x & C = 1 * x mod C + 1
+ return getPreImageOfModularMul(CmpCR, SrcCR, APInt(C.getBitWidth(), 1),
+ C + 1);
+ case Instruction::URem:
+ // y = x % C = 1 * x mod C
+ return getPreImageOfModularMul(CmpCR, SrcCR, APInt(C.getBitWidth(), 1), C);
+ default:
+ assert(false && "Unsupported invertible periodic linear mapping opcode");
+ }
+
+ return std::nullopt;
+}
+
/// SCCP already proves x \in KnownCR, so only ActiveCmpCR = CmpCR ∩ KnownCR
/// matters. Try to replace CmpCR with a simpler equivalent range NewCmpCR
/// such that NewCmpCR ∩ KnownCR == ActiveCmpCR.
@@ -343,7 +648,7 @@ static bool replaceSignedInst(SCCPSolver &Solver,
/// Try to use \p Inst's value range from \p Solver to simplify it.
static Value *simplifyInstruction(SCCPSolver &Solver,
SmallPtrSetImpl<Value *> &InsertedValues,
- Instruction &Inst) {
+ Instruction &Inst, bool Eager) {
auto GetRange = [&Solver, &InsertedValues](Value *Op) {
return getRange(Op, Solver, InsertedValues);
};
@@ -389,42 +694,97 @@ static Value *simplifyInstruction(SCCPSolver &Solver,
return Sub;
}
- // Relax range checks.
+ // Check if we can relax icmp Pred, Y, ... to a simpler form.
if (auto *ICmp = dyn_cast<ICmpInst>(&Inst)) {
- Value *X;
- auto MatchTwoInstructionExactRangeCheck =
- [&]() -> std::optional<ConstantRange> {
+ Value *Y;
+ bool IsOneUse = false, IsTwoInstRangeCheck = true;
+ auto MatchExactRangeCheck = [&]() -> std::optional<ConstantRange> {
const APInt *RHSC;
+ // Match icmp Pred LHS, C
if (!match(ICmp->getOperand(1), m_APInt(RHSC)))
return std::nullopt;
Value *LHS = ICmp->getOperand(0);
ICmpInst::Predicate Pred = ICmp->getPredicate();
const APInt *Offset;
- if (match(LHS, m_OneUse(m_AddLike(m_Value(X), m_APInt(Offset)))))
- return ConstantRange::makeExactICmpRegion(Pred, *RHSC).sub(*Offset);
- // Match icmp eq/ne X & NegPow2, C
+ IsOneUse = LHS->hasOneUse();
+ if (!IsOneUse)
+ return std::nullopt;
+ const ConstantRange ExactCmpCR =
+ ConstantRange::makeExactICmpRegion(Pred, *RHSC);
+ // Match icmp Pred Y + C1, C2
+ if (match(LHS, m_AddLike(m_Value(Y), m_APInt(Offset))))
+ return ExactCmpCR.sub(*Offset);
+ // Match icmp Pred Y - C1, C2
+ if (match(LHS, m_Sub(m_Value(Y), m_APInt(Offset))))
+ return ExactCmpCR.add(*Offset);
+ // Match icmp eq/ne Y & NegPow2, C
if (ICmp->isEquality()) {
const APInt *Mask;
- if (match(LHS, m_OneUse(m_And(m_Value(X), m_NegatedPower2(Mask)))) &&
+ if (match(LHS, m_And(m_Value(Y), m_NegatedPower2(Mask))) &&
RHSC->countr_zero() >= Mask->countr_zero()) {
ConstantRange CR(*RHSC, *RHSC - *Mask);
return Pred == ICmpInst::ICMP_EQ ? CR : CR.inverse();
}
}
- return std::nullopt;
+ IsTwoInstRangeCheck = false;
+ Y = LHS;
+ return ExactCmpCR;
};
- if (auto CR = MatchTwoInstructionExactRangeCheck()) {
- ConstantRange LRange = GetRange(X);
- // Early exit if we know nothing about X.
- if (LRange.isFullSet())
+ // Match icmp Pred, (op Y, C1), C2 as Y ∈ CmpCR.
+ if (auto CmpCR = MatchExactRangeCheck()) {
+
+ // TODO: support more mappings f
+ // FIXME: should we treat trunc as x % 2^N?
+ // In eager mode, try to simplify Y = f(X) ∈ CR into X ∈ CR'. This is a
+ // more aggressive rewrite that can expose additional SCCP opportunities,
+ // but may also hide canonical forms expected by earlier passes.
+ if (const APInt *C;
+ Eager && /* the sole use of y = f(x) is icmp */ Y->hasOneUse() &&
+ (match(Y, m_c_Mul(m_Value(X), m_APInt(C))) ||
+ match(Y, m_Shl(m_Value(X), m_APInt(C))) ||
+ match(Y, m_URem(m_Value(X), m_APInt(C))) ||
+ match(Y, m_And(m_Value(X), m_LowBitMask(C))))) {
+ ConstantRange XRange = GetRange(X);
+ if (auto XCmpCR = getPreImageOfInvertiblePeriodicMapping(
+ cast<Instruction>(Y)->getOpcode(), X, *C, XRange, *CmpCR)) {
+ // Use XRange to simplify XCmpCR. E.g.:
+ // XCmpCR = [5, 10), *XCmpCR = [5, 0) --> NewCmpCR = [0, 10) -> ult
+ *XCmpCR = simplifyCmpRange(*XCmpCR, XRange);
+
+ // Emit XCmpCR as icmp Pred (X + C1), C2
+ ICmpInst::Predicate Pred;
+ APInt RHS, Offset;
+ XCmpCR->getEquivalentICmp(Pred, RHS, Offset);
+
+ IRBuilder<NoFolder> Builder(&Inst);
+ if (!Offset.isZero()) {
+ X = Builder.CreateAdd(X, ConstantInt::get(X->getType(), Offset));
+ InsertedValues.insert(X);
+ }
+
+ Value *NewICmp =
+ Builder.CreateICmp(Pred, X, ConstantInt::get(X->getType(), RHS));
+ InsertedValues.insert(NewICmp);
+ return NewICmp;
+ }
+ }
+
+ // Given Y ∈ YRange, try to simplify: Y ∈ CR --> Y ∈ CR'
+ ConstantRange YRange = GetRange(Y);
+
+ // Early exit if
+ // 1. we know nothing about Y or
+ // 2. LHS has >1 uses (tuned by llvm-opt-bench) or
+ // 3. this ICMP is not two-inst range check.
+ if (YRange.isFullSet() || !IsOneUse || !IsTwoInstRangeCheck)
return nullptr;
// We are allowed to refine the comparison to either true or false for out
// of range inputs. Based on this, try to simplify CmpCR as a single
// ult/uge/slt/sge/eq/ne.
- // E.g., CmpCR = [3, 10), LRange = [5, 0) --> NewCmpCR = [0, 10) -> ult
- ConstantRange NewCmpCR = simplifyCmpRange(*CR, LRange);
+ // E.g., CmpCR = [3, 10), YRange = [5, 0) --> NewCmpCR = [0, 10) -> ult
+ ConstantRange NewCmpCR = simplifyCmpRange(*CmpCR, YRange);
ICmpInst::Predicate Pred;
APInt RHS;
@@ -432,7 +792,7 @@ static Value *simplifyInstruction(SCCPSolver &Solver,
if (NewCmpCR.getEquivalentICmp(Pred, RHS)) {
IRBuilder<NoFolder> Builder(&Inst);
Value *NewICmp =
- Builder.CreateICmp(Pred, X, ConstantInt::get(X->getType(), RHS));
+ Builder.CreateICmp(Pred, Y, ConstantInt::get(Y->getType(), RHS));
InsertedValues.insert(NewICmp);
return NewICmp;
}
@@ -445,7 +805,7 @@ static Value *simplifyInstruction(SCCPSolver &Solver,
bool SCCPSolver::simplifyInstsInBlock(BasicBlock &BB,
SmallPtrSetImpl<Value *> &InsertedValues,
Statistic &InstRemovedStat,
- Statistic &InstReplacedStat) {
+ Statistic &InstReplacedStat, bool Eager) {
bool MadeChanges = false;
for (Instruction &Inst : make_early_inc_range(BB)) {
if (Inst.getType()->isVoidTy())
@@ -461,7 +821,8 @@ bool SCCPSolver::simplifyInstsInBlock(BasicBlock &BB,
++InstReplacedStat;
} else if (refineInstruction(*this, InsertedValues, Inst)) {
MadeChanges = true;
- } else if (auto *V = simplifyInstruction(*this, InsertedValues, Inst)) {
+ } else if (auto *V =
+ simplifyInstruction(*this, InsertedValues, Inst, Eager)) {
Inst.replaceAllUsesWith(V);
Inst.eraseFromParent();
++InstRemovedStat;
diff --git a/llvm/test/Transforms/SCCP/eager-invertible-periodic-mapping.ll b/llvm/test/Transforms/SCCP/eager-invertible-periodic-mapping.ll
index ef59b3d5f448e..170659ad43c5b 100644
--- a/llvm/test/Transforms/SCCP/eager-invertible-periodic-mapping.ll
+++ b/llvm/test/Transforms/SCCP/eager-invertible-periodic-mapping.ll
@@ -6,7 +6,7 @@ define i1 @mul_preimage_only_in_late_sccp(i8 range(i8 0, 5) %x) {
; SCCP-LABEL: define i1 @mul_preimage_only_in_late_sccp(
; SCCP-SAME: i8 range(i8 0, 5) [[X:%.*]]) {
; SCCP-NEXT: [[M:%.*]] = mul nuw nsw i8 [[X]], 17
-; SCCP-NEXT: [[CMP:%.*]] = icmp slt i8 [[M]], 17
+; SCCP-NEXT: [[CMP:%.*]] = icmp eq i8 [[X]], 0
; SCCP-NEXT: ret i1 [[CMP]]
;
; IPSCCP-LABEL: define i1 @mul_preimage_only_in_late_sccp(
diff --git a/llvm/test/Transforms/SCCP/invertible-periodic-linear-mapping.ll b/llvm/test/Transforms/SCCP/invertible-periodic-linear-mapping.ll
index 0ce7b25de517a..fb176b397c520 100644
--- a/llvm/test/Transforms/SCCP/invertible-periodic-linear-mapping.ll
+++ b/llvm/test/Transforms/SCCP/invertible-periodic-linear-mapping.ll
@@ -17,8 +17,8 @@ define i1 @slt_invertible_zext_mul_full_image(<2 x i9> %v) {
; CHECK-SAME: <2 x i9> [[V:%.*]]) {
; CHECK-NEXT: [[E:%.*]] = extractelement <2 x i9> [[V]], i64 0
; CHECK-NEXT: [[Z:%.*]] = zext i9 [[E]] to i27
-; CHECK-NEXT: [[M:%.*]] = mul nuw i27 [[Z]], 262657
-; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i27 [[M]], 262657
+; CHECK-NEXT: [[TMP1:%.*]] = add i27 [[Z]], -256
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i27 [[TMP1]], -255
; CHECK-NEXT: ret i1 [[TMP2]]
;
%e = extractelement <2 x i9> %v, i64 0
@@ -37,8 +37,8 @@ define i1 @slt_invertible_zext_mul_full_image_i16(i8 %v) {
; CHECK-LABEL: define i1 @slt_invertible_zext_mul_full_image_i16(
; CHECK-SAME: i8 [[V:%.*]]) {
; CHECK-NEXT: [[Z:%.*]] = zext i8 [[V]] to i16
-; CHECK-NEXT: [[M:%.*]] = mul nuw i16 [[Z]], 257
-; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i16 [[M]], 257
+; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[Z]], -128
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i16 [[TMP1]], -127
; CHECK-NEXT: ret i1 [[TMP2]]
;
%z = zext i8 %v to i16
@@ -56,8 +56,8 @@ define i1 @slt_invertible_zext_mul_full_image_i8(i4 %v) {
; CHECK-LABEL: define i1 @slt_invertible_zext_mul_full_image_i8(
; CHECK-SAME: i4 [[V:%.*]]) {
; CHECK-NEXT: [[Z:%.*]] = zext i4 [[V]] to i8
-; CHECK-NEXT: [[M:%.*]] = mul nuw i8 [[Z]], 17
-; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i8 [[M]], 17
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[Z]], -8
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], -7
; CHECK-NEXT: ret i1 [[TMP2]]
;
%z = zext i4 %v to i8
@@ -75,8 +75,7 @@ define i1 @ult_invertible_zext_mul_partial_image(i4 %x) {
; CHECK-LABEL: define i1 @ult_invertible_zext_mul_partial_image(
; CHECK-SAME: i4 [[X:%.*]]) {
; CHECK-NEXT: [[Z:%.*]] = zext i4 [[X]] to i8
-; CHECK-NEXT: [[M:%.*]] = mul nuw i8 [[Z]], 10
-; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[M]], 50
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[Z]], 5
; CHECK-NEXT: ret i1 [[TMP1]]
;
%z = zext i4 %x to i8
@@ -126,8 +125,8 @@ define i1 @sge_invertible_tail_of_zext_mul(i4 %x) {
; CHECK-LABEL: define i1 @sge_invertible_tail_of_zext_mul(
; CHECK-SAME: i4 [[X:%.*]]) {
; CHECK-NEXT: [[Z:%.*]] = zext i4 [[X]] to i8
-; CHECK-NEXT: [[M:%.*]] = mul i8 [[Z]], 20
-; CHECK-NEXT: [[TMP2:%.*]] = icmp sge i8 [[M]], 60
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[Z]], -3
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 4
; CHECK-NEXT: ret i1 [[TMP2]]
;
%z = zext i4 %x to i8
@@ -145,8 +144,8 @@ define i1 @uge_invertible_tail_of_zext_mul(i4 %x) {
; CHECK-LABEL: define i1 @uge_invertible_tail_of_zext_mul(
; CHECK-SAME: i4 [[X:%.*]]) {
; CHECK-NEXT: [[Z:%.*]] = zext i4 [[X]] to i8
-; CHECK-NEXT: [[M:%.*]] = mul i8 [[Z]], 20
-; CHECK-NEXT: [[TMP2:%.*]] = icmp uge i8 [[M]], 60
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[Z]], -3
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 10
; CHECK-NEXT: ret i1 [[TMP2]]
;
%z = zext i4 %x to i8
@@ -164,8 +163,8 @@ define i1 @slt_noninvertible_signed_range_before_tail(i4 %v) {
; CHECK-LABEL: define i1 @slt_noninvertible_signed_range_before_tail(
; CHECK-SAME: i4 [[V:%.*]]) {
; CHECK-NEXT: [[Z:%.*]] = zext i4 [[V]] to i8
-; CHECK-NEXT: [[CAST:%.*]] = mul nuw i8 [[Z]], 18
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[CAST]], 16
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[Z]], -8
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[TMP1]], -7
; CHECK-NEXT: ret i1 [[CMP]]
;
%z = zext i4 %v to i8
@@ -183,8 +182,8 @@ define i1 @ult_noninvertible_zext_mul_range(i4 %x) {
; CHECK-LABEL: define i1 @ult_noninvertible_zext_mul_range(
; CHECK-SAME: i4 [[X:%.*]]) {
; CHECK-NEXT: [[Z:%.*]] = zext i4 [[X]] to i8
-; CHECK-NEXT: [[M:%.*]] = mul i8 [[Z]], 20
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[M]], 45
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[Z]], -13
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[TMP1]], -10
; CHECK-NEXT: ret i1 [[CMP]]
;
%z = zext i4 %x to i8
@@ -202,8 +201,8 @@ define i1 @ult_noninvertible_zext_mul_before_tail(i4 %v) {
; CHECK-LABEL: define i1 @ult_noninvertible_zext_mul_before_tail(
; CHECK-SAME: i4 [[V:%.*]]) {
; CHECK-NEXT: [[Z:%.*]] = zext i4 [[V]] to i8
-; CHECK-NEXT: [[CAST:%.*]] = mul i8 [[Z]], 18
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[CAST]], 16
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[Z]], -15
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[TMP1]], -14
; CHECK-NEXT: ret i1 [[CMP]]
;
%z = zext i4 %v to i8
@@ -221,8 +220,8 @@ define i1 @slt_noninvertible_crosses_wrap(i4 %v) {
; CHECK-LABEL: define i1 @slt_noninvertible_crosses_wrap(
; CHECK-SAME: i4 [[V:%.*]]) {
; CHECK-NEXT: [[Z:%.*]] = zext i4 [[V]] to i8
-; CHECK-NEXT: [[CAST:%.*]] = mul i8 [[Z]], 20
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[CAST]], 60
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[Z]], -7
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[TMP1]], -4
; CHECK-NEXT: ret i1 [[CMP]]
;
%z = zext i4 %v to i8
@@ -262,9 +261,8 @@ define i1 @sge_invertible_tail_of_zext_mul_plus_offset(i4 %x) {
; CHECK-LABEL: define i1 @sge_invertible_tail_of_zext_mul_plus_offset(
; CHECK-SAME: i4 [[X:%.*]]) {
; CHECK-NEXT: [[Z:%.*]] = zext i4 [[X]] to i8
-; CHECK-NEXT: [[M:%.*]] = mul i8 [[Z]], 20
-; CHECK-NEXT: [[A:%.*]] = sub i8 [[M]], 100
-; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[A]], 10
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[Z]], -6
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[TMP1]], 6
; CHECK-NEXT: ret i1 [[CMP]]
;
%z = zext i4 %x to i8
@@ -283,9 +281,8 @@ define i1 @ult_invertible_zext_mul_plus_offset(i4 %x) {
; CHECK-LABEL: define i1 @ult_invertible_zext_mul_plus_offset(
; CHECK-SAME: i4 [[X:%.*]]) {
; CHECK-NEXT: [[Z:%.*]] = zext i4 [[X]] to i8
-; CHECK-NEXT: [[M:%.*]] = mul i8 [[Z]], 20
-; CHECK-NEXT: [[A:%.*]] = add i8 [[M]], -100
-; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[A]], 100
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[Z]], -5
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 5
; CHECK-NEXT: ret i1 [[TMP2]]
;
%z = zext i4 %x to i8
@@ -304,9 +301,8 @@ define i1 @slt_inverse_invertible_zext_mul_plus_offset(i4 %x) {
; CHECK-LABEL: define i1 @slt_inverse_invertible_zext_mul_plus_offset(
; CHECK-SAME: i4 [[X:%.*]]) {
; CHECK-NEXT: [[Z:%.*]] = zext i4 [[X]] to i8
-; CHECK-NEXT: [[M:%.*]] = mul i8 [[Z]], 20
-; CHECK-NEXT: [[A:%.*]] = add i8 [[M]], 6
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[A]], 66
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[Z]], -7
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[TMP1]], -4
; CHECK-NEXT: ret i1 [[CMP]]
;
%z = zext i4 %x to i8
@@ -357,8 +353,8 @@ define i1 @sccp_sext_mul_shrinks_to_negative_suffix(i4 %x) {
define i1 @sccp_mul_wraps_once_shrinks_to_middle_window(i8 range(i8 0, 18) %x) {
; CHECK-LABEL: define i1 @sccp_mul_wraps_once_shrinks_to_middle_window(
; CHECK-SAME: i8 range(i8 0, 18) [[X:%.*]]) {
-; CHECK-NEXT: [[M:%.*]] = mul i8 [[X]], 20
-; CHECK-NEXT: [[TMP2:%.*]] = icmp uge i8 [[M]], 100
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X]], -5
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 8
; CHECK-NEXT: ret i1 [[TMP2]]
;
%m = mul i8 %x, 20
@@ -369,7 +365,7 @@ define i1 @sccp_mul_wraps_once_shrinks_to_middle_window(i8 range(i8 0, 18) %x) {
define i1 @sccp_urem_wraps_once_shrinks_to_middle_window(i8 range(i8 8, 19) %x) {
; CHECK-LABEL: define i1 @sccp_urem_wraps_once_shrinks_to_middle_window(
; CHECK-SAME: i8 range(i8 8, 19) [[X:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = urem i8 [[X]], 10
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X]], -10
; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 3
; CHECK-NEXT: ret i1 [[TMP2]]
;
@@ -381,8 +377,8 @@ define i1 @sccp_urem_wraps_once_shrinks_to_middle_window(i8 range(i8 8, 19) %x)
define i1 @sccp_negative_mul_wraps_once_shrinks_to_middle_window(i8 range(i8 3, 17) %x) {
; CHECK-LABEL: define i1 @sccp_negative_mul_wraps_once_shrinks_to_middle_window(
; CHECK-SAME: i8 range(i8 3, 17) [[X:%.*]]) {
-; CHECK-NEXT: [[M:%.*]] = mul i8 [[X]], -20
-; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[M]], 60
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X]], -10
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 3
; CHECK-NEXT: ret i1 [[TMP2]]
;
%m = mul i8 %x, -20
@@ -393,8 +389,8 @@ define i1 @sccp_negative_mul_wraps_once_shrinks_to_middle_window(i8 range(i8 3,
define i1 @sccp_shl_wraps_once_shrinks_to_upper_window(i8 range(i8 5, 23) %x) {
; CHECK-LABEL: define i1 @sccp_shl_wraps_once_shrinks_to_upper_window(
; CHECK-SAME: i8 range(i8 5, 23) [[X:%.*]]) {
-; CHECK-NEXT: [[Y:%.*]] = shl i8 [[X]], 4
-; CHECK-NEXT: [[TMP2:%.*]] = icmp uge i8 [[Y]], -96
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X]], -10
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 6
; CHECK-NEXT: ret i1 [[TMP2]]
;
%y = shl i8 %x, 4
@@ -405,9 +401,8 @@ define i1 @sccp_shl_wraps_once_shrinks_to_upper_window(i8 range(i8 5, 23) %x) {
define i1 @sccp_mul_wraps_once_shrinks_to_two_value_window1(i8 range(i8 2, 6) %x) {
; CHECK-LABEL: define i1 @sccp_mul_wraps_once_shrinks_to_two_value_window1(
; CHECK-SAME: i8 range(i8 2, 6) [[X:%.*]]) {
-; CHECK-NEXT: [[M:%.*]] = mul i8 [[X]], 100
-; CHECK-NEXT: [[A:%.*]] = add i8 [[M]], 56
-; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[A]], 100
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X]], -3
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[TMP1]], 2
; CHECK-NEXT: ret i1 [[CMP]]
;
%m = mul i8 %x, 100
@@ -434,8 +429,7 @@ define i1 @sccp_mul_wraps_once_shrinks_to_two_value_window2(i8 range(i8 -2, 2) %
define i1 @sccp_mul_wraps_once_shrinks_to_singleton(i8 range(i8 0, 18) %x) {
; CHECK-LABEL: define i1 @sccp_mul_wraps_once_shrinks_to_singleton(
; CHECK-SAME: i8 range(i8 0, 18) [[X:%.*]]) {
-; CHECK-NEXT: [[M:%.*]] = mul i8 [[X]], 20
-; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[M]], -116
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X]], 7
; CHECK-NEXT: ret i1 [[TMP1]]
;
%m = mul i8 %x, 20
@@ -525,8 +519,8 @@ define i1 @sccp_mul_wraps_once_shrinks_to_wrapped_domain_singleton(i8 range(i8 2
define <4 x i1> @vec_splat_and(<4 x i32> range(i32 100, 456) %x) {
; CHECK-LABEL: define <4 x i1> @vec_splat_and(
; CHECK-SAME: <4 x i32> range(i32 100, 456) [[X:%.*]]) {
-; CHECK-NEXT: [[M:%.*]] = and <4 x i32> [[X]], splat (i32 255)
-; CHECK-NEXT: [[CMP:%.*]] = icmp uge <4 x i32> [[M]], splat (i32 230)
+; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[X]], splat (i32 -230)
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult <4 x i32> [[TMP1]], splat (i32 26)
; CHECK-NEXT: ret <4 x i1> [[CMP]]
;
%m = and <4 x i32> %x, splat (i32 255)
@@ -537,9 +531,8 @@ define <4 x i1> @vec_splat_and(<4 x i32> range(i32 100, 456) %x) {
define <4 x i1> @vec_splat_mul(<4 x i8> range(i8 10, 17) %x) {
; CHECK-LABEL: define <4 x i1> @vec_splat_mul(
; CHECK-SAME: <4 x i8> range(i8 10, 17) [[X:%.*]]) {
-; CHECK-NEXT: [[M:%.*]] = mul <4 x i8> [[X]], splat (i8 50)
-; CHECK-NEXT: [[ADD:%.*]] = add <4 x i8> [[M]], splat (i8 -40)
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult <4 x i8> [[ADD]], splat (i8 110)
+; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i8> [[X]], splat (i8 -12)
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult <4 x i8> [[TMP1]], splat (i8 2)
; CHECK-NEXT: ret <4 x i1> [[CMP]]
;
%m = mul <4 x i8> %x, splat (i8 50)
@@ -551,8 +544,8 @@ define <4 x i1> @vec_splat_mul(<4 x i8> range(i8 10, 17) %x) {
define <4 x i1> @vec_splat_urem(<4 x i32> range(i32 1, 10) %x) {
; CHECK-LABEL: define <4 x i1> @vec_splat_urem(
; CHECK-SAME: <4 x i32> range(i32 1, 10) [[X:%.*]]) {
-; CHECK-NEXT: [[M:%.*]] = urem <4 x i32> [[X]], splat (i32 8)
-; CHECK-NEXT: [[CMP:%.*]] = icmp uge <4 x i32> [[M]], splat (i32 2)
+; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[X]], splat (i32 -2)
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult <4 x i32> [[TMP1]], splat (i32 6)
; CHECK-NEXT: ret <4 x i1> [[CMP]]
;
%m = urem <4 x i32> %x, splat (i32 8)
@@ -563,8 +556,7 @@ define <4 x i1> @vec_splat_urem(<4 x i32> range(i32 1, 10) %x) {
define <4 x i1> @vec_splat_shl(<4 x i8> range(i8 1, 4) %x) {
; CHECK-LABEL: define <4 x i1> @vec_splat_shl(
; CHECK-SAME: <4 x i8> range(i8 1, 4) [[X:%.*]]) {
-; CHECK-NEXT: [[M:%.*]] = shl <4 x i8> [[X]], splat (i8 7)
-; CHECK-NEXT: [[CMP:%.*]] = icmp uge <4 x i8> [[M]], splat (i8 100)
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <4 x i8> [[X]], splat (i8 2)
; CHECK-NEXT: ret <4 x i1> [[CMP]]
;
%m = shl <4 x i8> %x, splat (i8 7)
>From 16807082b1f97315452ba82cae259594704670e2 Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Thu, 26 Mar 2026 01:50:09 +0800
Subject: [PATCH 9/9] Update other tests
---
llvm/test/Transforms/PhaseOrdering/cmp-logic.ll | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/test/Transforms/PhaseOrdering/cmp-logic.ll b/llvm/test/Transforms/PhaseOrdering/cmp-logic.ll
index 04eae7d2941d8..72d7651509f9e 100644
--- a/llvm/test/Transforms/PhaseOrdering/cmp-logic.ll
+++ b/llvm/test/Transforms/PhaseOrdering/cmp-logic.ll
@@ -111,8 +111,7 @@ define i32 @PR56119(i32 %e.coerce) {
; O1-LABEL: @PR56119(
; O1-NEXT: entry:
; O1-NEXT: [[CONV2:%.*]] = and i32 [[E_COERCE:%.*]], 255
-; O1-NEXT: [[REM:%.*]] = urem i32 [[CONV2]], 255
-; O1-NEXT: [[CMP:%.*]] = icmp eq i32 [[REM]], 7
+; O1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CONV2]], 7
; O1-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; O1: if.then:
; O1-NEXT: tail call void (...) @foo()
More information about the llvm-commits
mailing list