[llvm] [SCCP] Fold y = f(x) = (Cx mod M) ∈ R into x ∈ R' (PR #186347)
Kunqiu Chen via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 26 01:48:21 PDT 2026
https://github.com/Camsyn updated https://github.com/llvm/llvm-project/pull/186347
>From 4b2a73fd025d96914d9358b30e86688a023885b2 Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Mon, 23 Mar 2026 17:47:30 +0800
Subject: [PATCH 1/9] Pre-commit tests
---
.../Transforms/SCCP/relax-range-checks.ll | 123 ++++++++++++++++++
1 file changed, 123 insertions(+)
diff --git a/llvm/test/Transforms/SCCP/relax-range-checks.ll b/llvm/test/Transforms/SCCP/relax-range-checks.ll
index 34e48136df37a..f7c4f6f468929 100644
--- a/llvm/test/Transforms/SCCP/relax-range-checks.ll
+++ b/llvm/test/Transforms/SCCP/relax-range-checks.ll
@@ -113,4 +113,127 @@ define i1 @range_check_to_icmp_eq2(i32 range(i32 -1, 2) %x) {
ret i1 %cmp
}
+define i1 @range_check_to_icmp_ult(i8 range(i8 2, 10) %x) {
+; CHECK-LABEL: define i1 @range_check_to_icmp_ult(
+; CHECK-SAME: i8 range(i8 2, 10) [[X:%.*]]) {
+; CHECK-NEXT: [[OFF:%.*]] = add nsw i8 [[X]], -2
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[OFF]], 4
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %off = add i8 %x, -2
+ %cmp = icmp ult i8 %off, 4
+ ret i1 %cmp
+}
+
+define i1 @range_check_to_icmp_uge(i8 range(i8 2, 6) %x) {
+; CHECK-LABEL: define i1 @range_check_to_icmp_uge(
+; CHECK-SAME: i8 range(i8 2, 6) [[X:%.*]]) {
+; CHECK-NEXT: [[OFF:%.*]] = add nsw i8 [[X]], -4
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[OFF]], 2
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %off = add nsw i8 %x, -4
+ %cmp = icmp ult i8 %off, 2
+ ret i1 %cmp
+}
+
+define i1 @range_check_to_icmp_slt(i8 range(i8 -56, 20) %x) {
+; CHECK-LABEL: define i1 @range_check_to_icmp_slt(
+; CHECK-SAME: i8 range(i8 -56, 20) [[X:%.*]]) {
+; CHECK-NEXT: [[OFF:%.*]] = add nsw i8 [[X]], 56
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[OFF]], 50
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %off = add nsw i8 %x, 56
+ %cmp = icmp ult i8 %off, 50
+ ret i1 %cmp
+}
+
+define i1 @range_check_to_icmp_sge(i8 range(i8 -56, 20) %x) {
+; CHECK-LABEL: define i1 @range_check_to_icmp_sge(
+; CHECK-SAME: i8 range(i8 -56, 20) [[X:%.*]]) {
+; CHECK-NEXT: [[OFF:%.*]] = add nsw i8 [[X]], 16
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[OFF]], 36
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %off = add nsw i8 %x, 16
+ %cmp = icmp ult i8 %off, 36
+ ret i1 %cmp
+}
+
+; Cover the early exit when ActiveCmpCR is already a one-icmp check.
+
+define i1 @range_check_intersection_to_icmp_eq(i32 range(i32 0, 4) %x) {
+; CHECK-LABEL: define i1 @range_check_intersection_to_icmp_eq(
+; CHECK-SAME: i32 range(i32 0, 4) [[X:%.*]]) {
+; CHECK-NEXT: [[OFF:%.*]] = add nsw i32 [[X]], -3
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 3
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %off = add nsw i32 %x, -3
+ %cmp = icmp ult i32 %off, 2
+ ret i1 %cmp
+}
+
+define i1 @range_check_intersection_to_icmp_ult(i8 range(i8 0, 10) %x) {
+; CHECK-LABEL: define i1 @range_check_intersection_to_icmp_ult(
+; CHECK-SAME: i8 range(i8 0, 10) [[X:%.*]]) {
+; CHECK-NEXT: [[OFF:%.*]] = add nuw nsw i8 [[X]], 2
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[X]], 4
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %off = add i8 %x, 2
+ %cmp = icmp ult i8 %off, 6
+ ret i1 %cmp
+}
+
+define i1 @range_check_intersection_to_icmp_slt(i8 range(i8 -128, -100) %x) {
+; CHECK-LABEL: define i1 @range_check_intersection_to_icmp_slt(
+; CHECK-SAME: i8 range(i8 -128, -100) [[X:%.*]]) {
+; CHECK-NEXT: [[OFF:%.*]] = add i8 [[X]], -120
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[X]], -118
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %off = add i8 %x, -120
+ %cmp = icmp ult i8 %off, 18
+ ret i1 %cmp
+}
+
+define i1 @range_check_intersection_to_icmp_uge(i8 range(i8 -6, 0) %x) {
+; CHECK-LABEL: define i1 @range_check_intersection_to_icmp_uge(
+; CHECK-SAME: i8 range(i8 -6, 0) [[X:%.*]]) {
+; CHECK-NEXT: [[OFF:%.*]] = add nsw i8 [[X]], 2
+; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[X]], -2
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %off = add i8 %x, 2
+ %cmp = icmp ult i8 %off, 6
+ ret i1 %cmp
+}
+
+define i1 @range_check_intersection_to_icmp_sge(i8 range(i8 120, -128) %x) {
+; CHECK-LABEL: define i1 @range_check_intersection_to_icmp_sge(
+; CHECK-SAME: i8 range(i8 120, -128) [[X:%.*]]) {
+; CHECK-NEXT: [[OFF:%.*]] = add nsw i8 [[X]], -122
+; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[X]], 122
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %off = add i8 %x, -122
+ %cmp = icmp ult i8 %off, 14
+ ret i1 %cmp
+}
+
+; Negative test: CmpCR relaxation cannot be performed when x's range is both wrapped and sign-wrapped.
+define i1 @range_check_nsw_nuw(i8 range(i8 -20, -56) %x) {
+; CHECK-LABEL: define i1 @range_check_nsw_nuw(
+; CHECK-SAME: i8 range(i8 -20, -56) [[X:%.*]]) {
+; CHECK-NEXT: [[OFF:%.*]] = add i8 [[X]], 20
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[OFF]], 14
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %off = add i8 %x, 20
+ %cmp = icmp ult i8 %off, 14
+ ret i1 %cmp
+}
+
declare void @use(i8)
>From 4be1c34e10a4c538047f7c64eff8a9e0e4f0fd1c Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Mon, 23 Mar 2026 18:06:10 +0800
Subject: [PATCH 2/9] Relax 2-insts range check to one-icmp check
---
llvm/lib/Transforms/Utils/SCCPSolver.cpp | 130 +++++++++++++++---
.../Transforms/SCCP/relax-range-checks.ll | 8 +-
2 files changed, 112 insertions(+), 26 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index fd315c14df866..c3414d63ada57 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/SCCPSolver.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -105,6 +106,96 @@ static ConstantRange getRange(Value *Op, SCCPSolver &Solver,
/*UndefAllowed=*/false);
}
+/// SCCP already proves x \in KnownCR, so only ActiveCmpCR = CmpCR ∩ KnownCR
+/// matters. Try to replace CmpCR with a simpler equivalent range NewCmpCR
+/// such that NewCmpCR ∩ KnownCR == ActiveCmpCR.
+///
+/// Prefer ranges that lower to a single canonical compare without an add:
+/// - [L, L+1) --> X eq L
+/// - [R+1, R) --> X ne R
+/// - [0, R) --> X ult R
+/// - [L, 0) --> X uge L
+/// - [SignMin, R) --> X slt R
+/// - [L, SignMin) --> X sge L
+///
+/// If no such range preserves the active semantics under KnownCR, keep CmpCR.
+static ConstantRange simplifyCmpRange(const ConstantRange &CmpCR,
+ const ConstantRange &KnownCR) {
+ assert(!KnownCR.inverse().contains(CmpCR) &&
+ "CmpCR ∩ KnowCR should not be ∅");
+ assert((!CmpCR.isFullSet() && !CmpCR.isEmptySet()) && "Unexpected CmpCR");
+ assert((!KnownCR.isFullSet() && !KnownCR.isEmptySet()) &&
+ "Unexpected KnownCR");
+
+ // If KnownCR is both nuw and nsw, we cannot relax CmpCR at all.
+ if (KnownCR.isWrappedSet() && KnownCR.isSignWrappedSet())
+ return CmpCR;
+
+ const unsigned BW = CmpCR.getBitWidth();
+ // All reachable value satisfy CmpCR --> always true.
+ if (CmpCR.contains(KnownCR))
+ return ConstantRange::getFull(BW);
+
+ std::optional<ConstantRange> ActCmpCR = CmpCR.exactIntersectWith(KnownCR);
+ if (!ActCmpCR)
+ return CmpCR;
+
+ const APInt &CmpLo = ActCmpCR->getLower(), &CmpHi = ActCmpCR->getUpper();
+
+ // If the intersection happens to be the ONE-icmp check, just return it.
+ if (/*eq*/ ActCmpCR->isSingleElement() ||
+ /*ne*/ ActCmpCR->inverse().isSingleElement() ||
+ /*ult*/ CmpLo.isZero() ||
+ /*slt*/ CmpLo.isMinSignedValue() ||
+ /*uge*/ CmpHi.isZero() ||
+ /*sge*/ CmpHi.isMinSignedValue())
+ return *ActCmpCR;
+
+ const APInt Zero = APInt::getZero(BW);
+ const APInt SignMin = APInt::getSignedMinValue(BW);
+
+ if (CmpLo == KnownCR.getLower()) {
+ // Tie to lower:
+
+ // Try ult
+ // 0
+ // | L------------R : KnownCR
+ // | L---R : ActiveCmpCR
+ // L------R : RelaxedCmpCR
+ if (!KnownCR.isWrappedSet())
+ return ConstantRange::getNonEmpty(Zero, CmpHi);
+
+ // Try slt
+ // smin smin
+ // -----R | L------- : KnownCR ----R | L------- : KnownCR
+ // | L--R : ActiveCmpCR --R | L------- : ActiveCmpCR
+ // L-----R : RelaxedCmpCR --R L---------- : RelaxedCmpCR
+ if (!KnownCR.isSignWrappedSet())
+ return ConstantRange::getNonEmpty(SignMin, CmpHi);
+
+ } else if (CmpHi == KnownCR.getUpper()) {
+ // Tie to upper:
+
+ // Try uge
+ // 0
+ // | L--------R : KnownCR
+ // | L---R : ActiveCmpCR
+ // R L---------- : RelaxedCmpCR
+ if (!KnownCR.isWrappedSet())
+ return ConstantRange::getNonEmpty(CmpLo, Zero);
+
+ // Try sge
+ // smin smin
+ // -----R | L------- : KnownCR -----R | L------- : KnownCR
+ // L--R | : ActiveCmpCR -----R | L--- : ActiveCmpCR
+ // L-----R : RelaxedCmpCR --------R L--- : RelaxedCmpCR
+ if (!KnownCR.isSignWrappedSet())
+ return ConstantRange::getNonEmpty(CmpLo, SignMin);
+ }
+
+ return CmpCR;
+}
+
/// Try to use \p Inst's value range from \p Solver to infer the NUW flag.
static bool refineInstruction(SCCPSolver &Solver,
const SmallPtrSetImpl<Value *> &InsertedValues,
@@ -318,29 +409,24 @@ static Value *simplifyInstruction(SCCPSolver &Solver,
// Early exit if we know nothing about X.
if (LRange.isFullSet())
return nullptr;
- auto ConvertCRToICmp =
- [&](const std::optional<ConstantRange> &NewCR) -> Value * {
- ICmpInst::Predicate Pred;
- APInt RHS;
- // Check if we can represent NewCR as an icmp predicate.
- if (NewCR && NewCR->getEquivalentICmp(Pred, RHS)) {
- IRBuilder<NoFolder> Builder(&Inst);
- Value *NewICmp =
- Builder.CreateICmp(Pred, X, ConstantInt::get(X->getType(), RHS));
- InsertedValues.insert(NewICmp);
- return NewICmp;
- }
- return nullptr;
- };
// We are allowed to refine the comparison to either true or false for out
- // of range inputs.
- // Here we refine the comparison to false, and check if we can narrow the
- // range check to a simpler test.
- if (auto *V = ConvertCRToICmp(CR->exactIntersectWith(LRange)))
- return V;
- // Here we refine the comparison to true, i.e. we relax the range check.
- if (auto *V = ConvertCRToICmp(CR->exactUnionWith(LRange.inverse())))
- return V;
+ // of range inputs. Based on this, try to simplify CmpCR as a single
+ // ult/uge/slt/sge/eq/ne.
+ // E.g., CmpCR = [3, 10), LRange = [5, 0) --> NewCmpCR = [0, 10) -> ult
+ ConstantRange NewCmpCR = simplifyCmpRange(*CR, LRange);
+
+ ICmpInst::Predicate Pred;
+ APInt RHS;
+ // If NewCmpCR is just the same as CR, no simplification happens.
+ if (NewCmpCR != *CR) {
+ bool Match [[maybe_unused]] = NewCmpCR.getEquivalentICmp(Pred, RHS);
+ assert(Match && "Incorrect simplifyCmpRange");
+ IRBuilder<NoFolder> Builder(&Inst);
+ Value *NewICmp =
+ Builder.CreateICmp(Pred, X, ConstantInt::get(X->getType(), RHS));
+ InsertedValues.insert(NewICmp);
+ return NewICmp;
+ }
}
}
diff --git a/llvm/test/Transforms/SCCP/relax-range-checks.ll b/llvm/test/Transforms/SCCP/relax-range-checks.ll
index f7c4f6f468929..998271b3b24d9 100644
--- a/llvm/test/Transforms/SCCP/relax-range-checks.ll
+++ b/llvm/test/Transforms/SCCP/relax-range-checks.ll
@@ -117,7 +117,7 @@ define i1 @range_check_to_icmp_ult(i8 range(i8 2, 10) %x) {
; CHECK-LABEL: define i1 @range_check_to_icmp_ult(
; CHECK-SAME: i8 range(i8 2, 10) [[X:%.*]]) {
; CHECK-NEXT: [[OFF:%.*]] = add nsw i8 [[X]], -2
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[OFF]], 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[X]], 6
; CHECK-NEXT: ret i1 [[CMP]]
;
%off = add i8 %x, -2
@@ -129,7 +129,7 @@ define i1 @range_check_to_icmp_uge(i8 range(i8 2, 6) %x) {
; CHECK-LABEL: define i1 @range_check_to_icmp_uge(
; CHECK-SAME: i8 range(i8 2, 6) [[X:%.*]]) {
; CHECK-NEXT: [[OFF:%.*]] = add nsw i8 [[X]], -4
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[OFF]], 2
+; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[X]], 4
; CHECK-NEXT: ret i1 [[CMP]]
;
%off = add nsw i8 %x, -4
@@ -141,7 +141,7 @@ define i1 @range_check_to_icmp_slt(i8 range(i8 -56, 20) %x) {
; CHECK-LABEL: define i1 @range_check_to_icmp_slt(
; CHECK-SAME: i8 range(i8 -56, 20) [[X:%.*]]) {
; CHECK-NEXT: [[OFF:%.*]] = add nsw i8 [[X]], 56
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[OFF]], 50
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[X]], -6
; CHECK-NEXT: ret i1 [[CMP]]
;
%off = add nsw i8 %x, 56
@@ -153,7 +153,7 @@ define i1 @range_check_to_icmp_sge(i8 range(i8 -56, 20) %x) {
; CHECK-LABEL: define i1 @range_check_to_icmp_sge(
; CHECK-SAME: i8 range(i8 -56, 20) [[X:%.*]]) {
; CHECK-NEXT: [[OFF:%.*]] = add nsw i8 [[X]], 16
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[OFF]], 36
+; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[X]], -16
; CHECK-NEXT: ret i1 [[CMP]]
;
%off = add nsw i8 %x, 16
>From 2c5b36424159b3dbdfe0bf0baf68626f3da6088c Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Tue, 24 Mar 2026 21:14:03 +0800
Subject: [PATCH 3/9] fix: CmpCR could be simple enough
---
llvm/lib/Transforms/Utils/SCCPSolver.cpp | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index c3414d63ada57..206b4dc0a7691 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/SCCPSolver.h"
-#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -417,10 +416,8 @@ static Value *simplifyInstruction(SCCPSolver &Solver,
ICmpInst::Predicate Pred;
APInt RHS;
- // If NewCmpCR is just the same as CR, no simplification happens.
- if (NewCmpCR != *CR) {
- bool Match [[maybe_unused]] = NewCmpCR.getEquivalentICmp(Pred, RHS);
- assert(Match && "Incorrect simplifyCmpRange");
+ // NewCmpCR might be CmpCR, i.e., no simplification happens.
+ if (NewCmpCR.getEquivalentICmp(Pred, RHS)) {
IRBuilder<NoFolder> Builder(&Inst);
Value *NewICmp =
Builder.CreateICmp(Pred, X, ConstantInt::get(X->getType(), RHS));
>From 6554b14e65496cfbf4f416cfe3ab646dae958912 Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Tue, 24 Mar 2026 23:44:23 +0800
Subject: [PATCH 4/9] fix: move a guard to its proper location
---
llvm/lib/Transforms/Utils/SCCPSolver.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index 206b4dc0a7691..44a11fb2d745f 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -126,10 +126,6 @@ static ConstantRange simplifyCmpRange(const ConstantRange &CmpCR,
assert((!KnownCR.isFullSet() && !KnownCR.isEmptySet()) &&
"Unexpected KnownCR");
- // If KnownCR is both nuw and nsw, we cannot relax CmpCR at all.
- if (KnownCR.isWrappedSet() && KnownCR.isSignWrappedSet())
- return CmpCR;
-
const unsigned BW = CmpCR.getBitWidth();
// All reachable value satisfy CmpCR --> always true.
if (CmpCR.contains(KnownCR))
@@ -150,6 +146,10 @@ static ConstantRange simplifyCmpRange(const ConstantRange &CmpCR,
/*sge*/ CmpHi.isMinSignedValue())
return *ActCmpCR;
+ // If KnownCR is both nuw and nsw, we cannot relax CmpCR at all.
+ if (KnownCR.isWrappedSet() && KnownCR.isSignWrappedSet())
+ return CmpCR;
+
const APInt Zero = APInt::getZero(BW);
const APInt SignMin = APInt::getSignedMinValue(BW);
>From 42f845f62ff5ce8578613ddb64678fc740c03fe7 Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Wed, 25 Mar 2026 00:41:38 +0800
Subject: [PATCH 5/9] fix: support ne relaxing
---
llvm/lib/Transforms/Utils/SCCPSolver.cpp | 19 +++++++++++++++----
1 file changed, 15 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index 44a11fb2d745f..62f49dd7f6a2a 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -146,16 +146,19 @@ static ConstantRange simplifyCmpRange(const ConstantRange &CmpCR,
/*sge*/ CmpHi.isMinSignedValue())
return *ActCmpCR;
- // If KnownCR is both nuw and nsw, we cannot relax CmpCR at all.
- if (KnownCR.isWrappedSet() && KnownCR.isSignWrappedSet())
- return CmpCR;
-
const APInt Zero = APInt::getZero(BW);
const APInt SignMin = APInt::getSignedMinValue(BW);
if (CmpLo == KnownCR.getLower()) {
// Tie to lower:
+ // Try ne
+ // L------------R : KnownCR
+ // L-----------R : ActiveCmpCR
+ // ---------------RL-- : RelaxedCmpCR
+ if (CmpHi + 1 == KnownCR.getUpper())
+ return ConstantRange::getNonEmpty(KnownCR.getUpper(), CmpHi);
+
// Try ult
// 0
// | L------------R : KnownCR
@@ -175,6 +178,14 @@ static ConstantRange simplifyCmpRange(const ConstantRange &CmpCR,
} else if (CmpHi == KnownCR.getUpper()) {
// Tie to upper:
+ // Try ne
+ //
+ // L--------R : KnownCR
+ // L-------R : ActiveCmpCR
+ // ---RL-------------- : RelaxedCmpCR
+ if (KnownCR.getLower() + 1 == CmpLo)
+ return ConstantRange::getNonEmpty(CmpLo, KnownCR.getLower());
+
// Try uge
// 0
// | L--------R : KnownCR
>From 756d74fcf107c5df3f7d02885d3b6a62a32c7acc Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Wed, 25 Mar 2026 20:42:11 +0800
Subject: [PATCH 6/9] fix: prefer ne rather than ge/lt
---
llvm/lib/Transforms/Utils/SCCPSolver.cpp | 45 ++++++++++++------------
1 file changed, 23 insertions(+), 22 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index 62f49dd7f6a2a..cfbf733eb52c8 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -134,13 +134,29 @@ static ConstantRange simplifyCmpRange(const ConstantRange &CmpCR,
std::optional<ConstantRange> ActCmpCR = CmpCR.exactIntersectWith(KnownCR);
if (!ActCmpCR)
return CmpCR;
+ // Proof that ActCmpCR cannot be ne:
+ // 1. ActCmpCR = ne ∧ ActCmpCR ⊆ KnownCR -> KnownCR = ActCmpCR/fullset
+ // 2. KnownCR = fullset contradicts KnownCR != fullset
+ // 3. KnownCR = ActCmpCR = KnownCR ∩ CmpCR -> KnownCR ⊆ CmpCR
+ // 4. KnownCR ⊆ CmpCR contradicts KnownCR ⊈ CmpCR
+ assert(/*ne*/ !ActCmpCR->inverse().isSingleElement() && "Unexpected ne");
+
+ // We prefer eq rather than ne.
+ if (/*eq*/ ActCmpCR->isSingleElement())
+ return *ActCmpCR;
+
+ // We prefer ne rather than lt/ge.
+ // L--------R : KnownCR or L------------R : KnownCR
+ // L-------R : ActiveCmpCR L-----------R : ActiveCmpCR
+ // ---RL-------------- : RelaxedCmpCR ---------------RL-- : RelaxedCmpCR
+ if (const ConstantRange FalseCR = KnownCR.intersectWith(ActCmpCR->inverse());
+ FalseCR.isSingleElement())
+ return FalseCR.inverse();
const APInt &CmpLo = ActCmpCR->getLower(), &CmpHi = ActCmpCR->getUpper();
// If the intersection happens to be the ONE-icmp check, just return it.
- if (/*eq*/ ActCmpCR->isSingleElement() ||
- /*ne*/ ActCmpCR->inverse().isSingleElement() ||
- /*ult*/ CmpLo.isZero() ||
+ if (/*ult*/ CmpLo.isZero() ||
/*slt*/ CmpLo.isMinSignedValue() ||
/*uge*/ CmpHi.isZero() ||
/*sge*/ CmpHi.isMinSignedValue())
@@ -152,14 +168,7 @@ static ConstantRange simplifyCmpRange(const ConstantRange &CmpCR,
if (CmpLo == KnownCR.getLower()) {
// Tie to lower:
- // Try ne
- // L------------R : KnownCR
- // L-----------R : ActiveCmpCR
- // ---------------RL-- : RelaxedCmpCR
- if (CmpHi + 1 == KnownCR.getUpper())
- return ConstantRange::getNonEmpty(KnownCR.getUpper(), CmpHi);
-
- // Try ult
+ // Try ult.
// 0
// | L------------R : KnownCR
// | L---R : ActiveCmpCR
@@ -167,7 +176,7 @@ static ConstantRange simplifyCmpRange(const ConstantRange &CmpCR,
if (!KnownCR.isWrappedSet())
return ConstantRange::getNonEmpty(Zero, CmpHi);
- // Try slt
+ // Try slt.
// smin smin
// -----R | L------- : KnownCR ----R | L------- : KnownCR
// | L--R : ActiveCmpCR --R | L------- : ActiveCmpCR
@@ -178,15 +187,7 @@ static ConstantRange simplifyCmpRange(const ConstantRange &CmpCR,
} else if (CmpHi == KnownCR.getUpper()) {
// Tie to upper:
- // Try ne
- //
- // L--------R : KnownCR
- // L-------R : ActiveCmpCR
- // ---RL-------------- : RelaxedCmpCR
- if (KnownCR.getLower() + 1 == CmpLo)
- return ConstantRange::getNonEmpty(CmpLo, KnownCR.getLower());
-
- // Try uge
+ // Try uge.
// 0
// | L--------R : KnownCR
// | L---R : ActiveCmpCR
@@ -194,7 +195,7 @@ static ConstantRange simplifyCmpRange(const ConstantRange &CmpCR,
if (!KnownCR.isWrappedSet())
return ConstantRange::getNonEmpty(CmpLo, Zero);
- // Try sge
+ // Try sge.
// smin smin
// -----R | L------- : KnownCR -----R | L------- : KnownCR
// L--R | : ActiveCmpCR -----R | L--- : ActiveCmpCR
>From 80911f3d6b43ece71c03717431a9753d9fb3fb70 Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Thu, 26 Mar 2026 01:32:59 +0800
Subject: [PATCH 7/9] Pre-commit tests
---
.../SCCP/eager-invertible-periodic-mapping.ll | 21 +
.../invertible-periodic-linear-mapping.ll | 573 ++++++++++++++++++
2 files changed, 594 insertions(+)
create mode 100644 llvm/test/Transforms/SCCP/eager-invertible-periodic-mapping.ll
create mode 100644 llvm/test/Transforms/SCCP/invertible-periodic-linear-mapping.ll
diff --git a/llvm/test/Transforms/SCCP/eager-invertible-periodic-mapping.ll b/llvm/test/Transforms/SCCP/eager-invertible-periodic-mapping.ll
new file mode 100644
index 0000000000000..ef59b3d5f448e
--- /dev/null
+++ b/llvm/test/Transforms/SCCP/eager-invertible-periodic-mapping.ll
@@ -0,0 +1,21 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=sccp -S | FileCheck %s --check-prefix=SCCP
+; RUN: opt < %s -passes=ipsccp -S | FileCheck %s --check-prefix=IPSCCP
+
+define i1 @mul_preimage_only_in_late_sccp(i8 range(i8 0, 5) %x) {
+; SCCP-LABEL: define i1 @mul_preimage_only_in_late_sccp(
+; SCCP-SAME: i8 range(i8 0, 5) [[X:%.*]]) {
+; SCCP-NEXT: [[M:%.*]] = mul nuw nsw i8 [[X]], 17
+; SCCP-NEXT: [[CMP:%.*]] = icmp slt i8 [[M]], 17
+; SCCP-NEXT: ret i1 [[CMP]]
+;
+; IPSCCP-LABEL: define i1 @mul_preimage_only_in_late_sccp(
+; IPSCCP-SAME: i8 range(i8 0, 5) [[X:%.*]]) {
+; IPSCCP-NEXT: [[M:%.*]] = mul nuw nsw i8 [[X]], 17
+; IPSCCP-NEXT: [[CMP:%.*]] = icmp slt i8 [[M]], 17
+; IPSCCP-NEXT: ret i1 [[CMP]]
+;
+ %m = mul i8 %x, 17
+ %cmp = icmp slt i8 %m, 17
+ ret i1 %cmp
+}
diff --git a/llvm/test/Transforms/SCCP/invertible-periodic-linear-mapping.ll b/llvm/test/Transforms/SCCP/invertible-periodic-linear-mapping.ll
new file mode 100644
index 0000000000000..0ce7b25de517a
--- /dev/null
+++ b/llvm/test/Transforms/SCCP/invertible-periodic-linear-mapping.ll
@@ -0,0 +1,573 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=sccp,dce -S | FileCheck %s
+
+; Test for icmp (mul (zext x), C) to icmp x,
+; if mul is invertible on given predicate constraint
+; Refer to https://github.com/llvm/llvm-project/pull/186347 to understand
+; the mathematical model.
+
+; Comes from https://github.com/llvm/llvm-project/pull/185907#discussion_r2919506475
+; N = 9, M = 27
+; n = 2^9 = 512, m = 2^27 = 134217728, C = 262657
+; k = floor((n - 1) * C / m) = floor(511 * 262657 / 134217728) = 0
+; CR = [-2^26, 262657), Y = [0, 134217728)
+; Invertible: yes
+define i1 @slt_invertible_zext_mul_full_image(<2 x i9> %v) {
+; CHECK-LABEL: define i1 @slt_invertible_zext_mul_full_image(
+; CHECK-SAME: <2 x i9> [[V:%.*]]) {
+; CHECK-NEXT: [[E:%.*]] = extractelement <2 x i9> [[V]], i64 0
+; CHECK-NEXT: [[Z:%.*]] = zext i9 [[E]] to i27
+; CHECK-NEXT: [[M:%.*]] = mul nuw i27 [[Z]], 262657
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i27 [[M]], 262657
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %e = extractelement <2 x i9> %v, i64 0
+ %z = zext i9 %e to i27
+ %m = mul i27 %z, 262657
+ %cmp = icmp slt i27 %m, 262657
+ ret i1 %cmp
+}
+
+; N = 8, M = 16
+; n = 2^8 = 256, m = 2^16 = 65536, C = 257
+; k = floor((n - 1) * C / m) = floor(255 * 257 / 65536) = 0
+; CR = [-2^15, 257), Y = [0, 65536)
+; Invertible: yes
+define i1 @slt_invertible_zext_mul_full_image_i16(i8 %v) {
+; CHECK-LABEL: define i1 @slt_invertible_zext_mul_full_image_i16(
+; CHECK-SAME: i8 [[V:%.*]]) {
+; CHECK-NEXT: [[Z:%.*]] = zext i8 [[V]] to i16
+; CHECK-NEXT: [[M:%.*]] = mul nuw i16 [[Z]], 257
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i16 [[M]], 257
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %z = zext i8 %v to i16
+ %m = mul nuw i16 %z, 257
+ %cmp = icmp slt i16 %m, 257
+ ret i1 %cmp
+}
+
+; N = 4, M = 8
+; n = 2^4 = 16, m = 2^8 = 256, C = 17
+; k = floor((n - 1) * C / m) = floor(15 * 17 / 256) = 0
+; CR = [-2^7, 17), Y = [0, 256)
+; Invertible: yes
+define i1 @slt_invertible_zext_mul_full_image_i8(i4 %v) {
+; CHECK-LABEL: define i1 @slt_invertible_zext_mul_full_image_i8(
+; CHECK-SAME: i4 [[V:%.*]]) {
+; CHECK-NEXT: [[Z:%.*]] = zext i4 [[V]] to i8
+; CHECK-NEXT: [[M:%.*]] = mul nuw i8 [[Z]], 17
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i8 [[M]], 17
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %z = zext i4 %v to i8
+ %m = mul nuw i8 %z, 17
+ %cmp = icmp slt i8 %m, 17
+ ret i1 %cmp
+}
+
+; N = 4, M = 8
+; n = 2^4 = 16, m = 2^8 = 256, C = 10
+; k = floor((n - 1) * C / m) = floor(15 * 10 / 256) = 0
+; CR = [0, 50), Y = [0, 151)
+; Invertible: yes, because CR ⊆ Y
+define i1 @ult_invertible_zext_mul_partial_image(i4 %x) {
+; CHECK-LABEL: define i1 @ult_invertible_zext_mul_partial_image(
+; CHECK-SAME: i4 [[X:%.*]]) {
+; CHECK-NEXT: [[Z:%.*]] = zext i4 [[X]] to i8
+; CHECK-NEXT: [[M:%.*]] = mul nuw i8 [[Z]], 10
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[M]], 50
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %z = zext i4 %x to i8
+ %m = mul i8 %z, 10
+ %cmp = icmp ult i8 %m, 50
+ ret i1 %cmp
+}
+
+; N = 4, M = 8
+; n = 2^4 = 16, m = 2^8 = 256, C = 10
+; k = floor((n - 1) * C / m) = floor(15 * 10 / 256) = 0
+; CR = [0, 200), Y = [0, 151)
+; Invertible: yes
+define i1 @ult_invertible_zext_mul_all_true(i4 %x) {
+; CHECK-LABEL: define i1 @ult_invertible_zext_mul_all_true(
+; CHECK-SAME: i4 [[X:%.*]]) {
+; CHECK-NEXT: ret i1 true
+;
+ %z = zext i4 %x to i8
+ %m = mul i8 %z, 10
+ %cmp = icmp ult i8 %m, 200
+ ret i1 %cmp
+}
+
+; N = 4, M = 8
+; n = 2^4 = 16, m = 2^8 = 256, C = 10
+; k = floor((n - 1) * C / m) = floor(15 * 10 / 256) = 0
+; CR = [200, 256), Y = [0, 151)
+; Invertible: yes
+define i1 @uge_invertible_zext_mul_all_false(i4 %x) {
+; CHECK-LABEL: define i1 @uge_invertible_zext_mul_all_false(
+; CHECK-SAME: i4 [[X:%.*]]) {
+; CHECK-NEXT: ret i1 false
+;
+ %z = zext i4 %x to i8
+ %m = mul i8 %z, 10
+ %cmp = icmp uge i8 %m, 200
+ ret i1 %cmp
+}
+
+; N = 4, M = 8
+; n = 2^4 = 16, m = 2^8 = 256, C = 20
+; k = floor((n - 1) * C / m) = floor(15 * 20 / 256) = 1
+; CR = [60, 128), Y = [45, 256)
+; Invertible: yes
+define i1 @sge_invertible_tail_of_zext_mul(i4 %x) {
+; CHECK-LABEL: define i1 @sge_invertible_tail_of_zext_mul(
+; CHECK-SAME: i4 [[X:%.*]]) {
+; CHECK-NEXT: [[Z:%.*]] = zext i4 [[X]] to i8
+; CHECK-NEXT: [[M:%.*]] = mul i8 [[Z]], 20
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sge i8 [[M]], 60
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %z = zext i4 %x to i8
+ %m = mul i8 %z, 20
+ %cmp = icmp sge i8 %m, 60
+ ret i1 %cmp
+}
+
+; N = 4, M = 8
+; n = 2^4 = 16, m = 2^8 = 256, C = 20
+; k = floor((n - 1) * C / m) = floor(15 * 20 / 256) = 1
+; CR = [60, 256), Y = [45, 256)
+; Invertible: yes
+define i1 @uge_invertible_tail_of_zext_mul(i4 %x) {
+; CHECK-LABEL: define i1 @uge_invertible_tail_of_zext_mul(
+; CHECK-SAME: i4 [[X:%.*]]) {
+; CHECK-NEXT: [[Z:%.*]] = zext i4 [[X]] to i8
+; CHECK-NEXT: [[M:%.*]] = mul i8 [[Z]], 20
+; CHECK-NEXT: [[TMP2:%.*]] = icmp uge i8 [[M]], 60
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %z = zext i4 %x to i8
+ %m = mul i8 %z, 20
+ %cmp = icmp uge i8 %m, 60
+ ret i1 %cmp
+}
+
+; N = 4, M = 8
+; n = 2^4 = 16, m = 2^8 = 256, C = 18
+; k = floor((n - 1) * C / m) = floor(15 * 18 / 256) = 1
+; CR = [-2^7, 16), Y = [15, 256)
+; Invertible: yes on Y.inverse() = [16, 2^7)
+define i1 @slt_noninvertible_signed_range_before_tail(i4 %v) {
+; CHECK-LABEL: define i1 @slt_noninvertible_signed_range_before_tail(
+; CHECK-SAME: i4 [[V:%.*]]) {
+; CHECK-NEXT: [[Z:%.*]] = zext i4 [[V]] to i8
+; CHECK-NEXT: [[CAST:%.*]] = mul nuw i8 [[Z]], 18
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[CAST]], 16
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %z = zext i4 %v to i8
+ %cast = mul nuw i8 %z, 18
+ %cmp = icmp slt i8 %cast, 16
+ ret i1 %cmp
+}
+
+; N = 4, M = 8
+; n = 2^4 = 16, m = 2^8 = 256, C = 20
+; k = floor((n - 1) * C / m) = floor(15 * 20 / 256) = 1
+; CR = [0, 45), Y = [45, 256)
+; Invertible: yes on Y.inverse() = [45, 256)
+define i1 @ult_noninvertible_zext_mul_range(i4 %x) {
+; CHECK-LABEL: define i1 @ult_noninvertible_zext_mul_range(
+; CHECK-SAME: i4 [[X:%.*]]) {
+; CHECK-NEXT: [[Z:%.*]] = zext i4 [[X]] to i8
+; CHECK-NEXT: [[M:%.*]] = mul i8 [[Z]], 20
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[M]], 45
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %z = zext i4 %x to i8
+ %m = mul i8 %z, 20
+ %cmp = icmp ult i8 %m, 45
+ ret i1 %cmp
+}
+
+; N = 4, M = 8
+; n = 2^4 = 16, m = 2^8 = 256, C = 18
+; k = floor((n - 1) * C / m) = floor(15 * 18 / 256) = 1
+; CR = [0, 16), Y = [15, 256)
+; Invertible: yes on Y.inverse() = [16, 256)
+define i1 @ult_noninvertible_zext_mul_before_tail(i4 %v) {
+; CHECK-LABEL: define i1 @ult_noninvertible_zext_mul_before_tail(
+; CHECK-SAME: i4 [[V:%.*]]) {
+; CHECK-NEXT: [[Z:%.*]] = zext i4 [[V]] to i8
+; CHECK-NEXT: [[CAST:%.*]] = mul i8 [[Z]], 18
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[CAST]], 16
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %z = zext i4 %v to i8
+ %cast = mul i8 %z, 18
+ %cmp = icmp ult i8 %cast, 16
+ ret i1 %cmp
+}
+
+; N = 4, M = 8
+; n = 2^4 = 16, m = 2^8 = 256, C = 20
+; k = floor((n - 1) * C / m) = floor(15 * 20 / 256) = 1
+; CR = [-2^7, 60), Y = [45, 256)
+; Invertible: yes on Y.inverse() = [60, 2^7)
+define i1 @slt_noninvertible_crosses_wrap(i4 %v) {
+; CHECK-LABEL: define i1 @slt_noninvertible_crosses_wrap(
+; CHECK-SAME: i4 [[V:%.*]]) {
+; CHECK-NEXT: [[Z:%.*]] = zext i4 [[V]] to i8
+; CHECK-NEXT: [[CAST:%.*]] = mul i8 [[Z]], 20
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[CAST]], 60
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %z = zext i4 %v to i8
+ %cast = mul i8 %z, 20
+ %cmp = icmp slt i8 %cast, 60
+ ret i1 %cmp
+}
+
+; Negative test
+; N = 5, M = 8
+; n = 2^5 = 32, m = 2^8 = 256, C = 20
+; k = floor((n - 1) * C / m) = floor(31 * 20 / 256) = 2
+; CR = [60, 256), Y = none
+; Invertible: no
+define i1 @uge_noninvertible_multiple_wraps(i5 %x) {
+; CHECK-LABEL: define i1 @uge_noninvertible_multiple_wraps(
+; CHECK-SAME: i5 [[X:%.*]]) {
+; CHECK-NEXT: [[Z:%.*]] = zext i5 [[X]] to i8
+; CHECK-NEXT: [[M:%.*]] = mul i8 [[Z]], 20
+; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[M]], 60
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %z = zext i5 %x to i8
+ %m = mul i8 %z, 20
+ %cmp = icmp uge i8 %m, 60
+ ret i1 %cmp
+}
+
+; Tests for CmpCR built through add.
+
+; N = 4, M = 8
+; n = 2^4 = 16, m = 2^8 = 256, C = 20
+; k = floor((n - 1) * C / m) = floor(15 * 20 / 256) = 1
+; CmpCR = [110, 228), Y = [45, 256)
+; Invertible: yes
+define i1 @sge_invertible_tail_of_zext_mul_plus_offset(i4 %x) {
+; CHECK-LABEL: define i1 @sge_invertible_tail_of_zext_mul_plus_offset(
+; CHECK-SAME: i4 [[X:%.*]]) {
+; CHECK-NEXT: [[Z:%.*]] = zext i4 [[X]] to i8
+; CHECK-NEXT: [[M:%.*]] = mul i8 [[Z]], 20
+; CHECK-NEXT: [[A:%.*]] = sub i8 [[M]], 100
+; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[A]], 10
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %z = zext i4 %x to i8
+ %m = mul i8 %z, 20
+ %a = sub i8 %m, 100
+ %cmp = icmp sge i8 %a, 10
+ ret i1 %cmp
+}
+
+; N = 4, M = 8
+; n = 2^4 = 16, m = 2^8 = 256, C = 20
+; k = floor((n - 1) * C / m) = floor(15 * 20 / 256) = 1
+; CmpCR = [100, 200), Y = [45, 256)
+; Invertible: yes
+define i1 @ult_invertible_zext_mul_plus_offset(i4 %x) {
+; CHECK-LABEL: define i1 @ult_invertible_zext_mul_plus_offset(
+; CHECK-SAME: i4 [[X:%.*]]) {
+; CHECK-NEXT: [[Z:%.*]] = zext i4 [[X]] to i8
+; CHECK-NEXT: [[M:%.*]] = mul i8 [[Z]], 20
+; CHECK-NEXT: [[A:%.*]] = add i8 [[M]], -100
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[A]], 100
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %z = zext i4 %x to i8
+ %m = mul i8 %z, 20
+ %a = add i8 %m, -100
+ %cmp = icmp ult i8 %a, 100
+ ret i1 %cmp
+}
+
+; N = 4, M = 8
+; n = 2^4 = 16, m = 2^8 = 256, C = 20
+; k = floor((n - 1) * C / m) = floor(15 * 20 / 256) = 1
+; CmpCR = [122, 60), Y = [45, 256)
+; Invertible: yes on CmpCR.inverse() = [60, 122)
+define i1 @slt_inverse_invertible_zext_mul_plus_offset(i4 %x) {
+; CHECK-LABEL: define i1 @slt_inverse_invertible_zext_mul_plus_offset(
+; CHECK-SAME: i4 [[X:%.*]]) {
+; CHECK-NEXT: [[Z:%.*]] = zext i4 [[X]] to i8
+; CHECK-NEXT: [[M:%.*]] = mul i8 [[Z]], 20
+; CHECK-NEXT: [[A:%.*]] = add i8 [[M]], 6
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[A]], 66
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %z = zext i4 %x to i8
+ %m = mul i8 %z, 20
+ %a = add i8 %m, 6
+ %cmp = icmp slt i8 %a, 66
+ ret i1 %cmp
+}
+
+
+; TODO: support sext
+; Test for icmp (mul (sext x), C) to icmp x.
+
+; Use plain i4 -> i8 sext instead of extra range metadata so the tests cover
+; the extension pattern directly.
+define i1 @sccp_sext_mul_shrinks_to_prefix(i4 %x) {
+; CHECK-LABEL: define i1 @sccp_sext_mul_shrinks_to_prefix(
+; CHECK-SAME: i4 [[X:%.*]]) {
+; CHECK-NEXT: [[S:%.*]] = sext i4 [[X]] to i8
+; CHECK-NEXT: [[M:%.*]] = mul i8 [[S]], 20
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[M]], 60
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %s = sext i4 %x to i8
+ %m = mul i8 %s, 20
+ %cmp = icmp ult i8 %m, 60
+ ret i1 %cmp
+}
+
+
+define i1 @sccp_sext_mul_shrinks_to_negative_suffix(i4 %x) {
+; CHECK-LABEL: define i1 @sccp_sext_mul_shrinks_to_negative_suffix(
+; CHECK-SAME: i4 [[X:%.*]]) {
+; CHECK-NEXT: [[S:%.*]] = sext i4 [[X]] to i8
+; CHECK-NEXT: [[M:%.*]] = mul i8 [[S]], 20
+; CHECK-NEXT: [[TMP1:%.*]] = icmp uge i8 [[M]], -60
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %s = sext i4 %x to i8
+ %m = mul i8 %s, 20
+ %cmp = icmp uge i8 %m, 196
+ ret i1 %cmp
+}
+
+
+; Test for icmp (f(x), C) to icmp x
+
+define i1 @sccp_mul_wraps_once_shrinks_to_middle_window(i8 range(i8 0, 18) %x) {
+; CHECK-LABEL: define i1 @sccp_mul_wraps_once_shrinks_to_middle_window(
+; CHECK-SAME: i8 range(i8 0, 18) [[X:%.*]]) {
+; CHECK-NEXT: [[M:%.*]] = mul i8 [[X]], 20
+; CHECK-NEXT: [[TMP2:%.*]] = icmp uge i8 [[M]], 100
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %m = mul i8 %x, 20
+ %cmp = icmp uge i8 %m, 100
+ ret i1 %cmp
+}
+
+define i1 @sccp_urem_wraps_once_shrinks_to_middle_window(i8 range(i8 8, 19) %x) {
+; CHECK-LABEL: define i1 @sccp_urem_wraps_once_shrinks_to_middle_window(
+; CHECK-SAME: i8 range(i8 8, 19) [[X:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = urem i8 [[X]], 10
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 3
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %r = urem i8 %x, 10
+ %cmp = icmp ult i8 %r, 3
+ ret i1 %cmp
+}
+
+define i1 @sccp_negative_mul_wraps_once_shrinks_to_middle_window(i8 range(i8 3, 17) %x) {
+; CHECK-LABEL: define i1 @sccp_negative_mul_wraps_once_shrinks_to_middle_window(
+; CHECK-SAME: i8 range(i8 3, 17) [[X:%.*]]) {
+; CHECK-NEXT: [[M:%.*]] = mul i8 [[X]], -20
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[M]], 60
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %m = mul i8 %x, -20
+ %cmp = icmp ult i8 %m, 60
+ ret i1 %cmp
+}
+
+define i1 @sccp_shl_wraps_once_shrinks_to_upper_window(i8 range(i8 5, 23) %x) {
+; CHECK-LABEL: define i1 @sccp_shl_wraps_once_shrinks_to_upper_window(
+; CHECK-SAME: i8 range(i8 5, 23) [[X:%.*]]) {
+; CHECK-NEXT: [[Y:%.*]] = shl i8 [[X]], 4
+; CHECK-NEXT: [[TMP2:%.*]] = icmp uge i8 [[Y]], -96
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %y = shl i8 %x, 4
+ %cmp = icmp uge i8 %y, 160
+ ret i1 %cmp
+}
+
+define i1 @sccp_mul_wraps_once_shrinks_to_two_value_window1(i8 range(i8 2, 6) %x) {
+; CHECK-LABEL: define i1 @sccp_mul_wraps_once_shrinks_to_two_value_window1(
+; CHECK-SAME: i8 range(i8 2, 6) [[X:%.*]]) {
+; CHECK-NEXT: [[M:%.*]] = mul i8 [[X]], 100
+; CHECK-NEXT: [[A:%.*]] = add i8 [[M]], 56
+; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[A]], 100
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %m = mul i8 %x, 100
+ %a = add i8 %m, -200
+ %cmp = icmp uge i8 %a, 100
+ ret i1 %cmp
+}
+
+; TODO: support wrapped-range
+define i1 @sccp_mul_wraps_once_shrinks_to_two_value_window2(i8 range(i8 -2, 2) %x) {
+; CHECK-LABEL: define i1 @sccp_mul_wraps_once_shrinks_to_two_value_window2(
+; CHECK-SAME: i8 range(i8 -2, 2) [[X:%.*]]) {
+; CHECK-NEXT: [[M:%.*]] = mul i8 [[X]], 100
+; CHECK-NEXT: [[A:%.*]] = add i8 [[M]], 56
+; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[A]], 100
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %m = mul i8 %x, 100
+ %a = add i8 %m, -200
+ %cmp = icmp uge i8 %a, 100
+ ret i1 %cmp
+}
+
+define i1 @sccp_mul_wraps_once_shrinks_to_singleton(i8 range(i8 0, 18) %x) {
+; CHECK-LABEL: define i1 @sccp_mul_wraps_once_shrinks_to_singleton(
+; CHECK-SAME: i8 range(i8 0, 18) [[X:%.*]]) {
+; CHECK-NEXT: [[M:%.*]] = mul i8 [[X]], 20
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[M]], -116
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %m = mul i8 %x, 20
+ %cmp = icmp eq i8 %m, 140
+ ret i1 %cmp
+}
+
+; TODO: support wrapped-range
+; Wrapped-range counterparts for the generic f(x) tests above.
+
+define i1 @sccp_mul_wraps_once_shrinks_to_wrapped_window(i8 range(i8 250, 6) %x) {
+; CHECK-LABEL: define i1 @sccp_mul_wraps_once_shrinks_to_wrapped_window(
+; CHECK-SAME: i8 range(i8 -6, 6) [[X:%.*]]) {
+; CHECK-NEXT: [[M:%.*]] = mul nsw i8 [[X]], 20
+; CHECK-NEXT: [[TMP2:%.*]] = icmp uge i8 [[M]], 100
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %m = mul i8 %x, 20
+ %cmp = icmp uge i8 %m, 100
+ ret i1 %cmp
+}
+
+define i1 @sccp_urem_wraps_once_shrinks_to_wrapped_window(i8 range(i8 250, 10) %x) {
+; CHECK-LABEL: define i1 @sccp_urem_wraps_once_shrinks_to_wrapped_window(
+; CHECK-SAME: i8 range(i8 -6, 10) [[X:%.*]]) {
+; CHECK-NEXT: [[R:%.*]] = urem i8 [[X]], 10
+; CHECK-NEXT: [[TMP2:%.*]] = icmp uge i8 [[R]], 6
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %r = urem i8 %x, 10
+ %cmp = icmp uge i8 %r, 6
+ ret i1 %cmp
+}
+
+define i1 @sccp_negative_mul_wraps_once_shrinks_to_wrapped_window(i8 range(i8 250, 6) %x) {
+; CHECK-LABEL: define i1 @sccp_negative_mul_wraps_once_shrinks_to_wrapped_window(
+; CHECK-SAME: i8 range(i8 -6, 6) [[X:%.*]]) {
+; CHECK-NEXT: [[M:%.*]] = mul nsw i8 [[X]], -20
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[M]], 60
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %m = mul i8 %x, -20
+ %cmp = icmp ult i8 %m, 60
+ ret i1 %cmp
+}
+
+define i1 @sccp_shl_wraps_once_shrinks_to_wrapped_suffix(i8 range(i8 250, 6) %x) {
+; CHECK-LABEL: define i1 @sccp_shl_wraps_once_shrinks_to_wrapped_suffix(
+; CHECK-SAME: i8 range(i8 -6, 6) [[X:%.*]]) {
+; CHECK-NEXT: [[Y:%.*]] = shl nsw i8 [[X]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = icmp uge i8 [[Y]], -96
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %y = shl i8 %x, 4
+ %cmp = icmp uge i8 %y, 160
+ ret i1 %cmp
+}
+
+define i1 @sccp_mul_with_offset_wraps_once_shrinks_to_wrapped_window(i8 range(i8 250, 6) %x) {
+; CHECK-LABEL: define i1 @sccp_mul_with_offset_wraps_once_shrinks_to_wrapped_window(
+; CHECK-SAME: i8 range(i8 -6, 6) [[X:%.*]]) {
+; CHECK-NEXT: [[M:%.*]] = mul nsw i8 [[X]], 20
+; CHECK-NEXT: [[A:%.*]] = add i8 [[M]], 56
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[A]], 37
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %m = mul i8 %x, 20
+ %a = add i8 %m, -200
+ %cmp = icmp ult i8 %a, 37
+ ret i1 %cmp
+}
+
+define i1 @sccp_mul_wraps_once_shrinks_to_wrapped_domain_singleton(i8 range(i8 252, 8) %x) {
+; CHECK-LABEL: define i1 @sccp_mul_wraps_once_shrinks_to_wrapped_domain_singleton(
+; CHECK-SAME: i8 range(i8 -4, 8) [[X:%.*]]) {
+; CHECK-NEXT: [[M:%.*]] = mul i8 [[X]], 20
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[M]], -116
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %m = mul i8 %x, 20
+ %cmp = icmp eq i8 %m, 140
+ ret i1 %cmp
+}
+
+; Test for vector : f(vec) = C * vec, C is a splat constant.
+
+define <4 x i1> @vec_splat_and(<4 x i32> range(i32 100, 456) %x) {
+; CHECK-LABEL: define <4 x i1> @vec_splat_and(
+; CHECK-SAME: <4 x i32> range(i32 100, 456) [[X:%.*]]) {
+; CHECK-NEXT: [[M:%.*]] = and <4 x i32> [[X]], splat (i32 255)
+; CHECK-NEXT: [[CMP:%.*]] = icmp uge <4 x i32> [[M]], splat (i32 230)
+; CHECK-NEXT: ret <4 x i1> [[CMP]]
+;
+ %m = and <4 x i32> %x, splat (i32 255)
+ %cmp = icmp uge <4 x i32> %m, splat (i32 230)
+ ret <4 x i1> %cmp
+}
+
+define <4 x i1> @vec_splat_mul(<4 x i8> range(i8 10, 17) %x) {
+; CHECK-LABEL: define <4 x i1> @vec_splat_mul(
+; CHECK-SAME: <4 x i8> range(i8 10, 17) [[X:%.*]]) {
+; CHECK-NEXT: [[M:%.*]] = mul <4 x i8> [[X]], splat (i8 50)
+; CHECK-NEXT: [[ADD:%.*]] = add <4 x i8> [[M]], splat (i8 -40)
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult <4 x i8> [[ADD]], splat (i8 110)
+; CHECK-NEXT: ret <4 x i1> [[CMP]]
+;
+ %m = mul <4 x i8> %x, splat (i8 50)
+ %add = add <4 x i8> %m, splat (i8 -40)
+ %cmp = icmp ult <4 x i8> %add, splat (i8 110)
+ ret <4 x i1> %cmp
+}
+
+define <4 x i1> @vec_splat_urem(<4 x i32> range(i32 1, 10) %x) {
+; CHECK-LABEL: define <4 x i1> @vec_splat_urem(
+; CHECK-SAME: <4 x i32> range(i32 1, 10) [[X:%.*]]) {
+; CHECK-NEXT: [[M:%.*]] = urem <4 x i32> [[X]], splat (i32 8)
+; CHECK-NEXT: [[CMP:%.*]] = icmp uge <4 x i32> [[M]], splat (i32 2)
+; CHECK-NEXT: ret <4 x i1> [[CMP]]
+;
+ %m = urem <4 x i32> %x, splat (i32 8)
+ %cmp = icmp uge <4 x i32> %m, splat (i32 2)
+ ret <4 x i1> %cmp
+}
+
+define <4 x i1> @vec_splat_shl(<4 x i8> range(i8 1, 4) %x) {
+; CHECK-LABEL: define <4 x i1> @vec_splat_shl(
+; CHECK-SAME: <4 x i8> range(i8 1, 4) [[X:%.*]]) {
+; CHECK-NEXT: [[M:%.*]] = shl <4 x i8> [[X]], splat (i8 7)
+; CHECK-NEXT: [[CMP:%.*]] = icmp uge <4 x i8> [[M]], splat (i8 100)
+; CHECK-NEXT: ret <4 x i1> [[CMP]]
+;
+ %m = shl <4 x i8> %x, splat (i8 7)
+ %cmp = icmp uge <4 x i8> %m, splat (i8 100)
+ ret <4 x i1> %cmp
+}
>From 68ce5f59f05bdba916a3b601922647959e839595 Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Thu, 26 Mar 2026 01:49:27 +0800
Subject: [PATCH 8/9] =?UTF-8?q?Fold=20y=20=3D=20f(x)=20=3D=20(Cx=20mod=20M?=
=?UTF-8?q?)=20=E2=88=88=20R=20into=20x=20=E2=88=88=20R'?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../llvm/Transforms/Utils/SCCPSolver.h | 8 +-
llvm/lib/Transforms/Scalar/SCCP.cpp | 3 +-
llvm/lib/Transforms/Utils/SCCPSolver.cpp | 401 +++++++++++++++++-
.../SCCP/eager-invertible-periodic-mapping.ll | 2 +-
.../invertible-periodic-linear-mapping.ll | 92 ++--
5 files changed, 433 insertions(+), 73 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h
index 5aac7c2ac5d3e..f9fcd17662f90 100644
--- a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h
+++ b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h
@@ -194,10 +194,16 @@ class SCCPSolver {
LLVM_ABI void visit(Instruction *I);
LLVM_ABI void visitCall(CallInst &I);
+ /// Simplify instructions in \p BB using the solver's lattice information.
+ /// When \p Eager is true, also apply more aggressive folds that may rewrite
+ /// IR into forms less friendly to earlier canonicalization passes. Keep eager
+ /// mode for later optimization points where exposing extra range-based folds
+ /// outweighs the risk of hiding canonical patterns.
LLVM_ABI bool simplifyInstsInBlock(BasicBlock &BB,
SmallPtrSetImpl<Value *> &InsertedValues,
Statistic &InstRemovedStat,
- Statistic &InstReplacedStat);
+ Statistic &InstReplacedStat,
+ bool Eager = false);
LLVM_ABI bool removeNonFeasibleEdges(BasicBlock *BB, DomTreeUpdater &DTU,
BasicBlock *&NewUnreachableBB) const;
diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp
index feee794ffeae1..eada8efddd8ff 100644
--- a/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -102,7 +102,8 @@ static bool runSCCP(Function &F, const DataLayout &DL,
}
MadeChanges |= Solver.simplifyInstsInBlock(BB, InsertedValues,
- NumInstRemoved, NumInstReplaced);
+ NumInstRemoved, NumInstReplaced,
+ /*Eager=*/true);
}
// Remove unreachable blocks and non-feasible edges.
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index cfbf733eb52c8..4930ce01d13b9 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/SCCPSolver.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -29,9 +30,9 @@
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
+#include <optional>
#include <utility>
#include <vector>
@@ -105,6 +106,310 @@ static ConstantRange getRange(Value *Op, SCCPSolver &Solver,
/*UndefAllowed=*/false);
}
+namespace {
+
+// Sibling of ConstantRange::getNonEmptyRange
+ConstantRange getMightEmptyRange(const APInt &L, const APInt &R) {
+ return L == R ? ConstantRange::getEmpty(L.getBitWidth())
+ : ConstantRange(L, R);
+}
+
+/// Represents periodic mapping f(x) = Cx mod M
+class ModularMulMapping {
+public:
+ ModularMulMapping(const APInt &C, const APInt &M)
+ : MulC(C), Modulus(M), IsFullMod(M.isZero()), IsURem(C.isOne()),
+ WideBits(C.getBitWidth() * 2), WideMulC(C.zext(WideBits)),
+ WideModulus(M.zext(WideBits)) {
+ assert(C.isStrictlyPositive() && "Expected a positive multiplier C");
+ assert((!IsURem || !M.isZero()) && "Expected a valid modulus M if C = 1");
+ }
+
+ // Mapping: f(x) = Cx mod M = ((C mod M) * (x mod M)) mod M
+ APInt operator()(const APInt &x) const {
+ assert(x.getBitWidth() == MulC.getBitWidth() &&
+ "The bit width of x and C should be equal for f(x) = Cx");
+ // Fast path for f(x) = Cx
+ if (IsFullMod)
+ return MulC * x;
+ // Fast path for f(x) = x mod M
+ if (IsURem)
+ return x.urem(Modulus);
+ return (WideMulC * x.zext(WideBits))
+ .urem(WideModulus)
+ .trunc(MulC.getBitWidth());
+ }
+
+ ConstantRange getInvertibleImage(const ConstantRange &SrcCR,
+ bool &IsDomainReturned) const {
+ assert(!SrcCR.isEmptySet() && "Expected non-empty SrcCR");
+ return SrcCR.isWrappedSet()
+ ? getInvertibleImageOnWrappedX(SrcCR, IsDomainReturned)
+ : getInvertibleImageOnUnwrappedX(SrcCR, IsDomainReturned);
+ }
+
+private:
+ ConstantRange getInvertibleImageOnUnwrappedX(const ConstantRange &SrcCR,
+ bool &IsDomainReturned) const {
+ assert(!SrcCR.isWrappedSet() && "Expected nuw SrcCR");
+ const unsigned BW = MulC.getBitWidth();
+
+ const APInt &XLo =
+ SrcCR.isFullSet() ? APInt::getZero(BW) : SrcCR.getLower(),
+ &XHi =
+ SrcCR.isFullSet() ? APInt::getZero(BW) : SrcCR.getUpper();
+ // [Lo, Hi) = [Lo, Hi-1]
+ const APInt RangeSize = XHi - XLo - 1;
+
+ // Periods k = floor(|Range| / T)
+ // = floor((SrcCR.size() - 1) / (M / C))
+ // = floor((SrcCR.size() - 1) * C / M)
+ const APInt SizeMulC = RangeSize.zext(WideBits) * WideMulC;
+ const APInt Periods =
+ IsFullMod ? SizeMulC.lshr(BW) : SizeMulC.udiv(WideModulus);
+
+ if (Periods.isZero()) {
+ // k = 0: the walk never wraps, so the mapping is invertible over the full
+ // result space. Here we return the reachable image/domain
+ // Y = f(CR) = [f(Lo), f(Hi)).
+ // Consider discreteness, Y = [ f(Lo) , f(Hi - 1) + 1 )
+ // f(x) = Cx: Y | /
+ // Y | /
+ // |
+ // Y | /
+ // └-----------
+ // XXX
+ IsDomainReturned = true;
+ // SrcCR is not emptyset -> Y cannot be emptyset but might be fullset.
+ return ConstantRange::getNonEmpty((*this)(XLo),
+ modularAdd((*this)(XHi - 1), 1));
+ }
+
+ if (Periods.isOne()) {
+ // k = 1: only the unique part invertible.
+ // As f(x) walks along f(Lo) --> ⊤ --> ⊥ --> f(Hi),
+ // the repeated part is [f(Lo), f(Hi)) and the invertiable part
+ // Y = [ f(Hi), f(Lo) )
+ // Consider discreteness, Y = [ f(Hi - 1) + 1 , f(Lo) )
+ // f(x) = Cx: Y | /
+ // Y | /
+ // | / /
+ // Y | /
+ // └-----------
+ // ~XXX~
+ IsDomainReturned = false;
+ // [f(Lo), f(Hi)) repeated -> Y cannot be fullset but might be emptyset.
+ return getMightEmptyRange(modularAdd((*this)(XHi - 1), 1), (*this)(XLo));
+ }
+ // k >= 2: the walk overlaps itself too much to have a unique inverse.
+ // f(x) = Cx: | / /
+ // | / /
+ // Y does not exist. | / / /
+ // | / /
+ // └-------------
+ return ConstantRange::getEmpty(BW);
+ }
+ /// If the range of x is wrapped, the linearity of `f(x) = Cx mod M` breaks at
+ /// the end. E.g., as follows, the invertible image Y is continuous, but the
+ /// related pre-image X is not.
+ /// f(x) = Cx: Y| /
+ /// | / /
+ /// | / /
+ /// Y|/
+ /// └----------
+ /// X~~X ~~
+ ConstantRange getInvertibleImageOnWrappedX(const ConstantRange &SrcCR,
+ bool &IsDomainReturned) const {
+ assert(SrcCR.isWrappedSet() && "Expected wrapped SrcCR");
+ // FIXME: support wrapped SrcCR.
+ return ConstantRange::getEmpty(MulC.getBitWidth());
+ }
+ APInt modularAdd(const APInt &LHS, int RHS) const {
+ return Modulus.isZero() ? LHS + RHS : (RHS + LHS).urem(Modulus);
+ };
+
+ const APInt &MulC;
+ const APInt &Modulus;
+ const bool IsFullMod;
+ const bool IsURem;
+ const unsigned WideBits;
+ const APInt WideMulC;
+ const APInt WideModulus;
+};
+} // namespace
+
+/// Refer to https://github.com/llvm/llvm-project/pull/186347 for the
+/// underlying math model.
+///
+/// Given a result constraint CR on y = f(x) = Step * x mod Modulus and a
+/// source domain X = SrcCR, try to compute a single interval CR' = f^{-1}(CR)
+/// on x.
+///
+/// We first compute the invertible result interval Y for x \in SrcCR, then try
+/// to apply f^{-1} on CR or CR.inverse(). This is valid iff CR ⊆ Y, or iff CR
+/// intersects the reachable image when Y itself is the domain. Modulus == 0
+/// denotes the full iBW ring, i.e. mod 2^BW.
+static std::optional<ConstantRange>
+getPreImageOfModularMul(const ConstantRange &CmpCR, const ConstantRange &SrcCR,
+ const APInt &C, const APInt &Modulus) {
+ assert(!C.isZero() && "Expected a non-zero periodic coefficient");
+ assert(!CmpCR.isEmptySet() && "Unexpected empty constraint set");
+ assert(!CmpCR.isFullSet() && "Unexpected full constraint set");
+ assert(!SrcCR.isEmptySet() && "Unexpected empty input set");
+
+ const unsigned BW = C.getBitWidth();
+
+ ConstantRange Domain =
+ ConstantRange::getNonEmpty(APInt::getZero(BW), Modulus);
+
+ auto NegateRange = [](const ConstantRange &CR) -> ConstantRange {
+ // negate([L,R)) = - [L, R) = [1 - R, 1 - L)
+ return ConstantRange::getNonEmpty(1 - CR.getUpper(), 1 - CR.getLower());
+ };
+ // y = Cx \in CR --> -y = -Cx \in negate(CR)
+ const ConstantRange &ActiveCmpCR =
+ C.isNegative() ? NegateRange(CmpCR) : CmpCR;
+ const APInt &Step = C.isNegative() ? -C : C;
+
+ const ModularMulMapping Mapping{Step, Modulus};
+
+ // ==================================================================== //
+ // 1. Calculate the invertible interval Y for f(x) = Cx mod M.
+ // ==================================================================== //
+
+ bool IsDomainReturned = false;
+ const ConstantRange Y = Mapping.getInvertibleImage(SrcCR, IsDomainReturned);
+
+ if (Y.isEmptySet())
+ return std::nullopt;
+
+ if (IsDomainReturned)
+ Domain = Y;
+
+ // ==================================================================== //
+ // 2. Calculate the equivalent range X via f^{-1} on CmpCR.
+ // ==================================================================== //
+ auto ModularSub = [&Modulus](const APInt &LHS, const APInt &RHS) {
+ return (Modulus.isZero() || LHS.uge(RHS)) ? LHS - RHS : Modulus - RHS + LHS;
+ };
+ // Try to map CmpRange to its pre-image, i.e., f^{-1}(CR).
+ auto TryGetPreImage =
+ [&](const ConstantRange &CR) -> std::optional<ConstantRange> {
+ if (CR.contains(Domain))
+ return /* Domain ⊆ CR*/ ConstantRange::getFull(BW);
+ if (CR.inverse().contains(Domain))
+ return /* Domain ∩ CR = ∅ */ ConstantRange::getEmpty(BW);
+
+ // ActiveCR is the reachable part of CR.
+ // ActiveCR = null if
+ // L-------U : Domain or --U L----- : Domain
+ // --U L----- : CR L-------U : CR
+ std::optional<ConstantRange> ActiveCR = Domain.exactIntersectWith(CR);
+ // If ActiveCR = null or ActiveCR ⊈ the invertible image Y,
+ // there are >1 separate intervals of x, making Cx ∈ CR.
+ // I.e., we cannot derive a single X.
+ if (!ActiveCR || !Y.contains(*ActiveCR))
+ return std::nullopt;
+
+ // ActiveCR.Hi does not belong to Y, thus we use Y1 = ActiveCR.Hi - 1
+ const APInt &Y0 = ActiveCR->getLower(),
+ &Y1 = ModularSub(ActiveCR->getUpper(), APInt(BW, 1));
+ // Fast path for Lo = 0: X = [ y0 / C , y1 / C )
+ if (SrcCR.getLower().isZero()) {
+ // f(x) = Cx: Y | / X must be Y / C without mod directly.
+ // Y | /
+ // | / /
+ // |/ /
+ // └---------
+ // ~~XX~~
+ const APInt X0 = APIntOps::RoundingUDiv(Y0, Step, APInt::Rounding::UP),
+ X1 = APIntOps::RoundingUDiv(Y1, Step, APInt::Rounding::DOWN) +
+ 1;
+ return getMightEmptyRange(X0, X1);
+ }
+
+ // Given SrcCR = [Lo, Hi) and invertible interval CR = [y0, y1],
+ // we need to find X = [x0, x1] ⊆ SrcCR, s.t., f(X) = CR.
+ // I.e., y0 = f(x0)
+ // DeltaY = y0 - f(Lo) = f(x0) - f(Lo) = f(x0 - Lo) = C * DeltaX
+ // DeltaX = DeltaY / C = (y0 - f(Lo)) / C
+ // x0 = Lo + DeltaX
+ // As y0, f(Lo) ∈ [0, M), we do need to consider modulus.
+ // x1 shares the same derivation.
+ // Considering discreteness, we need to adjust X = [A,B) properly as
+ // follows.
+ // X = [ceil(x0), ceil(x1))
+ const APInt LoY = Mapping(SrcCR.getLower());
+ const APInt DeltaY0 = ModularSub(Y0, LoY), DeltaY1 = ModularSub(Y1, LoY);
+ const APInt DeltaX0 =
+ APIntOps::RoundingUDiv(DeltaY0, Step, APInt::Rounding::UP);
+ const APInt DeltaX1 =
+ APIntOps::RoundingUDiv(DeltaY1, Step, APInt::Rounding::DOWN);
+ const APInt X0 = SrcCR.getLower() + DeltaX0,
+ X1 = SrcCR.getLower() + DeltaX1 + 1;
+ const ConstantRange X = getMightEmptyRange(X0, X1);
+ assert(SrcCR.contains(X) && "X should be subset of SrcCR");
+ return X;
+ };
+
+ // Try to get single X = f^{-1}(CmpCR) to make Cx ∈ CmpCR.
+ if (auto X = TryGetPreImage(ActiveCmpCR))
+ return *X;
+
+ // Try to get single X = f^{-1}(CmpCR.inverse()).inverse() to make Cx ∈ CmpCR.
+ if (auto X = TryGetPreImage(ActiveCmpCR.inverse()))
+ return X->inverse();
+
+ return std::nullopt;
+}
+
+/// Given CmpCR constraining y = f(x) and SCCP's known range SrcCR for x, try to
+/// rewrite the constraint as a single ConstantRange on x.
+///
+/// This only handles mappings that the current solver models via
+/// getPreImageOfModularMul():
+/// - mul x, C : y = C * x mod 2^BW
+/// - shl x, C : y = (2^C) * x mod 2^BW
+/// - urem x, C : y = x mod C
+/// - and x, C : y = x mod C+1 if C is low-bit mask
+///
+/// Returns nullopt if the reachable image from SrcCR does not admit one
+/// invertible interval, or if the preimage of CmpCR cannot be expressed as one
+/// ConstantRange.
+static std::optional<ConstantRange> getPreImageOfInvertiblePeriodicMapping(
+ unsigned Opcode, Value *X, const APInt &C, const ConstantRange &SrcCR,
+ const ConstantRange &CmpCR) {
+ // We support integer vector/scalar.
+ // For vector, the mapping must be fixed, i.e., splat C.
+ assert(X->getType()->getScalarType()->isIntegerTy() &&
+ "Only support integer mapping");
+
+ // TODO: Support srem and other more complex periodic mappings.
+ std::optional<ConstantRange> SrcCmpCR;
+ switch (Opcode) {
+ case Instruction::Mul:
+ // y = C*x = C*x mod (MAX + 1)
+ return getPreImageOfModularMul(CmpCR, SrcCR, C, APInt(C.getBitWidth(), 0));
+ case Instruction::Shl:
+ // y = x << C = 2^C * x mod (MAX + 1)
+ return getPreImageOfModularMul(
+ CmpCR, SrcCR, APInt::getOneBitSet(C.getBitWidth(), C.getZExtValue()),
+ APInt(C.getBitWidth(), 0));
+ case Instruction::And:
+ assert(C.isMask() && "Expected a low-bit mask C");
+ // y = x & C = 1 * x mod C + 1
+ return getPreImageOfModularMul(CmpCR, SrcCR, APInt(C.getBitWidth(), 1),
+ C + 1);
+ case Instruction::URem:
+ // y = x % C = 1 * x mod C
+ return getPreImageOfModularMul(CmpCR, SrcCR, APInt(C.getBitWidth(), 1), C);
+ default:
+ assert(false && "Unsupported invertible periodic linear mapping opcode");
+ }
+
+ return std::nullopt;
+}
+
/// SCCP already proves x \in KnownCR, so only ActiveCmpCR = CmpCR ∩ KnownCR
/// matters. Try to replace CmpCR with a simpler equivalent range NewCmpCR
/// such that NewCmpCR ∩ KnownCR == ActiveCmpCR.
@@ -343,7 +648,7 @@ static bool replaceSignedInst(SCCPSolver &Solver,
/// Try to use \p Inst's value range from \p Solver to simplify it.
static Value *simplifyInstruction(SCCPSolver &Solver,
SmallPtrSetImpl<Value *> &InsertedValues,
- Instruction &Inst) {
+ Instruction &Inst, bool Eager) {
auto GetRange = [&Solver, &InsertedValues](Value *Op) {
return getRange(Op, Solver, InsertedValues);
};
@@ -389,42 +694,97 @@ static Value *simplifyInstruction(SCCPSolver &Solver,
return Sub;
}
- // Relax range checks.
+ // Check if we can relax icmp Pred, Y, ... to a simpler form.
if (auto *ICmp = dyn_cast<ICmpInst>(&Inst)) {
- Value *X;
- auto MatchTwoInstructionExactRangeCheck =
- [&]() -> std::optional<ConstantRange> {
+ Value *Y;
+ bool IsOneUse = false, IsTwoInstRangeCheck = true;
+ auto MatchExactRangeCheck = [&]() -> std::optional<ConstantRange> {
const APInt *RHSC;
+ // Match icmp Pred LHS, C
if (!match(ICmp->getOperand(1), m_APInt(RHSC)))
return std::nullopt;
Value *LHS = ICmp->getOperand(0);
ICmpInst::Predicate Pred = ICmp->getPredicate();
const APInt *Offset;
- if (match(LHS, m_OneUse(m_AddLike(m_Value(X), m_APInt(Offset)))))
- return ConstantRange::makeExactICmpRegion(Pred, *RHSC).sub(*Offset);
- // Match icmp eq/ne X & NegPow2, C
+ IsOneUse = LHS->hasOneUse();
+ if (!IsOneUse)
+ return std::nullopt;
+ const ConstantRange ExactCmpCR =
+ ConstantRange::makeExactICmpRegion(Pred, *RHSC);
+ // Match icmp Pred Y + C1, C2
+ if (match(LHS, m_AddLike(m_Value(Y), m_APInt(Offset))))
+ return ExactCmpCR.sub(*Offset);
+ // Match icmp Pred Y - C1, C2
+ if (match(LHS, m_Sub(m_Value(Y), m_APInt(Offset))))
+ return ExactCmpCR.add(*Offset);
+ // Match icmp eq/ne Y & NegPow2, C
if (ICmp->isEquality()) {
const APInt *Mask;
- if (match(LHS, m_OneUse(m_And(m_Value(X), m_NegatedPower2(Mask)))) &&
+ if (match(LHS, m_And(m_Value(Y), m_NegatedPower2(Mask))) &&
RHSC->countr_zero() >= Mask->countr_zero()) {
ConstantRange CR(*RHSC, *RHSC - *Mask);
return Pred == ICmpInst::ICMP_EQ ? CR : CR.inverse();
}
}
- return std::nullopt;
+ IsTwoInstRangeCheck = false;
+ Y = LHS;
+ return ExactCmpCR;
};
- if (auto CR = MatchTwoInstructionExactRangeCheck()) {
- ConstantRange LRange = GetRange(X);
- // Early exit if we know nothing about X.
- if (LRange.isFullSet())
+ // Match icmp Pred, (op Y, C1), C2 as Y ∈ CmpCR.
+ if (auto CmpCR = MatchExactRangeCheck()) {
+
+ // TODO: support more mappings f
+ // FIXME: should we treat trunc as x % 2^N?
+ // In eager mode, try to simplify Y = f(X) ∈ CR into X ∈ CR'. This is a
+ // more aggressive rewrite that can expose additional SCCP opportunities,
+ // but may also hide canonical forms expected by earlier passes.
+ if (const APInt *C;
+ Eager && /* the sole use of y = f(x) is icmp */ Y->hasOneUse() &&
+ (match(Y, m_c_Mul(m_Value(X), m_APInt(C))) ||
+ match(Y, m_Shl(m_Value(X), m_APInt(C))) ||
+ match(Y, m_URem(m_Value(X), m_APInt(C))) ||
+ match(Y, m_And(m_Value(X), m_LowBitMask(C))))) {
+ ConstantRange XRange = GetRange(X);
+ if (auto XCmpCR = getPreImageOfInvertiblePeriodicMapping(
+ cast<Instruction>(Y)->getOpcode(), X, *C, XRange, *CmpCR)) {
+ // Use XRange to simplify XCmpCR. E.g.:
+ // XCmpCR = [5, 10), *XCmpCR = [5, 0) --> NewCmpCR = [0, 10) -> ult
+ *XCmpCR = simplifyCmpRange(*XCmpCR, XRange);
+
+ // Emit XCmpCR as icmp Pred (X + C1), C2
+ ICmpInst::Predicate Pred;
+ APInt RHS, Offset;
+ XCmpCR->getEquivalentICmp(Pred, RHS, Offset);
+
+ IRBuilder<NoFolder> Builder(&Inst);
+ if (!Offset.isZero()) {
+ X = Builder.CreateAdd(X, ConstantInt::get(X->getType(), Offset));
+ InsertedValues.insert(X);
+ }
+
+ Value *NewICmp =
+ Builder.CreateICmp(Pred, X, ConstantInt::get(X->getType(), RHS));
+ InsertedValues.insert(NewICmp);
+ return NewICmp;
+ }
+ }
+
+ // Given Y ∈ YRange, try to simplify: Y ∈ CR --> Y ∈ CR'
+ ConstantRange YRange = GetRange(Y);
+
+ // Early exit if
+ // 1. we know nothing about Y or
+ // 2. LHS has >1 uses (tuned by llvm-opt-bench) or
+ // 3. this ICMP is not two-inst range check.
+ if (YRange.isFullSet() || !IsOneUse || !IsTwoInstRangeCheck)
return nullptr;
// We are allowed to refine the comparison to either true or false for out
// of range inputs. Based on this, try to simplify CmpCR as a single
// ult/uge/slt/sge/eq/ne.
- // E.g., CmpCR = [3, 10), LRange = [5, 0) --> NewCmpCR = [0, 10) -> ult
- ConstantRange NewCmpCR = simplifyCmpRange(*CR, LRange);
+ // E.g., CmpCR = [3, 10), YRange = [5, 0) --> NewCmpCR = [0, 10) -> ult
+ ConstantRange NewCmpCR = simplifyCmpRange(*CmpCR, YRange);
ICmpInst::Predicate Pred;
APInt RHS;
@@ -432,7 +792,7 @@ static Value *simplifyInstruction(SCCPSolver &Solver,
if (NewCmpCR.getEquivalentICmp(Pred, RHS)) {
IRBuilder<NoFolder> Builder(&Inst);
Value *NewICmp =
- Builder.CreateICmp(Pred, X, ConstantInt::get(X->getType(), RHS));
+ Builder.CreateICmp(Pred, Y, ConstantInt::get(Y->getType(), RHS));
InsertedValues.insert(NewICmp);
return NewICmp;
}
@@ -445,7 +805,7 @@ static Value *simplifyInstruction(SCCPSolver &Solver,
bool SCCPSolver::simplifyInstsInBlock(BasicBlock &BB,
SmallPtrSetImpl<Value *> &InsertedValues,
Statistic &InstRemovedStat,
- Statistic &InstReplacedStat) {
+ Statistic &InstReplacedStat, bool Eager) {
bool MadeChanges = false;
for (Instruction &Inst : make_early_inc_range(BB)) {
if (Inst.getType()->isVoidTy())
@@ -461,7 +821,8 @@ bool SCCPSolver::simplifyInstsInBlock(BasicBlock &BB,
++InstReplacedStat;
} else if (refineInstruction(*this, InsertedValues, Inst)) {
MadeChanges = true;
- } else if (auto *V = simplifyInstruction(*this, InsertedValues, Inst)) {
+ } else if (auto *V =
+ simplifyInstruction(*this, InsertedValues, Inst, Eager)) {
Inst.replaceAllUsesWith(V);
Inst.eraseFromParent();
++InstRemovedStat;
diff --git a/llvm/test/Transforms/SCCP/eager-invertible-periodic-mapping.ll b/llvm/test/Transforms/SCCP/eager-invertible-periodic-mapping.ll
index ef59b3d5f448e..170659ad43c5b 100644
--- a/llvm/test/Transforms/SCCP/eager-invertible-periodic-mapping.ll
+++ b/llvm/test/Transforms/SCCP/eager-invertible-periodic-mapping.ll
@@ -6,7 +6,7 @@ define i1 @mul_preimage_only_in_late_sccp(i8 range(i8 0, 5) %x) {
; SCCP-LABEL: define i1 @mul_preimage_only_in_late_sccp(
; SCCP-SAME: i8 range(i8 0, 5) [[X:%.*]]) {
; SCCP-NEXT: [[M:%.*]] = mul nuw nsw i8 [[X]], 17
-; SCCP-NEXT: [[CMP:%.*]] = icmp slt i8 [[M]], 17
+; SCCP-NEXT: [[CMP:%.*]] = icmp eq i8 [[X]], 0
; SCCP-NEXT: ret i1 [[CMP]]
;
; IPSCCP-LABEL: define i1 @mul_preimage_only_in_late_sccp(
diff --git a/llvm/test/Transforms/SCCP/invertible-periodic-linear-mapping.ll b/llvm/test/Transforms/SCCP/invertible-periodic-linear-mapping.ll
index 0ce7b25de517a..fb176b397c520 100644
--- a/llvm/test/Transforms/SCCP/invertible-periodic-linear-mapping.ll
+++ b/llvm/test/Transforms/SCCP/invertible-periodic-linear-mapping.ll
@@ -17,8 +17,8 @@ define i1 @slt_invertible_zext_mul_full_image(<2 x i9> %v) {
; CHECK-SAME: <2 x i9> [[V:%.*]]) {
; CHECK-NEXT: [[E:%.*]] = extractelement <2 x i9> [[V]], i64 0
; CHECK-NEXT: [[Z:%.*]] = zext i9 [[E]] to i27
-; CHECK-NEXT: [[M:%.*]] = mul nuw i27 [[Z]], 262657
-; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i27 [[M]], 262657
+; CHECK-NEXT: [[TMP1:%.*]] = add i27 [[Z]], -256
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i27 [[TMP1]], -255
; CHECK-NEXT: ret i1 [[TMP2]]
;
%e = extractelement <2 x i9> %v, i64 0
@@ -37,8 +37,8 @@ define i1 @slt_invertible_zext_mul_full_image_i16(i8 %v) {
; CHECK-LABEL: define i1 @slt_invertible_zext_mul_full_image_i16(
; CHECK-SAME: i8 [[V:%.*]]) {
; CHECK-NEXT: [[Z:%.*]] = zext i8 [[V]] to i16
-; CHECK-NEXT: [[M:%.*]] = mul nuw i16 [[Z]], 257
-; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i16 [[M]], 257
+; CHECK-NEXT: [[TMP1:%.*]] = add i16 [[Z]], -128
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i16 [[TMP1]], -127
; CHECK-NEXT: ret i1 [[TMP2]]
;
%z = zext i8 %v to i16
@@ -56,8 +56,8 @@ define i1 @slt_invertible_zext_mul_full_image_i8(i4 %v) {
; CHECK-LABEL: define i1 @slt_invertible_zext_mul_full_image_i8(
; CHECK-SAME: i4 [[V:%.*]]) {
; CHECK-NEXT: [[Z:%.*]] = zext i4 [[V]] to i8
-; CHECK-NEXT: [[M:%.*]] = mul nuw i8 [[Z]], 17
-; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i8 [[M]], 17
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[Z]], -8
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], -7
; CHECK-NEXT: ret i1 [[TMP2]]
;
%z = zext i4 %v to i8
@@ -75,8 +75,7 @@ define i1 @ult_invertible_zext_mul_partial_image(i4 %x) {
; CHECK-LABEL: define i1 @ult_invertible_zext_mul_partial_image(
; CHECK-SAME: i4 [[X:%.*]]) {
; CHECK-NEXT: [[Z:%.*]] = zext i4 [[X]] to i8
-; CHECK-NEXT: [[M:%.*]] = mul nuw i8 [[Z]], 10
-; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[M]], 50
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i8 [[Z]], 5
; CHECK-NEXT: ret i1 [[TMP1]]
;
%z = zext i4 %x to i8
@@ -126,8 +125,8 @@ define i1 @sge_invertible_tail_of_zext_mul(i4 %x) {
; CHECK-LABEL: define i1 @sge_invertible_tail_of_zext_mul(
; CHECK-SAME: i4 [[X:%.*]]) {
; CHECK-NEXT: [[Z:%.*]] = zext i4 [[X]] to i8
-; CHECK-NEXT: [[M:%.*]] = mul i8 [[Z]], 20
-; CHECK-NEXT: [[TMP2:%.*]] = icmp sge i8 [[M]], 60
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[Z]], -3
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 4
; CHECK-NEXT: ret i1 [[TMP2]]
;
%z = zext i4 %x to i8
@@ -145,8 +144,8 @@ define i1 @uge_invertible_tail_of_zext_mul(i4 %x) {
; CHECK-LABEL: define i1 @uge_invertible_tail_of_zext_mul(
; CHECK-SAME: i4 [[X:%.*]]) {
; CHECK-NEXT: [[Z:%.*]] = zext i4 [[X]] to i8
-; CHECK-NEXT: [[M:%.*]] = mul i8 [[Z]], 20
-; CHECK-NEXT: [[TMP2:%.*]] = icmp uge i8 [[M]], 60
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[Z]], -3
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 10
; CHECK-NEXT: ret i1 [[TMP2]]
;
%z = zext i4 %x to i8
@@ -164,8 +163,8 @@ define i1 @slt_noninvertible_signed_range_before_tail(i4 %v) {
; CHECK-LABEL: define i1 @slt_noninvertible_signed_range_before_tail(
; CHECK-SAME: i4 [[V:%.*]]) {
; CHECK-NEXT: [[Z:%.*]] = zext i4 [[V]] to i8
-; CHECK-NEXT: [[CAST:%.*]] = mul nuw i8 [[Z]], 18
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[CAST]], 16
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[Z]], -8
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[TMP1]], -7
; CHECK-NEXT: ret i1 [[CMP]]
;
%z = zext i4 %v to i8
@@ -183,8 +182,8 @@ define i1 @ult_noninvertible_zext_mul_range(i4 %x) {
; CHECK-LABEL: define i1 @ult_noninvertible_zext_mul_range(
; CHECK-SAME: i4 [[X:%.*]]) {
; CHECK-NEXT: [[Z:%.*]] = zext i4 [[X]] to i8
-; CHECK-NEXT: [[M:%.*]] = mul i8 [[Z]], 20
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[M]], 45
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[Z]], -13
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[TMP1]], -10
; CHECK-NEXT: ret i1 [[CMP]]
;
%z = zext i4 %x to i8
@@ -202,8 +201,8 @@ define i1 @ult_noninvertible_zext_mul_before_tail(i4 %v) {
; CHECK-LABEL: define i1 @ult_noninvertible_zext_mul_before_tail(
; CHECK-SAME: i4 [[V:%.*]]) {
; CHECK-NEXT: [[Z:%.*]] = zext i4 [[V]] to i8
-; CHECK-NEXT: [[CAST:%.*]] = mul i8 [[Z]], 18
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[CAST]], 16
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[Z]], -15
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[TMP1]], -14
; CHECK-NEXT: ret i1 [[CMP]]
;
%z = zext i4 %v to i8
@@ -221,8 +220,8 @@ define i1 @slt_noninvertible_crosses_wrap(i4 %v) {
; CHECK-LABEL: define i1 @slt_noninvertible_crosses_wrap(
; CHECK-SAME: i4 [[V:%.*]]) {
; CHECK-NEXT: [[Z:%.*]] = zext i4 [[V]] to i8
-; CHECK-NEXT: [[CAST:%.*]] = mul i8 [[Z]], 20
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[CAST]], 60
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[Z]], -7
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[TMP1]], -4
; CHECK-NEXT: ret i1 [[CMP]]
;
%z = zext i4 %v to i8
@@ -262,9 +261,8 @@ define i1 @sge_invertible_tail_of_zext_mul_plus_offset(i4 %x) {
; CHECK-LABEL: define i1 @sge_invertible_tail_of_zext_mul_plus_offset(
; CHECK-SAME: i4 [[X:%.*]]) {
; CHECK-NEXT: [[Z:%.*]] = zext i4 [[X]] to i8
-; CHECK-NEXT: [[M:%.*]] = mul i8 [[Z]], 20
-; CHECK-NEXT: [[A:%.*]] = sub i8 [[M]], 100
-; CHECK-NEXT: [[CMP:%.*]] = icmp sge i8 [[A]], 10
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[Z]], -6
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[TMP1]], 6
; CHECK-NEXT: ret i1 [[CMP]]
;
%z = zext i4 %x to i8
@@ -283,9 +281,8 @@ define i1 @ult_invertible_zext_mul_plus_offset(i4 %x) {
; CHECK-LABEL: define i1 @ult_invertible_zext_mul_plus_offset(
; CHECK-SAME: i4 [[X:%.*]]) {
; CHECK-NEXT: [[Z:%.*]] = zext i4 [[X]] to i8
-; CHECK-NEXT: [[M:%.*]] = mul i8 [[Z]], 20
-; CHECK-NEXT: [[A:%.*]] = add i8 [[M]], -100
-; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[A]], 100
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[Z]], -5
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 5
; CHECK-NEXT: ret i1 [[TMP2]]
;
%z = zext i4 %x to i8
@@ -304,9 +301,8 @@ define i1 @slt_inverse_invertible_zext_mul_plus_offset(i4 %x) {
; CHECK-LABEL: define i1 @slt_inverse_invertible_zext_mul_plus_offset(
; CHECK-SAME: i4 [[X:%.*]]) {
; CHECK-NEXT: [[Z:%.*]] = zext i4 [[X]] to i8
-; CHECK-NEXT: [[M:%.*]] = mul i8 [[Z]], 20
-; CHECK-NEXT: [[A:%.*]] = add i8 [[M]], 6
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[A]], 66
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[Z]], -7
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[TMP1]], -4
; CHECK-NEXT: ret i1 [[CMP]]
;
%z = zext i4 %x to i8
@@ -357,8 +353,8 @@ define i1 @sccp_sext_mul_shrinks_to_negative_suffix(i4 %x) {
define i1 @sccp_mul_wraps_once_shrinks_to_middle_window(i8 range(i8 0, 18) %x) {
; CHECK-LABEL: define i1 @sccp_mul_wraps_once_shrinks_to_middle_window(
; CHECK-SAME: i8 range(i8 0, 18) [[X:%.*]]) {
-; CHECK-NEXT: [[M:%.*]] = mul i8 [[X]], 20
-; CHECK-NEXT: [[TMP2:%.*]] = icmp uge i8 [[M]], 100
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X]], -5
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 8
; CHECK-NEXT: ret i1 [[TMP2]]
;
%m = mul i8 %x, 20
@@ -369,7 +365,7 @@ define i1 @sccp_mul_wraps_once_shrinks_to_middle_window(i8 range(i8 0, 18) %x) {
define i1 @sccp_urem_wraps_once_shrinks_to_middle_window(i8 range(i8 8, 19) %x) {
; CHECK-LABEL: define i1 @sccp_urem_wraps_once_shrinks_to_middle_window(
; CHECK-SAME: i8 range(i8 8, 19) [[X:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = urem i8 [[X]], 10
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X]], -10
; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 3
; CHECK-NEXT: ret i1 [[TMP2]]
;
@@ -381,8 +377,8 @@ define i1 @sccp_urem_wraps_once_shrinks_to_middle_window(i8 range(i8 8, 19) %x)
define i1 @sccp_negative_mul_wraps_once_shrinks_to_middle_window(i8 range(i8 3, 17) %x) {
; CHECK-LABEL: define i1 @sccp_negative_mul_wraps_once_shrinks_to_middle_window(
; CHECK-SAME: i8 range(i8 3, 17) [[X:%.*]]) {
-; CHECK-NEXT: [[M:%.*]] = mul i8 [[X]], -20
-; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[M]], 60
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X]], -10
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 3
; CHECK-NEXT: ret i1 [[TMP2]]
;
%m = mul i8 %x, -20
@@ -393,8 +389,8 @@ define i1 @sccp_negative_mul_wraps_once_shrinks_to_middle_window(i8 range(i8 3,
define i1 @sccp_shl_wraps_once_shrinks_to_upper_window(i8 range(i8 5, 23) %x) {
; CHECK-LABEL: define i1 @sccp_shl_wraps_once_shrinks_to_upper_window(
; CHECK-SAME: i8 range(i8 5, 23) [[X:%.*]]) {
-; CHECK-NEXT: [[Y:%.*]] = shl i8 [[X]], 4
-; CHECK-NEXT: [[TMP2:%.*]] = icmp uge i8 [[Y]], -96
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X]], -10
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i8 [[TMP1]], 6
; CHECK-NEXT: ret i1 [[TMP2]]
;
%y = shl i8 %x, 4
@@ -405,9 +401,8 @@ define i1 @sccp_shl_wraps_once_shrinks_to_upper_window(i8 range(i8 5, 23) %x) {
define i1 @sccp_mul_wraps_once_shrinks_to_two_value_window1(i8 range(i8 2, 6) %x) {
; CHECK-LABEL: define i1 @sccp_mul_wraps_once_shrinks_to_two_value_window1(
; CHECK-SAME: i8 range(i8 2, 6) [[X:%.*]]) {
-; CHECK-NEXT: [[M:%.*]] = mul i8 [[X]], 100
-; CHECK-NEXT: [[A:%.*]] = add i8 [[M]], 56
-; CHECK-NEXT: [[CMP:%.*]] = icmp uge i8 [[A]], 100
+; CHECK-NEXT: [[TMP1:%.*]] = add i8 [[X]], -3
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[TMP1]], 2
; CHECK-NEXT: ret i1 [[CMP]]
;
%m = mul i8 %x, 100
@@ -434,8 +429,7 @@ define i1 @sccp_mul_wraps_once_shrinks_to_two_value_window2(i8 range(i8 -2, 2) %
define i1 @sccp_mul_wraps_once_shrinks_to_singleton(i8 range(i8 0, 18) %x) {
; CHECK-LABEL: define i1 @sccp_mul_wraps_once_shrinks_to_singleton(
; CHECK-SAME: i8 range(i8 0, 18) [[X:%.*]]) {
-; CHECK-NEXT: [[M:%.*]] = mul i8 [[X]], 20
-; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[M]], -116
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 [[X]], 7
; CHECK-NEXT: ret i1 [[TMP1]]
;
%m = mul i8 %x, 20
@@ -525,8 +519,8 @@ define i1 @sccp_mul_wraps_once_shrinks_to_wrapped_domain_singleton(i8 range(i8 2
define <4 x i1> @vec_splat_and(<4 x i32> range(i32 100, 456) %x) {
; CHECK-LABEL: define <4 x i1> @vec_splat_and(
; CHECK-SAME: <4 x i32> range(i32 100, 456) [[X:%.*]]) {
-; CHECK-NEXT: [[M:%.*]] = and <4 x i32> [[X]], splat (i32 255)
-; CHECK-NEXT: [[CMP:%.*]] = icmp uge <4 x i32> [[M]], splat (i32 230)
+; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[X]], splat (i32 -230)
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult <4 x i32> [[TMP1]], splat (i32 26)
; CHECK-NEXT: ret <4 x i1> [[CMP]]
;
%m = and <4 x i32> %x, splat (i32 255)
@@ -537,9 +531,8 @@ define <4 x i1> @vec_splat_and(<4 x i32> range(i32 100, 456) %x) {
define <4 x i1> @vec_splat_mul(<4 x i8> range(i8 10, 17) %x) {
; CHECK-LABEL: define <4 x i1> @vec_splat_mul(
; CHECK-SAME: <4 x i8> range(i8 10, 17) [[X:%.*]]) {
-; CHECK-NEXT: [[M:%.*]] = mul <4 x i8> [[X]], splat (i8 50)
-; CHECK-NEXT: [[ADD:%.*]] = add <4 x i8> [[M]], splat (i8 -40)
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult <4 x i8> [[ADD]], splat (i8 110)
+; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i8> [[X]], splat (i8 -12)
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult <4 x i8> [[TMP1]], splat (i8 2)
; CHECK-NEXT: ret <4 x i1> [[CMP]]
;
%m = mul <4 x i8> %x, splat (i8 50)
@@ -551,8 +544,8 @@ define <4 x i1> @vec_splat_mul(<4 x i8> range(i8 10, 17) %x) {
define <4 x i1> @vec_splat_urem(<4 x i32> range(i32 1, 10) %x) {
; CHECK-LABEL: define <4 x i1> @vec_splat_urem(
; CHECK-SAME: <4 x i32> range(i32 1, 10) [[X:%.*]]) {
-; CHECK-NEXT: [[M:%.*]] = urem <4 x i32> [[X]], splat (i32 8)
-; CHECK-NEXT: [[CMP:%.*]] = icmp uge <4 x i32> [[M]], splat (i32 2)
+; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[X]], splat (i32 -2)
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult <4 x i32> [[TMP1]], splat (i32 6)
; CHECK-NEXT: ret <4 x i1> [[CMP]]
;
%m = urem <4 x i32> %x, splat (i32 8)
@@ -563,8 +556,7 @@ define <4 x i1> @vec_splat_urem(<4 x i32> range(i32 1, 10) %x) {
define <4 x i1> @vec_splat_shl(<4 x i8> range(i8 1, 4) %x) {
; CHECK-LABEL: define <4 x i1> @vec_splat_shl(
; CHECK-SAME: <4 x i8> range(i8 1, 4) [[X:%.*]]) {
-; CHECK-NEXT: [[M:%.*]] = shl <4 x i8> [[X]], splat (i8 7)
-; CHECK-NEXT: [[CMP:%.*]] = icmp uge <4 x i8> [[M]], splat (i8 100)
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne <4 x i8> [[X]], splat (i8 2)
; CHECK-NEXT: ret <4 x i1> [[CMP]]
;
%m = shl <4 x i8> %x, splat (i8 7)
>From 16807082b1f97315452ba82cae259594704670e2 Mon Sep 17 00:00:00 2001
From: Camsyn <camsyn at foxmail.com>
Date: Thu, 26 Mar 2026 01:50:09 +0800
Subject: [PATCH 9/9] Update other tests
---
llvm/test/Transforms/PhaseOrdering/cmp-logic.ll | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/test/Transforms/PhaseOrdering/cmp-logic.ll b/llvm/test/Transforms/PhaseOrdering/cmp-logic.ll
index 04eae7d2941d8..72d7651509f9e 100644
--- a/llvm/test/Transforms/PhaseOrdering/cmp-logic.ll
+++ b/llvm/test/Transforms/PhaseOrdering/cmp-logic.ll
@@ -111,8 +111,7 @@ define i32 @PR56119(i32 %e.coerce) {
; O1-LABEL: @PR56119(
; O1-NEXT: entry:
; O1-NEXT: [[CONV2:%.*]] = and i32 [[E_COERCE:%.*]], 255
-; O1-NEXT: [[REM:%.*]] = urem i32 [[CONV2]], 255
-; O1-NEXT: [[CMP:%.*]] = icmp eq i32 [[REM]], 7
+; O1-NEXT: [[CMP:%.*]] = icmp eq i32 [[CONV2]], 7
; O1-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
; O1: if.then:
; O1-NEXT: tail call void (...) @foo()
More information about the llvm-commits
mailing list