[llvm] [InstCombine] When canonicalizing clamp like, also consider certain sgt/slt cases (PR #153240)
Ryan Buchner via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 19 11:53:13 PDT 2025
https://github.com/bababuck updated https://github.com/llvm/llvm-project/pull/153240
>From 46767d0388146e7022234ccff71bd6651de3dead Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Fri, 8 Aug 2025 10:29:44 -0700
Subject: [PATCH 1/8] Add test for clamp like case where the first select is a
([s/z]ext (icmp))
This ([s/z]ext (icmp)) is equivilant to (select icmp, [1/-1], 0), so can be
canonicalized too. This is generally not beneficial because after the canonicalization
we lose the ability replace one of the selects with ([s/z]ext (icmp)). However, it is
beneficial for this particular case:
```
%old_cmp1 = icmp sgt i32 %x, C2
%old_replacement = sext i1 %old_cmp1 to i32
%old_cmp0 = icmp ult i32 %x, C0
%r = select i1 %old_cmp0, i32 %x, i32 %old_replacement
it can be rewriten as more canonical pattern:
%new_cmp2 = icmp sge i32 %x, C0
%new_clamped_low = smax i32 %target_low, i32 %x
%r = select i1 %new_cmp2, i32 -1, i32 %new_clamped_low
Iff 0 s<= C2 s<= C0
```
The select can be lowered to:
```
%sext_cmp2 = sext i1 %new_cmp2 to i32
%r = or i32 %sext_cmp2, i32 %new_clamped_low
```
---
.../canonicalize-clamp-like-pattern-i1.ll | 163 ++++++++++++++++++
1 file changed, 163 insertions(+)
create mode 100644 llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-i1.ll
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-i1.ll b/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-i1.ll
new file mode 100644
index 0000000000000..98b38e819d242
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-i1.ll
@@ -0,0 +1,163 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes='instcombine<no-verify-fixpoint>' -S | FileCheck %s
+
+; Given a pattern like:
+; %old_cmp1 = icmp sgt i32 %x, C2
+; %old_replacement = sext i1 %old_cmp1 to i32
+; %old_cmp0 = icmp ult i32 %x, C0
+; %r = select i1 %old_cmp0, i32 %x, i32 %old_replacement
+; it can be rewriten as more canonical pattern:
+; %new_cmp2 = icmp sge i32 %x, C0
+; %new_clamped_low = smax i32 %target_low, i32 %x
+; %r = select i1 %new_cmp2, i32 -1, i32 %new_clamped_low
+; Iff 0 s<= C2 s<= C0
+; Also, ULT predicate can also be UGE; or UGT iff C0 != -1 (+invert result)
+; Also, SLT predicate can also be SGE; or SGT iff C2 != INT_MAX (+invert res.)
+
+;-------------------------------------------------------------------------------
+
+; clamp-like max case, can be optimized with max
+define i32 @clamp_max_sgt(i32 %x) {
+; CHECK-LABEL: @clamp_max_sgt(
+; CHECK-NEXT: [[OR_COND:%.*]] = icmp ult i32 [[X:%.*]], 256
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[X]], 0
+; CHECK-NEXT: [[COND:%.*]] = sext i1 [[CMP2]] to i32
+; CHECK-NEXT: [[COND3:%.*]] = select i1 [[OR_COND]], i32 [[X]], i32 [[COND]]
+; CHECK-NEXT: ret i32 [[COND3]]
+;
+ %or.cond = icmp ult i32 %x, 256
+ %cmp2 = icmp sgt i32 %x, 0
+ %cond = sext i1 %cmp2 to i32
+ %cond3 = select i1 %or.cond, i32 %x, i32 %cond
+ ret i32 %cond3
+}
+
+; clamp-like max case with vector, can be optimized with max
+define <2 x i32> @clamp_max_sgt_vec(<2 x i32> %x) {
+; CHECK-LABEL: @clamp_max_sgt_vec(
+; CHECK-NEXT: [[OR_COND:%.*]] = icmp ult <2 x i32> [[X:%.*]], <i32 100, i32 256>
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt <2 x i32> [[X]], <i32 98, i32 254>
+; CHECK-NEXT: [[COND:%.*]] = sext <2 x i1> [[CMP2]] to <2 x i32>
+; CHECK-NEXT: [[COND3:%.*]] = select <2 x i1> [[OR_COND]], <2 x i32> [[X]], <2 x i32> [[COND]]
+; CHECK-NEXT: ret <2 x i32> [[COND3]]
+;
+ %or.cond = icmp ult <2 x i32> %x, <i32 100, i32 256>
+ %cmp2 = icmp sgt <2 x i32> %x, <i32 98, i32 254>
+ %cond = sext <2 x i1> %cmp2 to <2 x i32>
+ %cond3 = select <2 x i1> %or.cond, <2 x i32> %x, <2 x i32> %cond
+ ret <2 x i32> %cond3
+}
+
+; Not clamp-like vector
+define <2 x i32> @clamp_max_vec(<2 x i32> %x) {
+; CHECK-LABEL: @clamp_max_vec(
+; CHECK-NEXT: [[OR_COND:%.*]] = icmp ult <2 x i32> [[X:%.*]], <i32 100, i32 256>
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt <2 x i32> [[X]], <i32 128, i32 0>
+; CHECK-NEXT: [[COND:%.*]] = sext <2 x i1> [[CMP2]] to <2 x i32>
+; CHECK-NEXT: [[COND3:%.*]] = select <2 x i1> [[OR_COND]], <2 x i32> [[X]], <2 x i32> [[COND]]
+; CHECK-NEXT: ret <2 x i32> [[COND3]]
+;
+ %or.cond = icmp ult <2 x i32> %x, <i32 100, i32 256>
+ %cmp2 = icmp sgt <2 x i32> %x, <i32 128, i32 0>
+ %cond = sext <2 x i1> %cmp2 to <2 x i32>
+ %cond3 = select <2 x i1> %or.cond, <2 x i32> %x, <2 x i32> %cond
+ ret <2 x i32> %cond3
+}
+
+; clamp-like max case, can be optimized with max
+define i32 @clamp_max_sgt_neg1(i32 %x) {
+; CHECK-LABEL: @clamp_max_sgt_neg1(
+; CHECK-NEXT: [[OR_COND:%.*]] = icmp ult i32 [[X:%.*]], 256
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[X]], -1
+; CHECK-NEXT: [[COND:%.*]] = sext i1 [[CMP2]] to i32
+; CHECK-NEXT: [[COND3:%.*]] = select i1 [[OR_COND]], i32 [[X]], i32 [[COND]]
+; CHECK-NEXT: ret i32 [[COND3]]
+;
+ %or.cond = icmp ult i32 %x, 256
+ %cmp2 = icmp sgt i32 %x, -1
+ %cond = sext i1 %cmp2 to i32
+ %cond3 = select i1 %or.cond, i32 %x, i32 %cond
+ ret i32 %cond3
+}
+
+; clamp-like max case, can be optimized with max
+define i32 @clamp_max_sge(i32 %x) {
+; CHECK-LABEL: @clamp_max_sge(
+; CHECK-NEXT: [[OR_COND:%.*]] = icmp ult i32 [[X:%.*]], 256
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[X]], -1
+; CHECK-NEXT: [[COND:%.*]] = sext i1 [[CMP2]] to i32
+; CHECK-NEXT: [[COND3:%.*]] = select i1 [[OR_COND]], i32 [[X]], i32 [[COND]]
+; CHECK-NEXT: ret i32 [[COND3]]
+;
+ %or.cond = icmp ult i32 %x, 256
+ %cmp2 = icmp sge i32 %x, 0
+ %cond = sext i1 %cmp2 to i32
+ %cond3 = select i1 %or.cond, i32 %x, i32 %cond
+ ret i32 %cond3
+}
+
+; Don't support SLT cases, need to select 0 as the low value, -1 as high value
+define i32 @clamp_max_slt(i32 %x) {
+; CHECK-LABEL: @clamp_max_slt(
+; CHECK-NEXT: [[OR_COND:%.*]] = icmp ult i32 [[X:%.*]], 256
+; CHECK-NEXT: [[COND:%.*]] = ashr i32 [[X]], 31
+; CHECK-NEXT: [[COND3:%.*]] = select i1 [[OR_COND]], i32 [[X]], i32 [[COND]]
+; CHECK-NEXT: ret i32 [[COND3]]
+;
+ %or.cond = icmp ult i32 %x, 256
+ %cmp2 = icmp slt i32 %x, 0
+ %cond = sext i1 %cmp2 to i32
+ %cond3 = select i1 %or.cond, i32 %x, i32 %cond
+ ret i32 %cond3
+}
+
+; Don't support SLE cases, need to select 0 as the low value, -1 as high value
+define i32 @clamp_max_sle(i32 %x) {
+; CHECK-LABEL: @clamp_max_sle(
+; CHECK-NEXT: [[OR_COND:%.*]] = icmp ult i32 [[X:%.*]], 256
+; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[X]], 1
+; CHECK-NEXT: [[COND:%.*]] = sext i1 [[CMP2]] to i32
+; CHECK-NEXT: [[COND3:%.*]] = select i1 [[OR_COND]], i32 [[X]], i32 [[COND]]
+; CHECK-NEXT: ret i32 [[COND3]]
+;
+ %or.cond = icmp ult i32 %x, 256
+ %cmp2 = icmp sle i32 %x, 0
+ %cond = sext i1 %cmp2 to i32
+ %cond3 = select i1 %or.cond, i32 %x, i32 %cond
+ ret i32 %cond3
+}
+
+; Not selecting between 0, x, and -1, so can't be optimized with max
+; Select between 0, x, and 1
+define i32 @clamp_max_bad_values(i32 %x) {
+; CHECK-LABEL: @clamp_max_bad_values(
+; CHECK-NEXT: [[OR_COND:%.*]] = icmp ult i32 [[X:%.*]], 256
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[X]], 0
+; CHECK-NEXT: [[COND:%.*]] = zext i1 [[CMP2]] to i32
+; CHECK-NEXT: [[COND3:%.*]] = select i1 [[OR_COND]], i32 [[X]], i32 [[COND]]
+; CHECK-NEXT: ret i32 [[COND3]]
+;
+ %or.cond = icmp ult i32 %x, 256
+ %cmp2 = icmp sgt i32 %x, 0
+ %cond = zext i1 %cmp2 to i32
+ %cond3 = select i1 %or.cond, i32 %x, i32 %cond
+ ret i32 %cond3
+}
+
+; Boundaries of range are not 0 and x (x is some positive integer)
+define i32 @clamp_max_offset(i32 %x) {
+; CHECK-LABEL: @clamp_max_offset(
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -10
+; CHECK-NEXT: [[OR_COND:%.*]] = icmp ult i32 [[TMP1]], 246
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[X]], 10
+; CHECK-NEXT: [[COND:%.*]] = sext i1 [[CMP2]] to i32
+; CHECK-NEXT: [[COND3:%.*]] = select i1 [[OR_COND]], i32 [[X]], i32 [[COND]]
+; CHECK-NEXT: ret i32 [[COND3]]
+;
+ %1 = add i32 %x, -10
+ %or.cond = icmp ult i32 %1, 246
+ %cmp2 = icmp sgt i32 %x, 10
+ %cond = sext i1 %cmp2 to i32
+ %cond3 = select i1 %or.cond, i32 %x, i32 %cond
+ ret i32 %cond3
+}
>From cdfaef9d837984895c035443be8965f963ab9417 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Fri, 8 Aug 2025 10:29:53 -0700
Subject: [PATCH 2/8] [InstCombine] When canoncicalizing clamp like, also
consider certain sgt/slt cases
In particular, when %target_low=0 and %target_high=-1 and C1=0
```
%old_cmp1 = icmp slt %x, C2
%old_replacement = select %old_cmp1, %target_low, %target_high
```
might have aleady been combined into
```
%old_cmp1 = icmp sgt %x, C2
%old_replacement = sext %old_cmp1
```
For this particular case, the canonacalization allows for a more optimized sequence
utilizing `max` to be created.
```
%old_cmp1 = icmp sgt %x, C2
%old_replacement = sext %old_cmp1
%old_cmp0 = icmp ult i32 %x, C0
%r = select i1 %old_cmp0, i32 %x, i32 %old_replacement
```
If 0 s<= C2 s<= C0, can be re-written as:
```
%new_cmp1 = icmp slt i32 %x, 0
%new_cmp2 = icmp sge i32 %x, C0
%new_clamped_low = select i1 %new_cmp1, i32 0, i32 %x
%r = select i1 %new_cmp2, i32 -1, i32 %new_clamped_low
```
Can be re-written as (already occurs from the canonicalized version):
```
%clamped_low = max i32 %x, 0
%new_cmp2 = icmp sge i32 %x, C0
%sext = sext i1 %new_cmp2
%r = or i32 %sext, %new_cmp2
```
---
.../InstCombine/InstCombineSelect.cpp | 47 ++++++++++++++++---
.../canonicalize-clamp-like-pattern-i1.ll | 28 +++++------
2 files changed, 52 insertions(+), 23 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index eb4332fbc0959..4471e0ebfe025 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1521,10 +1521,17 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C0))))
return nullptr;
- if (!isa<SelectInst>(Sel1)) {
- Pred0 = ICmpInst::getInversePredicate(Pred0);
- std::swap(X, Sel1);
- }
+ auto SwapSelectOperands = [](ICmpInst::Predicate &Pred, Value *&Op0,
+ Value *&Op1) -> void {
+ std::swap(Op0, Op1);
+ Pred = ICmpInst::getInversePredicate(Pred);
+ };
+
+ if (!isa<SelectInst>(Sel1))
+ SwapSelectOperands(Pred0, Sel1, X);
+
+ if (!isa<SelectInst>(Sel1) && !isa<SExtInst>(Sel1))
+ SwapSelectOperands(Pred0, Sel1, X);
// Canonicalize Cmp0 into ult or uge.
// FIXME: we shouldn't care about lanes that are 'undef' in the end?
@@ -1575,17 +1582,30 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C1)))))
return nullptr;
+ // Will create Replacement[Low/High] later for SExtICmp case
Value *Cmp1;
CmpPredicate Pred1;
Constant *C2;
Value *ReplacementLow, *ReplacementHigh;
- if (!match(Sel1, m_Select(m_Value(Cmp1), m_Value(ReplacementLow),
- m_Value(ReplacementHigh))) ||
+ bool FoldSExtICmp;
+ auto MatchSExtICmp = [](Value *PossibleSextIcmp, Value *&Cmp1) -> bool {
+ Value *ICmpOp0, *ICmpOp1;
+ return match(PossibleSextIcmp, m_SExt(m_Value(Cmp1))) &&
+ match(Cmp1, m_ICmp(m_Value(ICmpOp0), m_Value(ICmpOp1)));
+ };
+ if (!((FoldSExtICmp = MatchSExtICmp(Sel1, Cmp1)) ||
+ match(Sel1, m_Select(m_Value(Cmp1), m_Value(ReplacementLow),
+ m_Value(ReplacementHigh)))) ||
!match(Cmp1,
m_ICmp(Pred1, m_Specific(X),
m_CombineAnd(m_AnyIntegralConstant(), m_Constant(C2)))))
return nullptr;
+ // When folding sext-icmp, only efficient if C1 = 0 so we can make use of the
+ // `smax` instruction
+ if (FoldSExtICmp && !C1->isZeroValue())
+ return nullptr;
+
if (!Cmp1->hasOneUse() && (Cmp00 == X || !Cmp00->hasOneUse()))
return nullptr; // Not enough one-use instructions for the fold.
// FIXME: this restriction could be relaxed if Cmp1 can be reused as one of
@@ -1593,8 +1613,13 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
// Canonicalize Cmp1 into the form we expect.
// FIXME: we shouldn't care about lanes that are 'undef' in the end?
+ bool SwapReplacement = false;
switch (Pred1) {
case ICmpInst::Predicate::ICMP_SLT:
+ // The sext(icmp) case only is advantageous for SGT/SGTE since that enables
+ // max conversion
+ if (FoldSExtICmp)
+ return nullptr;
break;
case ICmpInst::Predicate::ICMP_SLE:
// We'd have to increment C2 by one, and for that it must not have signed
@@ -1615,7 +1640,7 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
// Also non-canonical, but here we don't need to change C2,
// so we don't have any restrictions on C2, so we can just handle it.
Pred1 = ICmpInst::Predicate::ICMP_SLT;
- std::swap(ReplacementLow, ReplacementHigh);
+ SwapReplacement = true;
break;
default:
return nullptr; // Unknown predicate.
@@ -1644,6 +1669,14 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
if (!Precond2 || !match(Precond2, m_One()))
return nullptr;
+ if (FoldSExtICmp) {
+ ReplacementLow = Constant::getAllOnesValue(Sel1->getType());
+ ReplacementHigh = Constant::getNullValue(Sel1->getType());
+ }
+
+ if (SwapReplacement)
+ std::swap(ReplacementLow, ReplacementHigh);
+
// If we are matching from a truncated input, we need to sext the
// ReplacementLow and ReplacementHigh values. Only do the transform if they
// are free to extend due to being constants.
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-i1.ll b/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-i1.ll
index 98b38e819d242..062c7b477a44b 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-i1.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-clamp-like-pattern-i1.ll
@@ -19,10 +19,9 @@
; clamp-like max case, can be optimized with max
define i32 @clamp_max_sgt(i32 %x) {
; CHECK-LABEL: @clamp_max_sgt(
-; CHECK-NEXT: [[OR_COND:%.*]] = icmp ult i32 [[X:%.*]], 256
-; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[X]], 0
-; CHECK-NEXT: [[COND:%.*]] = sext i1 [[CMP2]] to i32
-; CHECK-NEXT: [[COND3:%.*]] = select i1 [[OR_COND]], i32 [[X]], i32 [[COND]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], 255
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smax.i32(i32 [[X]], i32 0)
+; CHECK-NEXT: [[COND3:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[TMP2]]
; CHECK-NEXT: ret i32 [[COND3]]
;
%or.cond = icmp ult i32 %x, 256
@@ -35,10 +34,9 @@ define i32 @clamp_max_sgt(i32 %x) {
; clamp-like max case with vector, can be optimized with max
define <2 x i32> @clamp_max_sgt_vec(<2 x i32> %x) {
; CHECK-LABEL: @clamp_max_sgt_vec(
-; CHECK-NEXT: [[OR_COND:%.*]] = icmp ult <2 x i32> [[X:%.*]], <i32 100, i32 256>
-; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt <2 x i32> [[X]], <i32 98, i32 254>
-; CHECK-NEXT: [[COND:%.*]] = sext <2 x i1> [[CMP2]] to <2 x i32>
-; CHECK-NEXT: [[COND3:%.*]] = select <2 x i1> [[OR_COND]], <2 x i32> [[X]], <2 x i32> [[COND]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <2 x i32> [[X:%.*]], <i32 99, i32 255>
+; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[X]], <2 x i32> zeroinitializer)
+; CHECK-NEXT: [[COND3:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> splat (i32 -1), <2 x i32> [[TMP2]]
; CHECK-NEXT: ret <2 x i32> [[COND3]]
;
%or.cond = icmp ult <2 x i32> %x, <i32 100, i32 256>
@@ -67,10 +65,9 @@ define <2 x i32> @clamp_max_vec(<2 x i32> %x) {
; clamp-like max case, can be optimized with max
define i32 @clamp_max_sgt_neg1(i32 %x) {
; CHECK-LABEL: @clamp_max_sgt_neg1(
-; CHECK-NEXT: [[OR_COND:%.*]] = icmp ult i32 [[X:%.*]], 256
-; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[X]], -1
-; CHECK-NEXT: [[COND:%.*]] = sext i1 [[CMP2]] to i32
-; CHECK-NEXT: [[COND3:%.*]] = select i1 [[OR_COND]], i32 [[X]], i32 [[COND]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], 255
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smax.i32(i32 [[X]], i32 0)
+; CHECK-NEXT: [[COND3:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[TMP2]]
; CHECK-NEXT: ret i32 [[COND3]]
;
%or.cond = icmp ult i32 %x, 256
@@ -83,10 +80,9 @@ define i32 @clamp_max_sgt_neg1(i32 %x) {
; clamp-like max case, can be optimized with max
define i32 @clamp_max_sge(i32 %x) {
; CHECK-LABEL: @clamp_max_sge(
-; CHECK-NEXT: [[OR_COND:%.*]] = icmp ult i32 [[X:%.*]], 256
-; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i32 [[X]], -1
-; CHECK-NEXT: [[COND:%.*]] = sext i1 [[CMP2]] to i32
-; CHECK-NEXT: [[COND3:%.*]] = select i1 [[OR_COND]], i32 [[X]], i32 [[COND]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[X:%.*]], 255
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.smax.i32(i32 [[X]], i32 0)
+; CHECK-NEXT: [[COND3:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[TMP2]]
; CHECK-NEXT: ret i32 [[COND3]]
;
%or.cond = icmp ult i32 %x, 256
>From 3b55bb98c87d8530947058181e782662469d2ddd Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Mon, 18 Aug 2025 16:03:06 -0700
Subject: [PATCH 3/8] Simplify pattern matching
---
llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 4471e0ebfe025..e048578ef206c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1589,9 +1589,7 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
Value *ReplacementLow, *ReplacementHigh;
bool FoldSExtICmp;
auto MatchSExtICmp = [](Value *PossibleSextIcmp, Value *&Cmp1) -> bool {
- Value *ICmpOp0, *ICmpOp1;
- return match(PossibleSextIcmp, m_SExt(m_Value(Cmp1))) &&
- match(Cmp1, m_ICmp(m_Value(ICmpOp0), m_Value(ICmpOp1)));
+ return match(PossibleSextIcmp, m_SExt(m_Value(Cmp1, m_ICmp(m_Value(), m_Value()))));
};
if (!((FoldSExtICmp = MatchSExtICmp(Sel1, Cmp1)) ||
match(Sel1, m_Select(m_Value(Cmp1), m_Value(ReplacementLow),
>From 35eaeb2b79443b171a51ed8efda60144f4e15799 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Mon, 18 Aug 2025 16:06:43 -0700
Subject: [PATCH 4/8] Formating
---
llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index e048578ef206c..b74597f88c4c9 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1589,7 +1589,8 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
Value *ReplacementLow, *ReplacementHigh;
bool FoldSExtICmp;
auto MatchSExtICmp = [](Value *PossibleSextIcmp, Value *&Cmp1) -> bool {
- return match(PossibleSextIcmp, m_SExt(m_Value(Cmp1, m_ICmp(m_Value(), m_Value()))));
+ return match(PossibleSextIcmp,
+ m_SExt(m_Value(Cmp1, m_ICmp(m_Value(), m_Value()))));
};
if (!((FoldSExtICmp = MatchSExtICmp(Sel1, Cmp1)) ||
match(Sel1, m_Select(m_Value(Cmp1), m_Value(ReplacementLow),
>From 73f7dcc8e53d6447575123654764a4bc5d4efebe Mon Sep 17 00:00:00 2001
From: Ryan Buchner <buchner.ryan at gmail.com>
Date: Tue, 19 Aug 2025 11:37:02 -0700
Subject: [PATCH 5/8] Update
llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
Co-authored-by: Nikita Popov <github at npopov.com>
---
llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index b74597f88c4c9..02efe92794f7f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1587,12 +1587,9 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
CmpPredicate Pred1;
Constant *C2;
Value *ReplacementLow, *ReplacementHigh;
- bool FoldSExtICmp;
- auto MatchSExtICmp = [](Value *PossibleSextIcmp, Value *&Cmp1) -> bool {
- return match(PossibleSextIcmp,
+ bool FoldSExtICmp = match(Sel1,
m_SExt(m_Value(Cmp1, m_ICmp(m_Value(), m_Value()))));
- };
- if (!((FoldSExtICmp = MatchSExtICmp(Sel1, Cmp1)) ||
+ if (!(FoldSelectICmp ||
match(Sel1, m_Select(m_Value(Cmp1), m_Value(ReplacementLow),
m_Value(ReplacementHigh)))) ||
!match(Cmp1,
>From 4456779fea0fae939d8834694dd34688e714082c Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 19 Aug 2025 11:39:44 -0700
Subject: [PATCH 6/8] Formatting
---
llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 02efe92794f7f..785eb7a59987e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1587,8 +1587,8 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
CmpPredicate Pred1;
Constant *C2;
Value *ReplacementLow, *ReplacementHigh;
- bool FoldSExtICmp = match(Sel1,
- m_SExt(m_Value(Cmp1, m_ICmp(m_Value(), m_Value()))));
+ bool FoldSExtICmp =
+ match(Sel1, m_SExt(m_Value(Cmp1, m_ICmp(m_Value(), m_Value()))));
if (!(FoldSelectICmp ||
match(Sel1, m_Select(m_Value(Cmp1), m_Value(ReplacementLow),
m_Value(ReplacementHigh)))) ||
>From 9f930fc946f4fae9b6c33eebcd2c921e5c20540c Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 19 Aug 2025 11:49:52 -0700
Subject: [PATCH 7/8] Fix typo
---
llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 785eb7a59987e..35b1afd53ad47 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1589,7 +1589,7 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
Value *ReplacementLow, *ReplacementHigh;
bool FoldSExtICmp =
match(Sel1, m_SExt(m_Value(Cmp1, m_ICmp(m_Value(), m_Value()))));
- if (!(FoldSelectICmp ||
+ if (!(FoldSExtICmp ||
match(Sel1, m_Select(m_Value(Cmp1), m_Value(ReplacementLow),
m_Value(ReplacementHigh)))) ||
!match(Cmp1,
>From a14b782f37ba1a6048a2d8eddc03165de7744704 Mon Sep 17 00:00:00 2001
From: bababuck <buchner.ryan at gmail.com>
Date: Tue, 19 Aug 2025 11:50:47 -0700
Subject: [PATCH 8/8] Remove SwapReplacement logic
Can just intialize high/low for foldsext case already swapped.
---
llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp | 10 +++-------
1 file changed, 3 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 35b1afd53ad47..e55b77a771859 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1609,7 +1609,6 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
// Canonicalize Cmp1 into the form we expect.
// FIXME: we shouldn't care about lanes that are 'undef' in the end?
- bool SwapReplacement = false;
switch (Pred1) {
case ICmpInst::Predicate::ICMP_SLT:
// The sext(icmp) case only is advantageous for SGT/SGTE since that enables
@@ -1636,7 +1635,7 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
// Also non-canonical, but here we don't need to change C2,
// so we don't have any restrictions on C2, so we can just handle it.
Pred1 = ICmpInst::Predicate::ICMP_SLT;
- SwapReplacement = true;
+ std::swap(ReplacementLow, ReplacementHigh);
break;
default:
return nullptr; // Unknown predicate.
@@ -1666,13 +1665,10 @@ static Value *canonicalizeClampLike(SelectInst &Sel0, ICmpInst &Cmp0,
return nullptr;
if (FoldSExtICmp) {
- ReplacementLow = Constant::getAllOnesValue(Sel1->getType());
- ReplacementHigh = Constant::getNullValue(Sel1->getType());
+ ReplacementHigh = Constant::getAllOnesValue(Sel1->getType());
+ ReplacementLow = Constant::getNullValue(Sel1->getType());
}
- if (SwapReplacement)
- std::swap(ReplacementLow, ReplacementHigh);
-
// If we are matching from a truncated input, we need to sext the
// ReplacementLow and ReplacementHigh values. Only do the transform if they
// are free to extend due to being constants.
More information about the llvm-commits
mailing list