[llvm] [ValueTracking] Fold max/min when incrementing/decrementing by 1 (PR #142466)
Alex MacLean via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 3 10:24:14 PDT 2025
https://github.com/AlexMaclean updated https://github.com/llvm/llvm-project/pull/142466
>From 3cf60d1bc2491cb8ec0ee56e813e7dc6f8e7aed6 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Sun, 1 Jun 2025 16:30:05 +0000
Subject: [PATCH 1/6] pre-commit tests
---
.../Transforms/InstCombine/minmax-fold.ll | 40 +++++++++++++++++++
1 file changed, 40 insertions(+)
diff --git a/llvm/test/Transforms/InstCombine/minmax-fold.ll b/llvm/test/Transforms/InstCombine/minmax-fold.ll
index cd376b74fb36c..799efe47c1ff9 100644
--- a/llvm/test/Transforms/InstCombine/minmax-fold.ll
+++ b/llvm/test/Transforms/InstCombine/minmax-fold.ll
@@ -1596,3 +1596,43 @@ define <2 x i32> @test_umax_smax_vec_neg(<2 x i32> %x) {
%umax = call <2 x i32> @llvm.umax.v2i32(<2 x i32> %smax, <2 x i32> <i32 1, i32 10>)
ret <2 x i32> %umax
}
+
+define i32 @test_smin_sub1_nsw(i32 %x, i32 %w) {
+; CHECK-LABEL: @test_smin_sub1_nsw(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], [[W:%.*]]
+; CHECK-NEXT: [[SUB:%.*]] = add nsw i32 [[W]], -1
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp = icmp slt i32 %x, %w
+ %sub = add nsw i32 %w, -1
+ %r = select i1 %cmp, i32 %x, i32 %sub
+ ret i32 %r
+}
+
+define i32 @test_smax_add1_nsw(i32 %x, i32 %w) {
+; CHECK-LABEL: @test_smax_add1_nsw(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], [[W:%.*]]
+; CHECK-NEXT: [[X2:%.*]] = add nsw i32 [[W]], 1
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[X2]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp = icmp sgt i32 %x, %w
+ %add = add nsw i32 %w, 1
+ %r = select i1 %cmp, i32 %x, i32 %add
+ ret i32 %r
+}
+
+define i32 @test_umax_add1_nsw(i32 %x, i32 %w) {
+; CHECK-LABEL: @test_umax_add1_nsw(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], [[W:%.*]]
+; CHECK-NEXT: [[X2:%.*]] = add nuw i32 [[W]], 1
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[X2]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp = icmp ugt i32 %x, %w
+ %add = add nuw i32 %w, 1
+ %r = select i1 %cmp, i32 %x, i32 %add
+ ret i32 %r
+}
+
>From a77521b5fd5b2607c735068f60c8e8a92b85046d Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Sun, 1 Jun 2025 16:32:57 +0000
Subject: [PATCH 2/6] [ValueTracking] Fold max/min when
incrementing/decrementing by 1
---
llvm/lib/Analysis/ValueTracking.cpp | 18 ++++++++++++++++++
.../test/Transforms/InstCombine/minmax-fold.ll | 15 ++++++---------
2 files changed, 24 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index fc19b2ccf7964..416d586d52963 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -8388,6 +8388,24 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
}
}
+ // (X > Y) ? X : (Y - 1) ==> MIN(X, Y - 1)
+ // (X < Y) ? X : (Y + 1) ==> MAX(X, Y + 1)
+ // When overflow corresponding to the sign of the comparison is poison.
+ // Note that the UMIN case is not possible as we canonicalize to addition.
+ if (CmpLHS == TrueVal) {
+ if (Pred == CmpInst::ICMP_SGT &&
+ match(FalseVal, m_NSWAddLike(m_Specific(CmpRHS), m_ConstantInt<1>())))
+ return {SPF_SMAX, SPNB_NA, false};
+
+ if (Pred == CmpInst::ICMP_SLT &&
+ match(FalseVal, m_NSWAddLike(m_Specific(CmpRHS), m_ConstantInt<-1>())))
+ return {SPF_SMIN, SPNB_NA, false};
+
+ if (Pred == CmpInst::ICMP_UGT &&
+ match(FalseVal, m_NUWAddLike(m_Specific(CmpRHS), m_ConstantInt<1>())))
+ return {SPF_UMAX, SPNB_NA, false};
+ }
+
if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT)
return {SPF_UNKNOWN, SPNB_NA, false};
diff --git a/llvm/test/Transforms/InstCombine/minmax-fold.ll b/llvm/test/Transforms/InstCombine/minmax-fold.ll
index 799efe47c1ff9..cf3515614321d 100644
--- a/llvm/test/Transforms/InstCombine/minmax-fold.ll
+++ b/llvm/test/Transforms/InstCombine/minmax-fold.ll
@@ -1599,9 +1599,8 @@ define <2 x i32> @test_umax_smax_vec_neg(<2 x i32> %x) {
define i32 @test_smin_sub1_nsw(i32 %x, i32 %w) {
; CHECK-LABEL: @test_smin_sub1_nsw(
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], [[W:%.*]]
-; CHECK-NEXT: [[SUB:%.*]] = add nsw i32 [[W]], -1
-; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[SUB]]
+; CHECK-NEXT: [[SUB:%.*]] = add nsw i32 [[W:%.*]], -1
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 [[SUB]])
; CHECK-NEXT: ret i32 [[R]]
;
%cmp = icmp slt i32 %x, %w
@@ -1612,9 +1611,8 @@ define i32 @test_smin_sub1_nsw(i32 %x, i32 %w) {
define i32 @test_smax_add1_nsw(i32 %x, i32 %w) {
; CHECK-LABEL: @test_smax_add1_nsw(
-; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], [[W:%.*]]
-; CHECK-NEXT: [[X2:%.*]] = add nsw i32 [[W]], 1
-; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[X2]]
+; CHECK-NEXT: [[X2:%.*]] = add nsw i32 [[W:%.*]], 1
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 [[X2]])
; CHECK-NEXT: ret i32 [[R]]
;
%cmp = icmp sgt i32 %x, %w
@@ -1625,9 +1623,8 @@ define i32 @test_smax_add1_nsw(i32 %x, i32 %w) {
define i32 @test_umax_add1_nsw(i32 %x, i32 %w) {
; CHECK-LABEL: @test_umax_add1_nsw(
-; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], [[W:%.*]]
-; CHECK-NEXT: [[X2:%.*]] = add nuw i32 [[W]], 1
-; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[X2]]
+; CHECK-NEXT: [[X2:%.*]] = add nuw i32 [[W:%.*]], 1
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.umax.i32(i32 [[X:%.*]], i32 [[X2]])
; CHECK-NEXT: ret i32 [[R]]
;
%cmp = icmp ugt i32 %x, %w
>From 8e46c7b7706f7294536f6e03edcdb002c78c87b0 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Tue, 3 Jun 2025 14:50:52 +0000
Subject: [PATCH 3/6] address comments
---
llvm/lib/Analysis/ValueTracking.cpp | 17 +++--
.../Transforms/InstCombine/minmax-fold.ll | 71 +++++++++++++++++++
2 files changed, 83 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 416d586d52963..35ca41030963c 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -8390,20 +8390,27 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
// (X > Y) ? X : (Y - 1) ==> MIN(X, Y - 1)
// (X < Y) ? X : (Y + 1) ==> MAX(X, Y + 1)
- // When overflow corresponding to the sign of the comparison is poison.
- // Note that the UMIN case is not possible as we canonicalize to addition.
+ // This transformation is valid when overflow corresponding to the sign of
+ // the comparison is poison and we must drop the non-matching overflow flag.
+ // Note: that the UMIN case is not possible as we canonicalize to addition.
if (CmpLHS == TrueVal) {
if (Pred == CmpInst::ICMP_SGT &&
- match(FalseVal, m_NSWAddLike(m_Specific(CmpRHS), m_ConstantInt<1>())))
+ match(FalseVal, m_NSWAddLike(m_Specific(CmpRHS), m_One()))) {
+ cast<Instruction>(FalseVal)->setHasNoUnsignedWrap(false);
return {SPF_SMAX, SPNB_NA, false};
+ }
if (Pred == CmpInst::ICMP_SLT &&
- match(FalseVal, m_NSWAddLike(m_Specific(CmpRHS), m_ConstantInt<-1>())))
+ match(FalseVal, m_NSWAddLike(m_Specific(CmpRHS), m_AllOnes()))) {
+ cast<Instruction>(FalseVal)->setHasNoUnsignedWrap(false);
return {SPF_SMIN, SPNB_NA, false};
+ }
if (Pred == CmpInst::ICMP_UGT &&
- match(FalseVal, m_NUWAddLike(m_Specific(CmpRHS), m_ConstantInt<1>())))
+ match(FalseVal, m_NUWAddLike(m_Specific(CmpRHS), m_One()))) {
+ cast<Instruction>(FalseVal)->setHasNoSignedWrap(false);
return {SPF_UMAX, SPNB_NA, false};
+ }
}
if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT)
diff --git a/llvm/test/Transforms/InstCombine/minmax-fold.ll b/llvm/test/Transforms/InstCombine/minmax-fold.ll
index cf3515614321d..b2be2bde69440 100644
--- a/llvm/test/Transforms/InstCombine/minmax-fold.ll
+++ b/llvm/test/Transforms/InstCombine/minmax-fold.ll
@@ -1633,3 +1633,74 @@ define i32 @test_umax_add1_nsw(i32 %x, i32 %w) {
ret i32 %r
}
+define <2 x i16> @test_smin_sub1_nsw_vec(<2 x i16> %x, <2 x i16> %w) {
+; CHECK-LABEL: @test_smin_sub1_nsw_vec(
+; CHECK-NEXT: [[SUB:%.*]] = add nsw <2 x i16> [[W:%.*]], splat (i16 -1)
+; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[SUB]])
+; CHECK-NEXT: ret <2 x i16> [[R]]
+;
+ %cmp = icmp slt <2 x i16> %x, %w
+ %sub = add nsw <2 x i16> %w, splat (i16 -1)
+ %r = select <2 x i1> %cmp, <2 x i16> %x, <2 x i16> %sub
+ ret <2 x i16> %r
+}
+
+define <2 x i16> @test_smax_add1_nsw_vec(<2 x i16> %x, <2 x i16> %w) {
+; CHECK-LABEL: @test_smax_add1_nsw_vec(
+; CHECK-NEXT: [[ADD:%.*]] = add nsw <2 x i16> [[W:%.*]], splat (i16 1)
+; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.smax.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[ADD]])
+; CHECK-NEXT: ret <2 x i16> [[R]]
+;
+ %cmp = icmp sgt <2 x i16> %x, %w
+ %add = add nsw <2 x i16> %w, splat (i16 1)
+ %r = select <2 x i1> %cmp, <2 x i16> %x, <2 x i16> %add
+ ret <2 x i16> %r
+}
+
+define <2 x i16> @test_umax_add1_nsw_vec(<2 x i16> %x, <2 x i16> %w) {
+; CHECK-LABEL: @test_umax_add1_nsw_vec(
+; CHECK-NEXT: [[ADD:%.*]] = add nuw <2 x i16> [[W:%.*]], splat (i16 1)
+; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[ADD]])
+; CHECK-NEXT: ret <2 x i16> [[R]]
+;
+ %cmp = icmp ugt <2 x i16> %x, %w
+ %add = add nuw <2 x i16> %w, splat (i16 1)
+ %r = select <2 x i1> %cmp, <2 x i16> %x, <2 x i16> %add
+ ret <2 x i16> %r
+}
+
+define i32 @test_smin_sub1_nsw_drop_flags(i32 %x, i32 %w) {
+; CHECK-LABEL: @test_smin_sub1_nsw_drop_flags(
+; CHECK-NEXT: [[SUB:%.*]] = add nsw i32 [[W:%.*]], -1
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 [[SUB]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp = icmp slt i32 %x, %w
+ %sub = add nsw nuw i32 %w, -1
+ %r = select i1 %cmp, i32 %x, i32 %sub
+ ret i32 %r
+}
+
+define i32 @test_smax_add1_nsw_drop_flags(i32 %x, i32 %w) {
+; CHECK-LABEL: @test_smax_add1_nsw_drop_flags(
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[W:%.*]], 1
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 [[ADD]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp = icmp sgt i32 %x, %w
+ %add = add nsw nuw i32 %w, 1
+ %r = select i1 %cmp, i32 %x, i32 %add
+ ret i32 %r
+}
+
+define i32 @test_umax_add1_nsw_drop_flags(i32 %x, i32 %w) {
+; CHECK-LABEL: @test_umax_add1_nsw_drop_flags(
+; CHECK-NEXT: [[ADD:%.*]] = add nuw i32 [[W:%.*]], 1
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.umax.i32(i32 [[X:%.*]], i32 [[ADD]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp = icmp ugt i32 %x, %w
+ %add = add nuw nsw i32 %w, 1
+ %r = select i1 %cmp, i32 %x, i32 %add
+ ret i32 %r
+}
>From c8106aec7a7f05a49dd3b26b84a1cfd122adbb98 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Tue, 3 Jun 2025 16:13:46 +0000
Subject: [PATCH 4/6] address more comments
---
llvm/lib/Analysis/ValueTracking.cpp | 25 ------------
.../InstCombine/InstCombineSelect.cpp | 40 +++++++++++++++++++
.../Transforms/InstCombine/minmax-fold.ll | 4 +-
3 files changed, 42 insertions(+), 27 deletions(-)
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 35ca41030963c..fc19b2ccf7964 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -8388,31 +8388,6 @@ static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
}
}
- // (X > Y) ? X : (Y - 1) ==> MIN(X, Y - 1)
- // (X < Y) ? X : (Y + 1) ==> MAX(X, Y + 1)
- // This transformation is valid when overflow corresponding to the sign of
- // the comparison is poison and we must drop the non-matching overflow flag.
- // Note: that the UMIN case is not possible as we canonicalize to addition.
- if (CmpLHS == TrueVal) {
- if (Pred == CmpInst::ICMP_SGT &&
- match(FalseVal, m_NSWAddLike(m_Specific(CmpRHS), m_One()))) {
- cast<Instruction>(FalseVal)->setHasNoUnsignedWrap(false);
- return {SPF_SMAX, SPNB_NA, false};
- }
-
- if (Pred == CmpInst::ICMP_SLT &&
- match(FalseVal, m_NSWAddLike(m_Specific(CmpRHS), m_AllOnes()))) {
- cast<Instruction>(FalseVal)->setHasNoUnsignedWrap(false);
- return {SPF_SMIN, SPNB_NA, false};
- }
-
- if (Pred == CmpInst::ICMP_UGT &&
- match(FalseVal, m_NUWAddLike(m_Specific(CmpRHS), m_One()))) {
- cast<Instruction>(FalseVal)->setHasNoSignedWrap(false);
- return {SPF_UMAX, SPNB_NA, false};
- }
- }
-
if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT)
return {SPF_UNKNOWN, SPNB_NA, false};
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 3882d4cb59e01..6a41147b6b809 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -564,6 +564,43 @@ Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
return nullptr;
}
+/// Try to fold a select to a min/max intrinsic. Many cases are already handled
+/// by matchDecomposedSelectPattern but here we handle the cases where more
+/// extensive modification of the IR is required.
+static Value *foldSelectICmpMinMax(const ICmpInst *Cmp, Value *TVal,
+ Value *FVal,
+ InstCombiner::BuilderTy &Builder) {
+ const Value *CmpLHS = Cmp->getOperand(0);
+ const Value *CmpRHS = Cmp->getOperand(1);
+ const ICmpInst::Predicate Pred = Cmp->getPredicate();
+
+ // (X > Y) ? X : (Y - 1) ==> MIN(X, Y - 1)
+ // (X < Y) ? X : (Y + 1) ==> MAX(X, Y + 1)
+ // This transformation is valid when overflow corresponding to the sign of
+ // the comparison is poison and we must drop the non-matching overflow flag.
+ // Note: that the UMIN case is not possible as we canonicalize to addition.
+ if (CmpLHS == TVal) {
+ if (Pred == CmpInst::ICMP_SGT &&
+ match(FVal, m_NSWAddLike(m_Specific(CmpRHS), m_One()))) {
+ cast<Instruction>(FVal)->setHasNoUnsignedWrap(false);
+ return Builder.CreateBinaryIntrinsic(Intrinsic::smax, TVal, FVal);
+ }
+
+ if (Pred == CmpInst::ICMP_SLT &&
+ match(FVal, m_NSWAddLike(m_Specific(CmpRHS), m_AllOnes()))) {
+ cast<Instruction>(FVal)->setHasNoUnsignedWrap(false);
+ return Builder.CreateBinaryIntrinsic(Intrinsic::smin, TVal, FVal);
+ }
+
+ if (Pred == CmpInst::ICMP_UGT &&
+ match(FVal, m_NUWAddLike(m_Specific(CmpRHS), m_One()))) {
+ cast<Instruction>(FVal)->setHasNoSignedWrap(false);
+ return Builder.CreateBinaryIntrinsic(Intrinsic::umax, TVal, FVal);
+ }
+ }
+ return nullptr;
+}
+
/// We want to turn:
/// (select (icmp eq (and X, Y), 0), (and (lshr X, Z), 1), 1)
/// into:
@@ -1916,6 +1953,9 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI,
return &SI;
}
+ if (Value *V = foldSelectICmpMinMax(ICI, TrueVal, FalseVal, Builder))
+ return replaceInstUsesWith(SI, V);
+
if (Instruction *V =
foldSelectICmpAndAnd(SI.getType(), ICI, TrueVal, FalseVal, Builder))
return V;
diff --git a/llvm/test/Transforms/InstCombine/minmax-fold.ll b/llvm/test/Transforms/InstCombine/minmax-fold.ll
index b2be2bde69440..f76c5b520475b 100644
--- a/llvm/test/Transforms/InstCombine/minmax-fold.ll
+++ b/llvm/test/Transforms/InstCombine/minmax-fold.ll
@@ -1671,8 +1671,8 @@ define <2 x i16> @test_umax_add1_nsw_vec(<2 x i16> %x, <2 x i16> %w) {
define i32 @test_smin_sub1_nsw_drop_flags(i32 %x, i32 %w) {
; CHECK-LABEL: @test_smin_sub1_nsw_drop_flags(
-; CHECK-NEXT: [[SUB:%.*]] = add nsw i32 [[W:%.*]], -1
-; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 [[SUB]])
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], [[W:%.*]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i32 [[X]], i32 -1
; CHECK-NEXT: ret i32 [[R]]
;
%cmp = icmp slt i32 %x, %w
>From 2856b5879156fbd790d6f130f7c7fec7c2c0a2a0 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Tue, 3 Jun 2025 16:54:05 +0000
Subject: [PATCH 5/6] address more comments
---
.../InstCombine/InstCombineSelect.cpp | 15 ++++--
.../Transforms/InstCombine/minmax-fold.ll | 53 ++++++++++++++++---
2 files changed, 57 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 6a41147b6b809..b7c72be6385ae 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -569,7 +569,8 @@ Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
/// extensive modification of the IR is required.
static Value *foldSelectICmpMinMax(const ICmpInst *Cmp, Value *TVal,
Value *FVal,
- InstCombiner::BuilderTy &Builder) {
+ InstCombiner::BuilderTy &Builder,
+ const SimplifyQuery &SQ) {
const Value *CmpLHS = Cmp->getOperand(0);
const Value *CmpRHS = Cmp->getOperand(1);
const ICmpInst::Predicate Pred = Cmp->getPredicate();
@@ -578,7 +579,6 @@ static Value *foldSelectICmpMinMax(const ICmpInst *Cmp, Value *TVal,
// (X < Y) ? X : (Y + 1) ==> MAX(X, Y + 1)
// This transformation is valid when overflow corresponding to the sign of
// the comparison is poison and we must drop the non-matching overflow flag.
- // Note: that the UMIN case is not possible as we canonicalize to addition.
if (CmpLHS == TVal) {
if (Pred == CmpInst::ICMP_SGT &&
match(FVal, m_NSWAddLike(m_Specific(CmpRHS), m_One()))) {
@@ -597,6 +597,15 @@ static Value *foldSelectICmpMinMax(const ICmpInst *Cmp, Value *TVal,
cast<Instruction>(FVal)->setHasNoSignedWrap(false);
return Builder.CreateBinaryIntrinsic(Intrinsic::umax, TVal, FVal);
}
+
+ // Note: We must use isKnownNonZero here because "sub nuw %x, 1" will be
+ // canonicalized to "add %x, -1", discarding the nuw flag.
+ if (Pred == CmpInst::ICMP_ULT &&
+ match(FVal, m_AddLike(m_Specific(CmpRHS), m_AllOnes())) &&
+ isKnownNonZero(CmpRHS, SQ)) {
+ cast<Instruction>(FVal)->setHasNoSignedWrap(false);
+ return Builder.CreateBinaryIntrinsic(Intrinsic::umin, TVal, FVal);
+ }
}
return nullptr;
}
@@ -1953,7 +1962,7 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI,
return &SI;
}
- if (Value *V = foldSelectICmpMinMax(ICI, TrueVal, FalseVal, Builder))
+ if (Value *V = foldSelectICmpMinMax(ICI, TrueVal, FalseVal, Builder, SQ))
return replaceInstUsesWith(SI, V);
if (Instruction *V =
diff --git a/llvm/test/Transforms/InstCombine/minmax-fold.ll b/llvm/test/Transforms/InstCombine/minmax-fold.ll
index f76c5b520475b..bf8eeb0618ea8 100644
--- a/llvm/test/Transforms/InstCombine/minmax-fold.ll
+++ b/llvm/test/Transforms/InstCombine/minmax-fold.ll
@@ -1621,10 +1621,10 @@ define i32 @test_smax_add1_nsw(i32 %x, i32 %w) {
ret i32 %r
}
-define i32 @test_umax_add1_nsw(i32 %x, i32 %w) {
-; CHECK-LABEL: @test_umax_add1_nsw(
-; CHECK-NEXT: [[X2:%.*]] = add nuw i32 [[W:%.*]], 1
-; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.umax.i32(i32 [[X:%.*]], i32 [[X2]])
+define i32 @test_umax_add1_nuw(i32 %x, i32 %w) {
+; CHECK-LABEL: @test_umax_add1_nuw(
+; CHECK-NEXT: [[ADD:%.*]] = add nuw i32 [[W:%.*]], 1
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.umax.i32(i32 [[X:%.*]], i32 [[ADD]])
; CHECK-NEXT: ret i32 [[R]]
;
%cmp = icmp ugt i32 %x, %w
@@ -1633,6 +1633,18 @@ define i32 @test_umax_add1_nsw(i32 %x, i32 %w) {
ret i32 %r
}
+define i32 @test_umin_sub1_nuw(i32 %x, i32 range(i32 1, 0) %w) {
+; CHECK-LABEL: @test_umin_sub1_nuw(
+; CHECK-NEXT: [[SUB:%.*]] = add i32 [[W:%.*]], -1
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.umin.i32(i32 [[X:%.*]], i32 [[SUB]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp = icmp ult i32 %x, %w
+ %sub = add i32 %w, -1
+ %r = select i1 %cmp, i32 %x, i32 %sub
+ ret i32 %r
+}
+
define <2 x i16> @test_smin_sub1_nsw_vec(<2 x i16> %x, <2 x i16> %w) {
; CHECK-LABEL: @test_smin_sub1_nsw_vec(
; CHECK-NEXT: [[SUB:%.*]] = add nsw <2 x i16> [[W:%.*]], splat (i16 -1)
@@ -1657,8 +1669,8 @@ define <2 x i16> @test_smax_add1_nsw_vec(<2 x i16> %x, <2 x i16> %w) {
ret <2 x i16> %r
}
-define <2 x i16> @test_umax_add1_nsw_vec(<2 x i16> %x, <2 x i16> %w) {
-; CHECK-LABEL: @test_umax_add1_nsw_vec(
+define <2 x i16> @test_umax_add1_nuw_vec(<2 x i16> %x, <2 x i16> %w) {
+; CHECK-LABEL: @test_umax_add1_nuw_vec(
; CHECK-NEXT: [[ADD:%.*]] = add nuw <2 x i16> [[W:%.*]], splat (i16 1)
; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[ADD]])
; CHECK-NEXT: ret <2 x i16> [[R]]
@@ -1669,6 +1681,19 @@ define <2 x i16> @test_umax_add1_nsw_vec(<2 x i16> %x, <2 x i16> %w) {
ret <2 x i16> %r
}
+define <2 x i16> @test_umin_sub1_nuw_vec(<2 x i16> %x, <2 x i16> range(i16 1, 0) %w) {
+; CHECK-LABEL: @test_umin_sub1_nuw_vec(
+; CHECK-NEXT: [[SUB:%.*]] = add <2 x i16> [[W:%.*]], splat (i16 -1)
+; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[SUB]])
+; CHECK-NEXT: ret <2 x i16> [[R]]
+;
+ %cmp = icmp ult <2 x i16> %x, %w
+ %sub = add <2 x i16> %w, splat (i16 -1)
+ %r = select <2 x i1> %cmp, <2 x i16> %x, <2 x i16> %sub
+ ret <2 x i16> %r
+}
+
+
define i32 @test_smin_sub1_nsw_drop_flags(i32 %x, i32 %w) {
; CHECK-LABEL: @test_smin_sub1_nsw_drop_flags(
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], [[W:%.*]]
@@ -1693,8 +1718,8 @@ define i32 @test_smax_add1_nsw_drop_flags(i32 %x, i32 %w) {
ret i32 %r
}
-define i32 @test_umax_add1_nsw_drop_flags(i32 %x, i32 %w) {
-; CHECK-LABEL: @test_umax_add1_nsw_drop_flags(
+define i32 @test_umax_add1_nuw_drop_flags(i32 %x, i32 %w) {
+; CHECK-LABEL: @test_umax_add1_nuw_drop_flags(
; CHECK-NEXT: [[ADD:%.*]] = add nuw i32 [[W:%.*]], 1
; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.umax.i32(i32 [[X:%.*]], i32 [[ADD]])
; CHECK-NEXT: ret i32 [[R]]
@@ -1704,3 +1729,15 @@ define i32 @test_umax_add1_nsw_drop_flags(i32 %x, i32 %w) {
%r = select i1 %cmp, i32 %x, i32 %add
ret i32 %r
}
+
+define i32 @test_umin_sub1_nuw_drop_flags(i32 %x, i32 range(i32 1, 0) %w) {
+; CHECK-LABEL: @test_umin_sub1_nuw_drop_flags(
+; CHECK-NEXT: [[SUB:%.*]] = add i32 [[W:%.*]], -1
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.umin.i32(i32 [[X:%.*]], i32 [[SUB]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp = icmp ult i32 %x, %w
+ %sub = add nsw i32 %w, -1
+ %r = select i1 %cmp, i32 %x, i32 %sub
+ ret i32 %r
+}
>From abb02ec459caf18ba4ea9224afada2f912721b4a Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Tue, 3 Jun 2025 17:23:57 +0000
Subject: [PATCH 6/6] address more comments
---
.../InstCombine/InstCombineSelect.cpp | 7 ++-
.../Transforms/InstCombine/minmax-fold.ll | 48 +++++++++++++++++++
2 files changed, 54 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index b7c72be6385ae..df5a4a0f36ffb 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -573,12 +573,17 @@ static Value *foldSelectICmpMinMax(const ICmpInst *Cmp, Value *TVal,
const SimplifyQuery &SQ) {
const Value *CmpLHS = Cmp->getOperand(0);
const Value *CmpRHS = Cmp->getOperand(1);
- const ICmpInst::Predicate Pred = Cmp->getPredicate();
+ ICmpInst::Predicate Pred = Cmp->getPredicate();
// (X > Y) ? X : (Y - 1) ==> MIN(X, Y - 1)
// (X < Y) ? X : (Y + 1) ==> MAX(X, Y + 1)
// This transformation is valid when overflow corresponding to the sign of
// the comparison is poison and we must drop the non-matching overflow flag.
+ if (CmpRHS == TVal) {
+ std::swap(CmpLHS, CmpRHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ }
+
if (CmpLHS == TVal) {
if (Pred == CmpInst::ICMP_SGT &&
match(FVal, m_NSWAddLike(m_Specific(CmpRHS), m_One()))) {
diff --git a/llvm/test/Transforms/InstCombine/minmax-fold.ll b/llvm/test/Transforms/InstCombine/minmax-fold.ll
index bf8eeb0618ea8..e2e01466267a4 100644
--- a/llvm/test/Transforms/InstCombine/minmax-fold.ll
+++ b/llvm/test/Transforms/InstCombine/minmax-fold.ll
@@ -1645,6 +1645,54 @@ define i32 @test_umin_sub1_nuw(i32 %x, i32 range(i32 1, 0) %w) {
ret i32 %r
}
+define i32 @test_smin_sub1_nsw_swapped(i32 %x, i32 %w) {
+; CHECK-LABEL: @test_smin_sub1_nsw_swapped(
+; CHECK-NEXT: [[SUB:%.*]] = add nsw i32 [[W:%.*]], -1
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 [[SUB]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp = icmp sgt i32 %w, %x
+ %sub = add nsw i32 %w, -1
+ %r = select i1 %cmp, i32 %x, i32 %sub
+ ret i32 %r
+}
+
+define i32 @test_smax_add1_nsw_swapped(i32 %x, i32 %w) {
+; CHECK-LABEL: @test_smax_add1_nsw_swapped(
+; CHECK-NEXT: [[X2:%.*]] = add nsw i32 [[W:%.*]], 1
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 [[X2]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp = icmp slt i32 %w, %x
+ %add = add nsw i32 %w, 1
+ %r = select i1 %cmp, i32 %x, i32 %add
+ ret i32 %r
+}
+
+define i32 @test_umax_add1_nuw_swapped(i32 %x, i32 %w) {
+; CHECK-LABEL: @test_umax_add1_nuw_swapped(
+; CHECK-NEXT: [[ADD:%.*]] = add nuw i32 [[W:%.*]], 1
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.umax.i32(i32 [[X:%.*]], i32 [[ADD]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp = icmp ult i32 %w, %x
+ %add = add nuw i32 %w, 1
+ %r = select i1 %cmp, i32 %x, i32 %add
+ ret i32 %r
+}
+
+define i32 @test_umin_sub1_nuw_swapped(i32 %x, i32 range(i32 1, 0) %w) {
+; CHECK-LABEL: @test_umin_sub1_nuw_swapped(
+; CHECK-NEXT: [[SUB:%.*]] = add i32 [[W:%.*]], -1
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.umin.i32(i32 [[X:%.*]], i32 [[SUB]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp = icmp ugt i32 %w, %x
+ %sub = add i32 %w, -1
+ %r = select i1 %cmp, i32 %x, i32 %sub
+ ret i32 %r
+}
+
define <2 x i16> @test_smin_sub1_nsw_vec(<2 x i16> %x, <2 x i16> %w) {
; CHECK-LABEL: @test_smin_sub1_nsw_vec(
; CHECK-NEXT: [[SUB:%.*]] = add nsw <2 x i16> [[W:%.*]], splat (i16 -1)
More information about the llvm-commits
mailing list