[llvm] 0902904 - [InstCombine] Fold max/min when incrementing/decrementing by 1 (#142466)
Author: Alex MacLean
Date: 2025-06-10T07:55:56-07:00
New Revision: 09029045a88b48591ce645bae640fc3bc8b58b63
URL: https://github.com/llvm/llvm-project/commit/09029045a88b48591ce645bae640fc3bc8b58b63
DIFF: https://github.com/llvm/llvm-project/commit/09029045a88b48591ce645bae640fc3bc8b58b63.diff
LOG: [InstCombine] Fold max/min when incrementing/decrementing by 1 (#142466)
Add the following integer min/max folds to InstCombine:
- (X > Y) ? X : (Y + 1) ==> MAX(X, Y + 1)
- (X < Y) ? X : (Y - 1) ==> MIN(X, Y - 1)
These are safe when overflow corresponding to the sign of the comparison
is poison (proof: https://alive2.llvm.org/ce/z/oj5iiI). For example, the
MIN fold is unsound without nsw: if Y is INT_MIN, the select returns the
wrapped value INT_MAX, while MIN(X, INT_MAX) returns X.
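Concretely, the signed max case looks like this in IR (a sketch mirroring
the new tests; a stray nuw flag on the add, if present, is dropped):

  %cmp = icmp sgt i32 %x, %w
  %add = add nsw i32 %w, 1
  %r   = select i1 %cmp, i32 %x, i32 %add
==>
  %add = add nsw i32 %w, 1
  %r   = call i32 @llvm.smax.i32(i32 %x, i32 %add)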
The most common of these patterns is likely the minimum case, which
occurs in some internal library code when clamping an integer index to a
range (the maximum cases are included for completeness). Here is a
simplified example:
int clampToWidth(int idx, int width) {
  if (idx >= width)
    return width - 1;
  return idx;
}
https://cuda.godbolt.org/z/nhPzWrc3W
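In IR, the compare/select from clampToWidth reduces to roughly the
following (an illustrative sketch; value names are invented), which this
patch now folds to an smin:

  %cmp = icmp slt i32 %idx, %width
  %sub = add nsw i32 %width, -1
  %r   = select i1 %cmp, i32 %idx, i32 %sub
==>
  %sub = add nsw i32 %width, -1
  %r   = call i32 @llvm.smin.i32(i32 %idx, i32 %sub)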
Added:
Modified:
llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
llvm/test/Transforms/InstCombine/minmax-fold.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 8f46ae304353d..979a803a79ed8 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -565,6 +565,62 @@ Instruction *InstCombinerImpl::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
return nullptr;
}
+/// Try to fold a select to a min/max intrinsic. Many cases are already handled
+/// by matchDecomposedSelectPattern, but here we handle the cases where more
+/// extensive modification of the IR is required.
+static Value *foldSelectICmpMinMax(const ICmpInst *Cmp, Value *TVal,
+ Value *FVal,
+ InstCombiner::BuilderTy &Builder,
+ const SimplifyQuery &SQ) {
+ const Value *CmpLHS = Cmp->getOperand(0);
+ const Value *CmpRHS = Cmp->getOperand(1);
+ ICmpInst::Predicate Pred = Cmp->getPredicate();
+
+ // (X > Y) ? X : (Y + 1) ==> MAX(X, Y + 1)
+ // (X < Y) ? X : (Y - 1) ==> MIN(X, Y - 1)
+ // This transformation is valid when overflow corresponding to the sign of
+ // the comparison is poison and we must drop the non-matching overflow flag.
+ if (CmpRHS == TVal) {
+ std::swap(CmpLHS, CmpRHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ }
+
+ // TODO: consider handling 'or disjoint' as well, though these would need to
+ // be converted to 'add' instructions.
+ if (!(CmpLHS == TVal && isa<Instruction>(FVal)))
+ return nullptr;
+
+ if (Pred == CmpInst::ICMP_SGT &&
+ match(FVal, m_NSWAdd(m_Specific(CmpRHS), m_One()))) {
+ cast<Instruction>(FVal)->setHasNoUnsignedWrap(false);
+ return Builder.CreateBinaryIntrinsic(Intrinsic::smax, TVal, FVal);
+ }
+
+ if (Pred == CmpInst::ICMP_SLT &&
+ match(FVal, m_NSWAdd(m_Specific(CmpRHS), m_AllOnes()))) {
+ cast<Instruction>(FVal)->setHasNoUnsignedWrap(false);
+ return Builder.CreateBinaryIntrinsic(Intrinsic::smin, TVal, FVal);
+ }
+
+ if (Pred == CmpInst::ICMP_UGT &&
+ match(FVal, m_NUWAdd(m_Specific(CmpRHS), m_One()))) {
+ cast<Instruction>(FVal)->setHasNoSignedWrap(false);
+ return Builder.CreateBinaryIntrinsic(Intrinsic::umax, TVal, FVal);
+ }
+
+ // Note: We must use isKnownNonZero here because "sub nuw %x, 1" will be
+ // canonicalized to "add %x, -1" discarding the nuw flag.
+ if (Pred == CmpInst::ICMP_ULT &&
+ match(FVal, m_Add(m_Specific(CmpRHS), m_AllOnes())) &&
+ isKnownNonZero(CmpRHS, SQ)) {
+ cast<Instruction>(FVal)->setHasNoSignedWrap(false);
+ cast<Instruction>(FVal)->setHasNoUnsignedWrap(false);
+ return Builder.CreateBinaryIntrinsic(Intrinsic::umin, TVal, FVal);
+ }
+
+ return nullptr;
+}
+
/// We want to turn:
/// (select (icmp eq (and X, Y), 0), (and (lshr X, Z), 1), 1)
/// into:
@@ -1940,6 +1996,9 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI,
return &SI;
}
+ if (Value *V = foldSelectICmpMinMax(ICI, TrueVal, FalseVal, Builder, SQ))
+ return replaceInstUsesWith(SI, V);
+
if (Instruction *V =
foldSelectICmpAndAnd(SI.getType(), ICI, TrueVal, FalseVal, Builder))
return V;
diff --git a/llvm/test/Transforms/InstCombine/minmax-fold.ll b/llvm/test/Transforms/InstCombine/minmax-fold.ll
index 3bb1fd60f3afe..a982225370620 100644
--- a/llvm/test/Transforms/InstCombine/minmax-fold.ll
+++ b/llvm/test/Transforms/InstCombine/minmax-fold.ll
@@ -1598,3 +1598,247 @@ define <2 x i32> @test_umax_smax_vec_neg(<2 x i32> %x) {
%umax = call <2 x i32> @llvm.umax.v2i32(<2 x i32> %smax, <2 x i32> <i32 1, i32 10>)
ret <2 x i32> %umax
}
+
+define i32 @test_smin_sub1_nsw(i32 %x, i32 %w) {
+; CHECK-LABEL: @test_smin_sub1_nsw(
+; CHECK-NEXT: [[SUB:%.*]] = add nsw i32 [[W:%.*]], -1
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 [[SUB]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp = icmp slt i32 %x, %w
+ %sub = add nsw i32 %w, -1
+ %r = select i1 %cmp, i32 %x, i32 %sub
+ ret i32 %r
+}
+
+define i32 @test_smax_add1_nsw(i32 %x, i32 %w) {
+; CHECK-LABEL: @test_smax_add1_nsw(
+; CHECK-NEXT: [[X2:%.*]] = add nsw i32 [[W:%.*]], 1
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 [[X2]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp = icmp sgt i32 %x, %w
+ %add = add nsw i32 %w, 1
+ %r = select i1 %cmp, i32 %x, i32 %add
+ ret i32 %r
+}
+
+define i32 @test_umax_add1_nuw(i32 %x, i32 %w) {
+; CHECK-LABEL: @test_umax_add1_nuw(
+; CHECK-NEXT: [[ADD:%.*]] = add nuw i32 [[W:%.*]], 1
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.umax.i32(i32 [[X:%.*]], i32 [[ADD]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp = icmp ugt i32 %x, %w
+ %add = add nuw i32 %w, 1
+ %r = select i1 %cmp, i32 %x, i32 %add
+ ret i32 %r
+}
+
+define i32 @test_umin_sub1_nuw(i32 %x, i32 range(i32 1, 0) %w) {
+; CHECK-LABEL: @test_umin_sub1_nuw(
+; CHECK-NEXT: [[SUB:%.*]] = add i32 [[W:%.*]], -1
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.umin.i32(i32 [[X:%.*]], i32 [[SUB]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp = icmp ult i32 %x, %w
+ %sub = add i32 %w, -1
+ %r = select i1 %cmp, i32 %x, i32 %sub
+ ret i32 %r
+}
+
+define i32 @test_smin_sub1_nsw_swapped(i32 %x, i32 %w) {
+; CHECK-LABEL: @test_smin_sub1_nsw_swapped(
+; CHECK-NEXT: [[SUB:%.*]] = add nsw i32 [[W:%.*]], -1
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.smin.i32(i32 [[X:%.*]], i32 [[SUB]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp = icmp sgt i32 %w, %x
+ %sub = add nsw i32 %w, -1
+ %r = select i1 %cmp, i32 %x, i32 %sub
+ ret i32 %r
+}
+
+define i32 @test_smax_add1_nsw_swapped(i32 %x, i32 %w) {
+; CHECK-LABEL: @test_smax_add1_nsw_swapped(
+; CHECK-NEXT: [[X2:%.*]] = add nsw i32 [[W:%.*]], 1
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 [[X2]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp = icmp slt i32 %w, %x
+ %add = add nsw i32 %w, 1
+ %r = select i1 %cmp, i32 %x, i32 %add
+ ret i32 %r
+}
+
+define i32 @test_umax_add1_nuw_swapped(i32 %x, i32 %w) {
+; CHECK-LABEL: @test_umax_add1_nuw_swapped(
+; CHECK-NEXT: [[ADD:%.*]] = add nuw i32 [[W:%.*]], 1
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.umax.i32(i32 [[X:%.*]], i32 [[ADD]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp = icmp ult i32 %w, %x
+ %add = add nuw i32 %w, 1
+ %r = select i1 %cmp, i32 %x, i32 %add
+ ret i32 %r
+}
+
+define i32 @test_umin_sub1_nuw_swapped(i32 %x, i32 range(i32 1, 0) %w) {
+; CHECK-LABEL: @test_umin_sub1_nuw_swapped(
+; CHECK-NEXT: [[SUB:%.*]] = add i32 [[W:%.*]], -1
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.umin.i32(i32 [[X:%.*]], i32 [[SUB]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp = icmp ugt i32 %w, %x
+ %sub = add i32 %w, -1
+ %r = select i1 %cmp, i32 %x, i32 %sub
+ ret i32 %r
+}
+
+define <2 x i16> @test_smin_sub1_nsw_vec(<2 x i16> %x, <2 x i16> %w) {
+; CHECK-LABEL: @test_smin_sub1_nsw_vec(
+; CHECK-NEXT: [[SUB:%.*]] = add nsw <2 x i16> [[W:%.*]], splat (i16 -1)
+; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[SUB]])
+; CHECK-NEXT: ret <2 x i16> [[R]]
+;
+ %cmp = icmp slt <2 x i16> %x, %w
+ %sub = add nsw <2 x i16> %w, splat (i16 -1)
+ %r = select <2 x i1> %cmp, <2 x i16> %x, <2 x i16> %sub
+ ret <2 x i16> %r
+}
+
+define <2 x i16> @test_smax_add1_nsw_vec(<2 x i16> %x, <2 x i16> %w) {
+; CHECK-LABEL: @test_smax_add1_nsw_vec(
+; CHECK-NEXT: [[ADD:%.*]] = add nsw <2 x i16> [[W:%.*]], splat (i16 1)
+; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.smax.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[ADD]])
+; CHECK-NEXT: ret <2 x i16> [[R]]
+;
+ %cmp = icmp sgt <2 x i16> %x, %w
+ %add = add nsw <2 x i16> %w, splat (i16 1)
+ %r = select <2 x i1> %cmp, <2 x i16> %x, <2 x i16> %add
+ ret <2 x i16> %r
+}
+
+define <2 x i16> @test_umax_add1_nuw_vec(<2 x i16> %x, <2 x i16> %w) {
+; CHECK-LABEL: @test_umax_add1_nuw_vec(
+; CHECK-NEXT: [[ADD:%.*]] = add nuw <2 x i16> [[W:%.*]], splat (i16 1)
+; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[ADD]])
+; CHECK-NEXT: ret <2 x i16> [[R]]
+;
+ %cmp = icmp ugt <2 x i16> %x, %w
+ %add = add nuw <2 x i16> %w, splat (i16 1)
+ %r = select <2 x i1> %cmp, <2 x i16> %x, <2 x i16> %add
+ ret <2 x i16> %r
+}
+
+define <2 x i16> @test_umin_sub1_nuw_vec(<2 x i16> %x, <2 x i16> range(i16 1, 0) %w) {
+; CHECK-LABEL: @test_umin_sub1_nuw_vec(
+; CHECK-NEXT: [[SUB:%.*]] = add <2 x i16> [[W:%.*]], splat (i16 -1)
+; CHECK-NEXT: [[R:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> [[X:%.*]], <2 x i16> [[SUB]])
+; CHECK-NEXT: ret <2 x i16> [[R]]
+;
+ %cmp = icmp ult <2 x i16> %x, %w
+ %sub = add <2 x i16> %w, splat (i16 -1)
+ %r = select <2 x i1> %cmp, <2 x i16> %x, <2 x i16> %sub
+ ret <2 x i16> %r
+}
+
+
+define i32 @test_smin_sub1_nsw_drop_flags(i32 %x, i32 %w) {
+; CHECK-LABEL: @test_smin_sub1_nsw_drop_flags(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], [[W:%.*]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i32 [[X]], i32 -1
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp = icmp slt i32 %x, %w
+ %sub = add nsw nuw i32 %w, -1
+ %r = select i1 %cmp, i32 %x, i32 %sub
+ ret i32 %r
+}
+
+define i32 @test_smax_add1_nsw_drop_flags(i32 %x, i32 %w) {
+; CHECK-LABEL: @test_smax_add1_nsw_drop_flags(
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[W:%.*]], 1
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.smax.i32(i32 [[X:%.*]], i32 [[ADD]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp = icmp sgt i32 %x, %w
+ %add = add nsw nuw i32 %w, 1
+ %r = select i1 %cmp, i32 %x, i32 %add
+ ret i32 %r
+}
+
+define i32 @test_umax_add1_nuw_drop_flags(i32 %x, i32 %w) {
+; CHECK-LABEL: @test_umax_add1_nuw_drop_flags(
+; CHECK-NEXT: [[ADD:%.*]] = add nuw i32 [[W:%.*]], 1
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.umax.i32(i32 [[X:%.*]], i32 [[ADD]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp = icmp ugt i32 %x, %w
+ %add = add nuw nsw i32 %w, 1
+ %r = select i1 %cmp, i32 %x, i32 %add
+ ret i32 %r
+}
+
+define i32 @test_umin_sub1_nuw_drop_flags(i32 %x, i32 range(i32 1, 0) %w) {
+; CHECK-LABEL: @test_umin_sub1_nuw_drop_flags(
+; CHECK-NEXT: [[SUB:%.*]] = add i32 [[W:%.*]], -1
+; CHECK-NEXT: [[R:%.*]] = call i32 @llvm.umin.i32(i32 [[X:%.*]], i32 [[SUB]])
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp = icmp ult i32 %x, %w
+ %sub = add nsw i32 %w, -1
+ %r = select i1 %cmp, i32 %x, i32 %sub
+ ret i32 %r
+}
+
+;; Confirm we don't crash on these cases.
+define i32 @test_smin_or_neg1_nsw(i32 %x, i32 %w) {
+; CHECK-LABEL: @test_smin_or_neg1_nsw(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[X:%.*]], [[W:%.*]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i32 [[X]], i32 -1
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp = icmp slt i32 %x, %w
+ %sub = or disjoint i32 %w, -1
+ %r = select i1 %cmp, i32 %x, i32 %sub
+ ret i32 %r
+}
+
+define i32 @test_smax_or_1_nsw(i32 %x, i32 %w) {
+; CHECK-LABEL: @test_smax_or_1_nsw(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], [[W:%.*]]
+; CHECK-NEXT: [[ADD:%.*]] = or disjoint i32 [[W]], 1
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[ADD]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp = icmp sgt i32 %x, %w
+ %add = or disjoint i32 %w, 1
+ %r = select i1 %cmp, i32 %x, i32 %add
+ ret i32 %r
+}
+
+define i32 @test_umax_or_1_nuw(i32 %x, i32 %w) {
+; CHECK-LABEL: @test_umax_or_1_nuw(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[X:%.*]], [[W:%.*]]
+; CHECK-NEXT: [[ADD:%.*]] = or disjoint i32 [[W]], 1
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i32 [[X]], i32 [[ADD]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp = icmp ugt i32 %x, %w
+ %add = or disjoint i32 %w, 1
+ %r = select i1 %cmp, i32 %x, i32 %add
+ ret i32 %r
+}
+
+define i32 @test_umin_or_neg1_nuw(i32 %x, i32 range(i32 1, 0) %w) {
+; CHECK-LABEL: @test_umin_or_neg1_nuw(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[X:%.*]], [[W:%.*]]
+; CHECK-NEXT: [[R:%.*]] = select i1 [[CMP]], i32 [[X]], i32 -1
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %cmp = icmp ult i32 %x, %w
+ %sub = or disjoint i32 %w, -1
+ %r = select i1 %cmp, i32 %x, i32 %sub
+ ret i32 %r
+}
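(For anyone trying this locally: the updated tests can be run with an
invocation along the lines of

  opt -passes=instcombine -S llvm/test/Transforms/InstCombine/minmax-fold.ll

or via llvm-lit on that file.)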