[llvm] [InstCombine] Allow min/max in constant BOp min/max folding (PR #142878)

Wed Jun 4 17:31:33 PDT 2025

https://github.com/AlexMaclean created https://github.com/llvm/llvm-project/pull/142878

Extend the fold of `X Pred C2 ? X BOp C1 : C2 BOp C1` into `min/max(X, C2) BOp C1` so that `BOp` may itself be a min or max intrinsic. This ensures that a constant clamping pattern is folded into a pair of min/max instructions. Here is a simplified example of a case where this fold does not currently occur:
```cpp
// Clamps v to the range [0, 255]: negative inputs yield 0, inputs above 255
// yield 255, and any other value is returned unchanged.
int clampToU8(int v) {
    if (v < 0) return 0;      // below the range: clamp to the lower bound
    if (v > 255) return 255;  // above the range: clamp to the upper bound
    return v;
}
```
https://godbolt.org/z/78jhKPWbv

Generic proof: https://alive2.llvm.org/ce/z/cdpLYy

>From db9c88cc3a8fee1daf2a6d347df25c09995993e9 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Wed, 4 Jun 2025 16:42:28 +0000
Subject: [PATCH 1/2] pre-commit tests

---
 .../InstCombine/canonicalize-const-to-bop.ll  | 56 +++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/canonicalize-const-to-bop.ll b/llvm/test/Transforms/InstCombine/canonicalize-const-to-bop.ll
index 68049ca230191..c84e58113d843 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-const-to-bop.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-const-to-bop.ll
@@ -399,3 +399,59 @@ define i8 @sub_const_on_lhs_negative(i8 %x) {
   %s = select i1 %cmp, i8 %sub, i8 50
   ret i8 %s
 }
+
+define i8 @smin_ugt(i8 %x) {
+; CHECK-LABEL: define i8 @smin_ugt(
+; CHECK-SAME: i8 [[X:%.*]]) {
+; CHECK-NEXT:    [[SMIN:%.*]] = call i8 @llvm.smin.i8(i8 [[X]], i8 50)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 [[X]], 100
+; CHECK-NEXT:    [[S:%.*]] = select i1 [[CMP]], i8 50, i8 [[SMIN]]
+; CHECK-NEXT:    ret i8 [[S]]
+;
+  %smin = call i8 @llvm.smin.i8(i8 %x, i8 50)
+  %cmp = icmp ugt i8 %x, 100
+  %s = select i1 %cmp, i8 50, i8 %smin
+  ret i8 %s
+}
+
+define i8 @smax_ugt(i8 %x) {
+; CHECK-LABEL: define i8 @smax_ugt(
+; CHECK-SAME: i8 [[X:%.*]]) {
+; CHECK-NEXT:    [[SMAX:%.*]] = call i8 @llvm.smax.i8(i8 [[X]], i8 50)
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 [[X]], 100
+; CHECK-NEXT:    [[S:%.*]] = select i1 [[CMP]], i8 100, i8 [[SMAX]]
+; CHECK-NEXT:    ret i8 [[S]]
+;
+  %smax = call i8 @llvm.smax.i8(i8 %x, i8 50)
+  %cmp = icmp ugt i8 %x, 100
+  %s = select i1 %cmp, i8 100, i8 %smax
+  ret i8 %s
+}
+
+define i8 @umin_slt(i8 %x) {
+; CHECK-LABEL: define i8 @umin_slt(
+; CHECK-SAME: i8 [[X:%.*]]) {
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[X]], 0
+; CHECK-NEXT:    [[UMIN:%.*]] = tail call i8 @llvm.umin.i8(i8 [[X]], i8 100)
+; CHECK-NEXT:    [[S:%.*]] = select i1 [[CMP]], i8 0, i8 [[UMIN]]
+; CHECK-NEXT:    ret i8 [[S]]
+;
+  %cmp = icmp slt i8 %x, 0
+  %umin = tail call i8 @llvm.umin.i8(i8 %x, i8 100)
+  %s = select i1 %cmp, i8 0, i8 %umin
+  ret i8 %s
+}
+
+define i8 @umax_sgt(i8 %x) {
+; CHECK-LABEL: define i8 @umax_sgt(
+; CHECK-SAME: i8 [[X:%.*]]) {
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[X]], 100
+; CHECK-NEXT:    [[UMAX:%.*]] = tail call i8 @llvm.umax.i8(i8 [[X]], i8 50)
+; CHECK-NEXT:    [[S:%.*]] = select i1 [[CMP]], i8 100, i8 [[UMAX]]
+; CHECK-NEXT:    ret i8 [[S]]
+;
+  %cmp = icmp sgt i8 %x, 100
+  %umax = tail call i8 @llvm.umax.i8(i8 %x, i8 50)
+  %s = select i1 %cmp, i8 100, i8 %umax
+  ret i8 %s
+}

>From 669511a9f60b2fb9aea8376ccdfcf79585368f19 Mon Sep 17 00:00:00 2001
From: Alex Maclean <amaclean at nvidia.com>
Date: Wed, 4 Jun 2025 16:51:06 +0000
Subject: [PATCH 2/2] [InstCombine] Allow min/max in constant BOp min/max
 folding

---
 .../InstCombine/InstCombineSelect.cpp         | 47 +++++++++++++------
 .../InstCombine/canonicalize-const-to-bop.ll  | 19 +++-----
 2 files changed, 40 insertions(+), 26 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index d7d0431a5b8d0..8307a9842fb95 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -1822,7 +1822,6 @@ static Instruction *foldSelectICmpEq(SelectInst &SI, ICmpInst *ICI,
 static Value *foldSelectWithConstOpToBinOp(ICmpInst *Cmp, Value *TrueVal,
                                            Value *FalseVal,
                                            IRBuilderBase &Builder) {
-  BinaryOperator *BOp;
   Constant *C1, *C2, *C3;
   Value *X;
   CmpPredicate Predicate;
@@ -1838,30 +1837,48 @@ static Value *foldSelectWithConstOpToBinOp(ICmpInst *Cmp, Value *TrueVal,
     Predicate = ICmpInst::getInversePredicate(Predicate);
   }
 
-  if (!match(TrueVal, m_BinOp(BOp)) || !match(FalseVal, m_Constant(C3)))
+  if (!match(FalseVal, m_Constant(C3)) || !TrueVal->hasOneUse())
     return nullptr;
 
-  unsigned Opcode = BOp->getOpcode();
+  bool IsIntrinsic;
+  unsigned Opcode;
+  if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(TrueVal)) {
+    Opcode = BOp->getOpcode();
+    IsIntrinsic = false;
 
-  // This fold causes some regressions and is primarily intended for
-  // add and sub. So we early exit for div and rem to minimize the
-  // regressions.
-  if (Instruction::isIntDivRem(Opcode))
-    return nullptr;
+    // This fold causes some regressions and is primarily intended for
+    // add and sub. So we early exit for div and rem to minimize the
+    // regressions.
+    if (Instruction::isIntDivRem(Opcode))
+      return nullptr;
 
-  if (!match(BOp, m_OneUse(m_BinOp(m_Specific(X), m_Constant(C2)))))
+    if (!match(BOp, m_BinOp(m_Specific(X), m_Constant(C2))))
+      return nullptr;
+
+  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(TrueVal)) {
+    if (!match(II, m_MaxOrMin(m_Specific(X), m_Constant(C2))))
+      return nullptr;
+    Opcode = II->getIntrinsicID();
+    IsIntrinsic = true;
+  } else {
     return nullptr;
+  }
 
   Value *RHS;
   SelectPatternFlavor SPF;
-  const DataLayout &DL = BOp->getDataLayout();
+  const DataLayout &DL = Cmp->getDataLayout();
   auto Flipped = getFlippedStrictnessPredicateAndConstant(Predicate, C1);
 
-  if (C3 == ConstantFoldBinaryOpOperands(Opcode, C1, C2, DL)) {
+  auto FoldBinaryOpOrIntrinsic = [&](Constant *LHS, Constant *RHS) {
+    return IsIntrinsic ? ConstantFoldBinaryIntrinsic(Opcode, LHS, RHS,
+                                                     LHS->getType(), nullptr)
+                       : ConstantFoldBinaryOpOperands(Opcode, LHS, RHS, DL);
+  };
+
+  if (C3 == FoldBinaryOpOrIntrinsic(C1, C2)) {
     SPF = getSelectPattern(Predicate).Flavor;
     RHS = C1;
-  } else if (Flipped && C3 == ConstantFoldBinaryOpOperands(
-                                  Opcode, Flipped->second, C2, DL)) {
+  } else if (Flipped && C3 == FoldBinaryOpOrIntrinsic(Flipped->second, C2)) {
     SPF = getSelectPattern(Flipped->first).Flavor;
     RHS = Flipped->second;
   } else {
@@ -1870,7 +1887,9 @@ static Value *foldSelectWithConstOpToBinOp(ICmpInst *Cmp, Value *TrueVal,
 
   Intrinsic::ID IntrinsicID = getMinMaxIntrinsic(SPF);
   Value *Intrinsic = Builder.CreateBinaryIntrinsic(IntrinsicID, X, RHS);
-  return Builder.CreateBinOp(BOp->getOpcode(), Intrinsic, C2);
+  return IsIntrinsic ? Builder.CreateBinaryIntrinsic(Opcode, Intrinsic, C2)
+                     : Builder.CreateBinOp(Instruction::BinaryOps(Opcode),
+                                           Intrinsic, C2);
 }
 
 /// Visit a SelectInst that has an ICmpInst as its first operand.
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-const-to-bop.ll b/llvm/test/Transforms/InstCombine/canonicalize-const-to-bop.ll
index c84e58113d843..c08ec1bb7de0d 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-const-to-bop.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-const-to-bop.ll
@@ -403,9 +403,7 @@ define i8 @sub_const_on_lhs_negative(i8 %x) {
 define i8 @smin_ugt(i8 %x) {
 ; CHECK-LABEL: define i8 @smin_ugt(
 ; CHECK-SAME: i8 [[X:%.*]]) {
-; CHECK-NEXT:    [[SMIN:%.*]] = call i8 @llvm.smin.i8(i8 [[X]], i8 50)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 [[X]], 100
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[CMP]], i8 50, i8 [[SMIN]]
+; CHECK-NEXT:    [[S:%.*]] = call i8 @llvm.umin.i8(i8 [[X]], i8 50)
 ; CHECK-NEXT:    ret i8 [[S]]
 ;
   %smin = call i8 @llvm.smin.i8(i8 %x, i8 50)
@@ -417,9 +415,8 @@ define i8 @smin_ugt(i8 %x) {
 define i8 @smax_ugt(i8 %x) {
 ; CHECK-LABEL: define i8 @smax_ugt(
 ; CHECK-SAME: i8 [[X:%.*]]) {
-; CHECK-NEXT:    [[SMAX:%.*]] = call i8 @llvm.smax.i8(i8 [[X]], i8 50)
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 [[X]], 100
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[CMP]], i8 100, i8 [[SMAX]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.umin.i8(i8 [[X]], i8 100)
+; CHECK-NEXT:    [[S:%.*]] = call i8 @llvm.smax.i8(i8 [[TMP1]], i8 50)
 ; CHECK-NEXT:    ret i8 [[S]]
 ;
   %smax = call i8 @llvm.smax.i8(i8 %x, i8 50)
@@ -431,9 +428,8 @@ define i8 @smax_ugt(i8 %x) {
 define i8 @umin_slt(i8 %x) {
 ; CHECK-LABEL: define i8 @umin_slt(
 ; CHECK-SAME: i8 [[X:%.*]]) {
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[X]], 0
-; CHECK-NEXT:    [[UMIN:%.*]] = tail call i8 @llvm.umin.i8(i8 [[X]], i8 100)
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[CMP]], i8 0, i8 [[UMIN]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.smax.i8(i8 [[X]], i8 0)
+; CHECK-NEXT:    [[S:%.*]] = call i8 @llvm.umin.i8(i8 [[TMP1]], i8 100)
 ; CHECK-NEXT:    ret i8 [[S]]
 ;
   %cmp = icmp slt i8 %x, 0
@@ -445,9 +441,8 @@ define i8 @umin_slt(i8 %x) {
 define i8 @umax_sgt(i8 %x) {
 ; CHECK-LABEL: define i8 @umax_sgt(
 ; CHECK-SAME: i8 [[X:%.*]]) {
-; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[X]], 100
-; CHECK-NEXT:    [[UMAX:%.*]] = tail call i8 @llvm.umax.i8(i8 [[X]], i8 50)
-; CHECK-NEXT:    [[S:%.*]] = select i1 [[CMP]], i8 100, i8 [[UMAX]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.smin.i8(i8 [[X]], i8 100)
+; CHECK-NEXT:    [[S:%.*]] = call i8 @llvm.umax.i8(i8 [[TMP1]], i8 50)
 ; CHECK-NEXT:    ret i8 [[S]]
 ;
   %cmp = icmp sgt i8 %x, 100