[llvm] r289855 - [InstCombine] add folds for icmp (smin X, Y), X

Thu Dec 15 11:13:37 PST 2016

Author: spatel
Date: Thu Dec 15 13:13:37 2016
New Revision: 289855

URL: http://llvm.org/viewvc/llvm-project?rev=289855&view=rev
Log:
[InstCombine] add folds for icmp (smin X, Y), X

Min/max canonicalization (r287585) exposes the fact that we're missing combines for min/max patterns. 
This patch won't solve the example that was attached to that thread, so something else still needs fixing.

The line between InstCombine and InstSimplify gets blurry here because sometimes the icmp instruction that
we want to fold to already exists, but sometimes it's the swapped form of what we want.

Corresponding changes for smax/umin/umax to follow.

Differential Revision: https://reviews.llvm.org/D27531

Modified:
    llvm/trunk/include/llvm/IR/PatternMatch.h
    llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp
    llvm/trunk/test/Transforms/InstCombine/smin-icmp.ll

Modified: llvm/trunk/include/llvm/IR/PatternMatch.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/PatternMatch.h?rev=289855&r1=289854&r2=289855&view=diff
==============================================================================

--- llvm/trunk/include/llvm/IR/PatternMatch.h (original)
+++ llvm/trunk/include/llvm/IR/PatternMatch.h Thu Dec 15 13:13:37 2016
@@ -1349,6 +1349,16 @@ m_c_Xor(const LHS &L, const RHS &R) {
   return m_CombineOr(m_Xor(L, R), m_Xor(R, L));
 }
 
+// TODO: Add the related SMax, UMax, UMin commuted matchers.
+
+/// Matches an SMin (per m_SMin) whose operands match L and R in either order.
+template<typename LHS, typename RHS>
+inline match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty>,
+                        MaxMin_match<ICmpInst, RHS, LHS, smin_pred_ty>>
+m_c_SMin(const LHS &L, const RHS &R) {
+  return m_CombineOr(m_SMin(L, R), m_SMin(R, L));
+}
+
 } // end namespace PatternMatch
 } // end namespace llvm
 

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp?rev=289855&r1=289854&r2=289855&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCompares.cpp Thu Dec 15 13:13:37 2016
@@ -3030,6 +3030,40 @@ Instruction *InstCombiner::foldICmpBinOp
   return nullptr;
 }
 
+/// Fold icmp Pred smin(X, Y), X to an icmp of X and Y; null if no fold applies.
+static Instruction *foldICmpWithSMin(ICmpInst &Cmp) {
+  ICmpInst::Predicate Pred = Cmp.getPredicate();
+  Value *Op0 = Cmp.getOperand(0);
+  Value *X = Cmp.getOperand(1);
+
+  // TODO: This should be expanded to handle smax/umax/umin.
+
+  // If the RHS is an smin of the LHS, swap operands and predicate so it is LHS.
+  if (match(X, m_c_SMin(m_Specific(Op0), m_Value()))) {
+    std::swap(Op0, X);
+    Pred = Cmp.getSwappedPredicate();
+  }
+
+  Value *Y; // Bound by the match below to the smin operand that is not X.
+  if (!match(Op0, m_c_SMin(m_Specific(X), m_Value(Y))))
+    return nullptr;
+
+  // smin(X, Y) == X --> X <= Y (signed; emitted as ICMP_SLE)
+  // smin(X, Y) >= X --> X <= Y (signed; emitted as ICMP_SLE)
+  if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_SGE)
+    return new ICmpInst(ICmpInst::ICMP_SLE, X, Y);
+
+  // smin(X, Y) != X --> X > Y (signed; emitted as ICMP_SGT)
+  // smin(X, Y) <  X --> X > Y (signed; emitted as ICMP_SGT)
+  if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_SLT)
+    return new ICmpInst(ICmpInst::ICMP_SGT, X, Y);
+
+  // No other predicate is folded here. InstSimplify should handle these cases:
+  // smin(X, Y) <= X --> true
+  // smin(X, Y) > X --> false
+  return nullptr;
+}
+
 Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) {
   if (!I.isEquality())
     return nullptr;
@@ -4277,6 +4311,9 @@ Instruction *InstCombiner::visitICmpInst
   if (Instruction *Res = foldICmpBinOp(I))
     return Res;
 
+  if (Instruction *Res = foldICmpWithSMin(I))
+    return Res;
+
   {
     Value *A, *B;
     // Transform (A & ~B) == 0 --> (A & B) != 0

Modified: llvm/trunk/test/Transforms/InstCombine/smin-icmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/smin-icmp.ll?rev=289855&r1=289854&r2=289855&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/smin-icmp.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/smin-icmp.ll Thu Dec 15 13:13:37 2016
@@ -11,9 +11,7 @@
 
 define i1 @eq_smin1(i32 %x, i32 %y) {
 ; CHECK-LABEL: @eq_smin1(
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 %x, %y
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP1]], i32 %x, i32 %y
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[SEL]], %x
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sle i32 %x, %y
 ; CHECK-NEXT:    ret i1 [[CMP2]]
 ;
   %cmp1 = icmp slt i32 %x, %y
@@ -26,9 +24,7 @@ define i1 @eq_smin1(i32 %x, i32 %y) {
 
 define i1 @eq_smin2(i32 %x, i32 %y) {
 ; CHECK-LABEL: @eq_smin2(
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 %y, %x
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP1]], i32 %y, i32 %x
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[SEL]], %x
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sle i32 %x, %y
 ; CHECK-NEXT:    ret i1 [[CMP2]]
 ;
   %cmp1 = icmp slt i32 %y, %x
@@ -42,9 +38,7 @@ define i1 @eq_smin2(i32 %x, i32 %y) {
 define i1 @eq_smin3(i32 %a, i32 %y) {
 ; CHECK-LABEL: @eq_smin3(
 ; CHECK-NEXT:    [[X:%.*]] = add i32 %a, 3
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[X]], %y
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP1]], i32 [[X]], i32 %y
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[X]], [[SEL]]
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sle i32 [[X]], %y
 ; CHECK-NEXT:    ret i1 [[CMP2]]
 ;
   %x = add i32 %a, 3 ; thwart complexity-based canonicalization
@@ -59,9 +53,7 @@ define i1 @eq_smin3(i32 %a, i32 %y) {
 define i1 @eq_smin4(i32 %a, i32 %y) {
 ; CHECK-LABEL: @eq_smin4(
 ; CHECK-NEXT:    [[X:%.*]] = add i32 %a, 3
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[X]], %y
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP1]], i32 %y, i32 [[X]]
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp eq i32 [[X]], [[SEL]]
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sle i32 [[X]], %y
 ; CHECK-NEXT:    ret i1 [[CMP2]]
 ;
   %x = add i32 %a, 3 ; thwart complexity-based canonicalization
@@ -75,9 +67,7 @@ define i1 @eq_smin4(i32 %a, i32 %y) {
 
 define i1 @sge_smin1(i32 %x, i32 %y) {
 ; CHECK-LABEL: @sge_smin1(
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 %x, %y
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP1]], i32 %x, i32 %y
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp sge i32 [[SEL]], %x
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sle i32 %x, %y
 ; CHECK-NEXT:    ret i1 [[CMP2]]
 ;
   %cmp1 = icmp slt i32 %x, %y
@@ -90,9 +80,7 @@ define i1 @sge_smin1(i32 %x, i32 %y) {
 
 define i1 @sge_smin2(i32 %x, i32 %y) {
 ; CHECK-LABEL: @sge_smin2(
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 %y, %x
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP1]], i32 %y, i32 %x
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp sge i32 [[SEL]], %x
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sle i32 %x, %y
 ; CHECK-NEXT:    ret i1 [[CMP2]]
 ;
   %cmp1 = icmp slt i32 %y, %x
@@ -106,9 +94,7 @@ define i1 @sge_smin2(i32 %x, i32 %y) {
 define i1 @sge_smin3(i32 %a, i32 %y) {
 ; CHECK-LABEL: @sge_smin3(
 ; CHECK-NEXT:    [[X:%.*]] = add i32 %a, 3
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[X]], %y
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP1]], i32 [[X]], i32 %y
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp sle i32 [[X]], [[SEL]]
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sle i32 [[X]], %y
 ; CHECK-NEXT:    ret i1 [[CMP2]]
 ;
   %x = add i32 %a, 3 ; thwart complexity-based canonicalization
@@ -123,9 +109,7 @@ define i1 @sge_smin3(i32 %a, i32 %y) {
 define i1 @sge_smin4(i32 %a, i32 %y) {
 ; CHECK-LABEL: @sge_smin4(
 ; CHECK-NEXT:    [[X:%.*]] = add i32 %a, 3
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[X]], %y
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP1]], i32 %y, i32 [[X]]
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp sle i32 [[X]], [[SEL]]
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sle i32 [[X]], %y
 ; CHECK-NEXT:    ret i1 [[CMP2]]
 ;
   %x = add i32 %a, 3 ; thwart complexity-based canonicalization
@@ -139,9 +123,7 @@ define i1 @sge_smin4(i32 %a, i32 %y) {
 
 define i1 @ne_smin1(i32 %x, i32 %y) {
 ; CHECK-LABEL: @ne_smin1(
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 %x, %y
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP1]], i32 %x, i32 %y
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp ne i32 [[SEL]], %x
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i32 %x, %y
 ; CHECK-NEXT:    ret i1 [[CMP2]]
 ;
   %cmp1 = icmp slt i32 %x, %y
@@ -168,9 +150,7 @@ define i1 @ne_smin2(i32 %x, i32 %y) {
 define i1 @ne_smin3(i32 %a, i32 %y) {
 ; CHECK-LABEL: @ne_smin3(
 ; CHECK-NEXT:    [[X:%.*]] = add i32 %a, 3
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[X]], %y
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP1]], i32 [[X]], i32 %y
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp ne i32 [[X]], [[SEL]]
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[X]], %y
 ; CHECK-NEXT:    ret i1 [[CMP2]]
 ;
   %x = add i32 %a, 3 ; thwart complexity-based canonicalization
@@ -199,9 +179,7 @@ define i1 @ne_smin4(i32 %a, i32 %y) {
 
 define i1 @slt_smin1(i32 %x, i32 %y) {
 ; CHECK-LABEL: @slt_smin1(
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 %x, %y
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP1]], i32 %x, i32 %y
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[SEL]], %x
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i32 %x, %y
 ; CHECK-NEXT:    ret i1 [[CMP2]]
 ;
   %cmp1 = icmp slt i32 %x, %y
@@ -228,9 +206,7 @@ define i1 @slt_smin2(i32 %x, i32 %y) {
 define i1 @slt_smin3(i32 %a, i32 %y) {
 ; CHECK-LABEL: @slt_smin3(
 ; CHECK-NEXT:    [[X:%.*]] = add i32 %a, 3
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[X]], %y
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP1]], i32 [[X]], i32 %y
-; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[X]], [[SEL]]
+; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[X]], %y
 ; CHECK-NEXT:    ret i1 [[CMP2]]
 ;
   %x = add i32 %a, 3 ; thwart complexity-based canonicalization