[llvm] r321998 - [InstCombine] fold min/max tree with common operand (PR35717)

Mon Jan 8 07:05:34 PST 2018

Author: spatel
Date: Mon Jan  8 07:05:34 2018
New Revision: 321998

URL: http://llvm.org/viewvc/llvm-project?rev=321998&view=rev
Log:
[InstCombine] fold min/max tree with common operand (PR35717)

There is precedence for factorization transforms in instcombine for FP ops with fast-math. 
We also have similar logic in foldSPFofSPF().

It would take more work to add this to reassociate because that's specialized for binops, 
and min/max are not binops (or even single instructions). Also, I don't have evidence that 
larger min/max trees than this exist in real code, but if we find that's true, we might
want to reorganize where/how we do this optimization.

In the motivating example from https://bugs.llvm.org/show_bug.cgi?id=35717 , we have:

int test(int xc, int xm, int xy) {
  int xk;
  if (xc < xm)
    xk = xc < xy ? xc : xy;
  else
    xk = xm < xy ? xm : xy;
  return xk;
}

This patch solves that problem because we recognize more min/max patterns after rL321672

https://rise4fun.com/Alive/Qjne
https://rise4fun.com/Alive/3yg

Differential Revision: https://reviews.llvm.org/D41603

Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp
    llvm/trunk/test/Transforms/InstCombine/max-of-nots.ll
    llvm/trunk/test/Transforms/InstCombine/minmax-fold.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp?rev=321998&r1=321997&r2=321998&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp Mon Jan  8 07:05:34 2018
@@ -1289,6 +1289,63 @@ static Instruction *foldSelectCmpXchg(Se
   return nullptr;
 }
 
+/// Reduce a sequence of min/max with a common operand.
+static Instruction *factorizeMinMaxTree(SelectPatternFlavor SPF, Value *LHS,
+                                        Value *RHS,
+                                        InstCombiner::BuilderTy &Builder) {
+  assert(SelectPatternResult::isMinOrMax(SPF) && "Expected a min/max");
+  // TODO: Allow FP min/max with nnan/nsz.
+  if (!LHS->getType()->isIntOrIntVectorTy())
+    return nullptr;
+
+  // Match 3 of the same min/max ops. Example: umin(umin(), umin()).
+  Value *A, *B, *C, *D;
+  SelectPatternResult L = matchSelectPattern(LHS, A, B);
+  SelectPatternResult R = matchSelectPattern(RHS, C, D);
+  if (SPF != L.Flavor || L.Flavor != R.Flavor)
+    return nullptr;
+
+  // Look for a common operand. The use checks are different than usual because
+  // a min/max pattern typically has 2 uses of each op: 1 by the cmp and 1 by
+  // the select.
+  Value *MinMaxOp = nullptr;
+  Value *ThirdOp = nullptr;
+  if (LHS->getNumUses() <= 2 && RHS->getNumUses() > 2) {
+    // If the LHS is only used in this chain and the RHS is used outside of it,
+    // reuse the RHS min/max because that will eliminate the LHS.
+    if (D == A || C == A) {
+      // min(min(a, b), min(c, a)) --> min(min(c, a), b)
+      // min(min(a, b), min(a, d)) --> min(min(a, d), b)
+      MinMaxOp = RHS;
+      ThirdOp = B;
+    } else if (D == B || C == B) {
+      // min(min(a, b), min(c, b)) --> min(min(c, b), a)
+      // min(min(a, b), min(b, d)) --> min(min(b, d), a)
+      MinMaxOp = RHS;
+      ThirdOp = A;
+    }
+  } else if (RHS->getNumUses() <= 2) {
+    // Reuse the LHS. This will eliminate the RHS.
+    if (D == A || D == B) {
+      // min(min(a, b), min(c, a)) --> min(min(a, b), c)
+      // min(min(a, b), min(c, b)) --> min(min(a, b), c)
+      MinMaxOp = LHS;
+      ThirdOp = C;
+    } else if (C == A || C == B) {
+      // min(min(a, b), min(b, d)) --> min(min(a, b), d)
+      // min(min(a, b), min(c, b)) --> min(min(a, b), d)
+      MinMaxOp = LHS;
+      ThirdOp = D;
+    }
+  }
+  if (!MinMaxOp || !ThirdOp)
+    return nullptr;
+
+  CmpInst::Predicate P = getCmpPredicateForMinMax(SPF);
+  Value *CmpABC = Builder.CreateICmp(P, MinMaxOp, ThirdOp);
+  return SelectInst::Create(CmpABC, MinMaxOp, ThirdOp);
+}
+
 Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
   Value *CondVal = SI.getCondition();
   Value *TrueVal = SI.getTrueValue();
@@ -1563,6 +1620,9 @@ Instruction *InstCombiner::visitSelectIn
         Value *NewSel = Builder.CreateSelect(InvertedCmp, A, B);
         return BinaryOperator::CreateNot(NewSel);
       }
+
+      if (Instruction *I = factorizeMinMaxTree(SPF, LHS, RHS, Builder))
+        return I;
     }
 
     if (SPF) {

Modified: llvm/trunk/test/Transforms/InstCombine/max-of-nots.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/max-of-nots.ll?rev=321998&r1=321997&r2=321998&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/max-of-nots.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/max-of-nots.ll Mon Jan  8 07:05:34 2018
@@ -84,12 +84,10 @@ define i8 @umin_not_2_extra_use(i8 %x, i
 
 define i8 @umin3_not(i8 %x, i8 %y, i8 %z) {
 ; CHECK-LABEL: @umin3_not(
-; CHECK-NEXT:    [[CMPYX:%.*]] = icmp ult i8 %y, %x
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i8 %x, %z
 ; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i8 %x, i8 %z
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp ugt i8 %y, %z
-; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i8 %y, i8 %z
-; CHECK-NEXT:    [[R_V:%.*]] = select i1 [[CMPYX]], i8 [[TMP2]], i8 [[TMP4]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ugt i8 [[TMP2]], %y
+; CHECK-NEXT:    [[R_V:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 %y
 ; CHECK-NEXT:    [[R:%.*]] = xor i8 [[R:%.*]].v, -1
 ; CHECK-NEXT:    ret i8 [[R]]
 ;

Modified: llvm/trunk/test/Transforms/InstCombine/minmax-fold.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/minmax-fold.ll?rev=321998&r1=321997&r2=321998&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/minmax-fold.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/minmax-fold.ll Mon Jan  8 07:05:34 2018
@@ -754,10 +754,8 @@ define i32 @common_factor_smin(i32 %a, i
 ; CHECK-LABEL: @common_factor_smin(
 ; CHECK-NEXT:    [[CMP_AB:%.*]] = icmp slt i32 %a, %b
 ; CHECK-NEXT:    [[MIN_AB:%.*]] = select i1 [[CMP_AB]], i32 %a, i32 %b
-; CHECK-NEXT:    [[CMP_BC:%.*]] = icmp slt i32 %b, %c
-; CHECK-NEXT:    [[MIN_BC:%.*]] = select i1 [[CMP_BC]], i32 %b, i32 %c
-; CHECK-NEXT:    [[CMP_AB_BC:%.*]] = icmp slt i32 [[MIN_AB]], [[MIN_BC]]
-; CHECK-NEXT:    [[MIN_ABC:%.*]] = select i1 [[CMP_AB_BC]], i32 [[MIN_AB]], i32 [[MIN_BC]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[MIN_AB]], %c
+; CHECK-NEXT:    [[MIN_ABC:%.*]] = select i1 [[TMP1]], i32 [[MIN_AB]], i32 %c
 ; CHECK-NEXT:    ret i32 [[MIN_ABC]]
 ;
   %cmp_ab = icmp slt i32 %a, %b
@@ -775,10 +773,8 @@ define <2 x i32> @common_factor_smax(<2
 ; CHECK-LABEL: @common_factor_smax(
 ; CHECK-NEXT:    [[CMP_AB:%.*]] = icmp sgt <2 x i32> %a, %b
 ; CHECK-NEXT:    [[MAX_AB:%.*]] = select <2 x i1> [[CMP_AB]], <2 x i32> %a, <2 x i32> %b
-; CHECK-NEXT:    [[CMP_CB:%.*]] = icmp sgt <2 x i32> %c, %b
-; CHECK-NEXT:    [[MAX_CB:%.*]] = select <2 x i1> [[CMP_CB]], <2 x i32> %c, <2 x i32> %b
-; CHECK-NEXT:    [[CMP_AB_CB:%.*]] = icmp sgt <2 x i32> [[MAX_AB]], [[MAX_CB]]
-; CHECK-NEXT:    [[MAX_ABC:%.*]] = select <2 x i1> [[CMP_AB_CB]], <2 x i32> [[MAX_AB]], <2 x i32> [[MAX_CB]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <2 x i32> [[MAX_AB]], %c
+; CHECK-NEXT:    [[MAX_ABC:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[MAX_AB]], <2 x i32> %c
 ; CHECK-NEXT:    ret <2 x i32> [[MAX_ABC]]
 ;
   %cmp_ab = icmp sgt <2 x i32> %a, %b
@@ -796,10 +792,8 @@ define <2 x i32> @common_factor_umin(<2
 ; CHECK-LABEL: @common_factor_umin(
 ; CHECK-NEXT:    [[CMP_BC:%.*]] = icmp ult <2 x i32> %b, %c
 ; CHECK-NEXT:    [[MIN_BC:%.*]] = select <2 x i1> [[CMP_BC]], <2 x i32> %b, <2 x i32> %c
-; CHECK-NEXT:    [[CMP_AB:%.*]] = icmp ult <2 x i32> %a, %b
-; CHECK-NEXT:    [[MIN_AB:%.*]] = select <2 x i1> [[CMP_AB]], <2 x i32> %a, <2 x i32> %b
-; CHECK-NEXT:    [[CMP_BC_AB:%.*]] = icmp ult <2 x i32> [[MIN_BC]], [[MIN_AB]]
-; CHECK-NEXT:    [[MIN_ABC:%.*]] = select <2 x i1> [[CMP_BC_AB]], <2 x i32> [[MIN_BC]], <2 x i32> [[MIN_AB]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i32> [[MIN_BC]], %a
+; CHECK-NEXT:    [[MIN_ABC:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[MIN_BC]], <2 x i32> %a
 ; CHECK-NEXT:    ret <2 x i32> [[MIN_ABC]]
 ;
   %cmp_bc = icmp ult <2 x i32> %b, %c
@@ -817,10 +811,8 @@ define i32 @common_factor_umax(i32 %a, i
 ; CHECK-LABEL: @common_factor_umax(
 ; CHECK-NEXT:    [[CMP_BC:%.*]] = icmp ugt i32 %b, %c
 ; CHECK-NEXT:    [[MAX_BC:%.*]] = select i1 [[CMP_BC]], i32 %b, i32 %c
-; CHECK-NEXT:    [[CMP_BA:%.*]] = icmp ugt i32 %b, %a
-; CHECK-NEXT:    [[MAX_BA:%.*]] = select i1 [[CMP_BA]], i32 %b, i32 %a
-; CHECK-NEXT:    [[CMP_BC_BA:%.*]] = icmp ugt i32 [[MAX_BC]], [[MAX_BA]]
-; CHECK-NEXT:    [[MAX_ABC:%.*]] = select i1 [[CMP_BC_BA]], i32 [[MAX_BC]], i32 [[MAX_BA]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i32 [[MAX_BC]], %a
+; CHECK-NEXT:    [[MAX_ABC:%.*]] = select i1 [[TMP1]], i32 [[MAX_BC]], i32 %a
 ; CHECK-NEXT:    ret i32 [[MAX_ABC]]
 ;
   %cmp_bc = icmp ugt i32 %b, %c
@@ -838,10 +830,8 @@ define i32 @common_factor_umax_extra_use
 ; CHECK-LABEL: @common_factor_umax_extra_use_lhs(
 ; CHECK-NEXT:    [[CMP_BC:%.*]] = icmp ugt i32 %b, %c
 ; CHECK-NEXT:    [[MAX_BC:%.*]] = select i1 [[CMP_BC]], i32 %b, i32 %c
-; CHECK-NEXT:    [[CMP_BA:%.*]] = icmp ugt i32 %b, %a
-; CHECK-NEXT:    [[MAX_BA:%.*]] = select i1 [[CMP_BA]], i32 %b, i32 %a
-; CHECK-NEXT:    [[CMP_BC_BA:%.*]] = icmp ugt i32 [[MAX_BC]], [[MAX_BA]]
-; CHECK-NEXT:    [[MAX_ABC:%.*]] = select i1 [[CMP_BC_BA]], i32 [[MAX_BC]], i32 [[MAX_BA]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i32 [[MAX_BC]], %a
+; CHECK-NEXT:    [[MAX_ABC:%.*]] = select i1 [[TMP1]], i32 [[MAX_BC]], i32 %a
 ; CHECK-NEXT:    call void @extra_use(i32 [[MAX_BC]])
 ; CHECK-NEXT:    ret i32 [[MAX_ABC]]
 ;
@@ -857,12 +847,10 @@ define i32 @common_factor_umax_extra_use
 
 define i32 @common_factor_umax_extra_use_rhs(i32 %a, i32 %b, i32 %c) {
 ; CHECK-LABEL: @common_factor_umax_extra_use_rhs(
-; CHECK-NEXT:    [[CMP_BC:%.*]] = icmp ugt i32 %b, %c
-; CHECK-NEXT:    [[MAX_BC:%.*]] = select i1 [[CMP_BC]], i32 %b, i32 %c
 ; CHECK-NEXT:    [[CMP_BA:%.*]] = icmp ugt i32 %b, %a
 ; CHECK-NEXT:    [[MAX_BA:%.*]] = select i1 [[CMP_BA]], i32 %b, i32 %a
-; CHECK-NEXT:    [[CMP_BC_BA:%.*]] = icmp ugt i32 [[MAX_BC]], [[MAX_BA]]
-; CHECK-NEXT:    [[MAX_ABC:%.*]] = select i1 [[CMP_BC_BA]], i32 [[MAX_BC]], i32 [[MAX_BA]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i32 [[MAX_BA]], %c
+; CHECK-NEXT:    [[MAX_ABC:%.*]] = select i1 [[TMP1]], i32 [[MAX_BA]], i32 %c
 ; CHECK-NEXT:    call void @extra_use(i32 [[MAX_BA]])
 ; CHECK-NEXT:    ret i32 [[MAX_ABC]]
 ;