[llvm] r306525 - [InstCombine] Canonicalize clamp of float types to minmax in fast mode.

Wed Jun 28 02:26:21 PDT 2017

Author: n.bozhenov
Date: Wed Jun 28 02:26:20 2017
New Revision: 306525

URL: http://llvm.org/viewvc/llvm-project?rev=306525&view=rev
Log:
[InstCombine] Canonicalize clamp of float types to minmax in fast mode.

Summary:
This commit allows matchSelectPattern to recognize clamp of float
arguments in the presence of FMF the same way as already done for
integers.

This case is a little different though. With integers, given the
min/max pattern is recognized, DAGBuilder starts selecting MIN/MAX
"automatically". That is not the case for float, because for them only
full FMINNAN/FMINNUM/FMAXNAN/FMAXNUM ISD nodes exist and they do care
about NaNs. On the other hand, some backends (e.g. X86) have only
FMIN/FMAX nodes that do not care about NaNS and the former NAN/NUM
nodes are illegal thus selection is not happening. So I decided to do
such kind of transformation in IR (InstCombiner) instead of
complicating the logic in the backend.

Reviewers: spatel, jmolloy, majnemer, efriedma, craig.topper

Reviewed By: efriedma

Subscribers: hiraditya, javed.absar, n.bozhenov, llvm-commits

Patch by Andrei Elovikov <andrei.elovikov at intel.com>

Differential Revision: https://reviews.llvm.org/D33186

Modified:
    llvm/trunk/include/llvm/IR/PatternMatch.h
    llvm/trunk/lib/Analysis/ValueTracking.cpp
    llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp
    llvm/trunk/test/Transforms/InstCombine/clamp-to-minmax.ll

Modified: llvm/trunk/include/llvm/IR/PatternMatch.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/PatternMatch.h?rev=306525&r1=306524&r2=306525&view=diff
==============================================================================

--- llvm/trunk/include/llvm/IR/PatternMatch.h (original)
+++ llvm/trunk/include/llvm/IR/PatternMatch.h Wed Jun 28 02:26:20 2017
@@ -195,11 +195,35 @@ struct apint_match {
     return false;
   }
 };
+// Either constexpr if or renaming ConstantFP::getValueAPF to
+// ConstantFP::getValue is needed to do it via single template
+// function for both apint/apfloat.
+struct apfloat_match {
+  const APFloat *&Res;
+  apfloat_match(const APFloat *&R) : Res(R) {}
+  template <typename ITy> bool match(ITy *V) {
+    if (auto *CI = dyn_cast<ConstantFP>(V)) {
+      Res = &CI->getValueAPF();
+      return true;
+    }
+    if (V->getType()->isVectorTy())
+      if (const auto *C = dyn_cast<Constant>(V))
+        if (auto *CI = dyn_cast_or_null<ConstantFP>(C->getSplatValue())) {
+          Res = &CI->getValueAPF();
+          return true;
+        }
+    return false;
+  }
+};
 
 /// \brief Match a ConstantInt or splatted ConstantVector, binding the
 /// specified pointer to the contained APInt.
 inline apint_match m_APInt(const APInt *&Res) { return Res; }
 
+/// \brief Match a ConstantFP or splatted ConstantVector, binding the
+/// specified pointer to the contained APFloat.
+inline apfloat_match m_APFloat(const APFloat *&Res) { return Res; }
+
 template <int64_t Val> struct constantint_match {
   template <typename ITy> bool match(ITy *V) {
     if (const auto *CI = dyn_cast<ConstantInt>(V)) {

Modified: llvm/trunk/lib/Analysis/ValueTracking.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ValueTracking.cpp?rev=306525&r1=306524&r2=306525&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/ValueTracking.cpp (original)
+++ llvm/trunk/lib/Analysis/ValueTracking.cpp Wed Jun 28 02:26:20 2017
@@ -3933,6 +3933,62 @@ static bool isKnownNonZero(const Value *
   return false;
 }
 
+/// Match clamp pattern for float types without care about NaNs or signed zeros.
+/// Given non-min/max outer cmp/select from the clamp pattern this
+/// function recognizes if it can be substitued by a "canonical" min/max
+/// pattern.
+static SelectPatternResult matchFastFloatClamp(CmpInst::Predicate Pred,
+                                               Value *CmpLHS, Value *CmpRHS,
+                                               Value *TrueVal, Value *FalseVal,
+                                               Value *&LHS, Value *&RHS) {
+  // Try to match
+  //   X < C1 ? C1 : Min(X, C2) --> Max(C1, Min(X, C2))
+  //   X > C1 ? C1 : Max(X, C2) --> Min(C1, Max(X, C2))
+  // and return description of the outer Max/Min.
+
+  // First, check if select has inverse order:
+  if (CmpRHS == FalseVal) {
+    std::swap(TrueVal, FalseVal);
+    Pred = CmpInst::getInversePredicate(Pred);
+  }
+
+  // Assume success now. If there's no match, callers should not use these anyway.
+  LHS = TrueVal;
+  RHS = FalseVal;
+
+  const APFloat *FC1;
+  if (CmpRHS != TrueVal || !match(CmpRHS, m_APFloat(FC1)) || !FC1->isFinite())
+    return {SPF_UNKNOWN, SPNB_NA, false};
+
+  const APFloat *FC2;
+  switch (Pred) {
+  case CmpInst::FCMP_OLT:
+  case CmpInst::FCMP_OLE:
+  case CmpInst::FCMP_ULT:
+  case CmpInst::FCMP_ULE:
+    if (match(FalseVal,
+              m_CombineOr(m_OrdFMin(m_Specific(CmpLHS), m_APFloat(FC2)),
+                          m_UnordFMin(m_Specific(CmpLHS), m_APFloat(FC2)))) &&
+        FC1->compare(*FC2) == APFloat::cmpResult::cmpLessThan)
+      return {SPF_FMAXNUM, SPNB_RETURNS_ANY, false};
+    break;
+  case CmpInst::FCMP_OGT:
+  case CmpInst::FCMP_OGE:
+  case CmpInst::FCMP_UGT:
+  case CmpInst::FCMP_UGE:
+    if (match(FalseVal,
+              m_CombineOr(m_OrdFMax(m_Specific(CmpLHS), m_APFloat(FC2)),
+                          m_UnordFMax(m_Specific(CmpLHS), m_APFloat(FC2)))) &&
+        FC1->compare(*FC2) == APFloat::cmpResult::cmpGreaterThan)
+      return {SPF_FMINNUM, SPNB_RETURNS_ANY, false};
+    break;
+  default:
+    break;
+  }
+
+  return {SPF_UNKNOWN, SPNB_NA, false};
+}
+
 /// Match non-obvious integer minimum and maximum sequences.
 static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
                                        Value *CmpLHS, Value *CmpRHS,
@@ -4140,7 +4196,18 @@ static SelectPatternResult matchSelectPa
     }
   }
 
-  return matchMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS);
+  if (CmpInst::isIntPredicate(Pred))
+    return matchMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS);
+
+  // According to (IEEE 754-2008 5.3.1), minNum(0.0, -0.0) and similar
+  // may return either -0.0 or 0.0, so fcmp/select pair has stricter
+  // semantics than minNum. Be conservative in such case.
+  if (NaNBehavior != SPNB_RETURNS_ANY ||
+      (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) &&
+       !isKnownNonZero(CmpRHS)))
+    return {SPF_UNKNOWN, SPNB_NA, false};
+
+  return matchFastFloatClamp(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS);
 }
 
 static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp?rev=306525&r1=306524&r2=306525&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp Wed Jun 28 02:26:20 2017
@@ -1374,9 +1374,16 @@ Instruction *InstCombiner::visitSelectIn
     auto SPF = SPR.Flavor;
 
     if (SelectPatternResult::isMinOrMax(SPF)) {
-      // Canonicalize so that type casts are outside select patterns.
-      if (LHS->getType()->getPrimitiveSizeInBits() !=
-          SelType->getPrimitiveSizeInBits()) {
+      // Canonicalize so that
+      // - type casts are outside select patterns.
+      // - float clamp is transformed to min/max pattern
+      Value *CmpLHS = cast<CmpInst>(CondVal)->getOperand(0);
+      Value *CmpRHS = cast<CmpInst>(CondVal)->getOperand(1);
+      if ((LHS->getType()->getPrimitiveSizeInBits() !=
+           SelType->getPrimitiveSizeInBits()) ||
+          (LHS->getType()->isFPOrFPVectorTy() &&
+           ((CmpLHS != LHS && CmpLHS != RHS) ||
+            (CmpRHS != LHS && CmpRHS != RHS)))) {
         CmpInst::Predicate Pred = getCmpPredicateForMinMax(SPF, SPR.Ordered);
 
         Value *Cmp;

Modified: llvm/trunk/test/Transforms/InstCombine/clamp-to-minmax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/clamp-to-minmax.ll?rev=306525&r1=306524&r2=306525&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/clamp-to-minmax.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/clamp-to-minmax.ll Wed Jun 28 02:26:20 2017
@@ -7,9 +7,9 @@ define float @clamp_float_fast_ordered_s
 ; CHECK-LABEL: @clamp_float_fast_ordered_strict_maxmin(
 ; CHECK-NEXT:    [[CMP2:%.*]] = fcmp fast olt float [[X:%.*]], 2.550000e+02
 ; CHECK-NEXT:    [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02
-; CHECK-NEXT:    [[CMP1:%.*]] = fcmp fast olt float [[X]], 1.000000e+00
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
-; CHECK-NEXT:    ret float [[R]]
+; CHECK-NEXT:    [[DOTINV:%.*]] = fcmp fast oge float [[MIN]], 1.000000e+00
+; CHECK-NEXT:    [[R1:%.*]] = select i1 [[DOTINV]], float [[MIN]], float 1.000000e+00
+; CHECK-NEXT:    ret float [[R1]]
 ;
   %cmp2 = fcmp fast olt float %x, 255.0
   %min = select i1 %cmp2, float %x, float 255.0
@@ -24,9 +24,9 @@ define float @clamp_float_fast_ordered_n
 ; CHECK-LABEL: @clamp_float_fast_ordered_nonstrict_maxmin(
 ; CHECK-NEXT:    [[CMP2:%.*]] = fcmp fast olt float [[X:%.*]], 2.550000e+02
 ; CHECK-NEXT:    [[MIN:%.*]] = select i1 [[CMP2]], float [[X]], float 2.550000e+02
-; CHECK-NEXT:    [[CMP1:%.*]] = fcmp fast ole float [[X]], 1.000000e+00
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
-; CHECK-NEXT:    ret float [[R]]
+; CHECK-NEXT:    [[DOTINV:%.*]] = fcmp fast oge float [[MIN]], 1.000000e+00
+; CHECK-NEXT:    [[R1:%.*]] = select i1 [[DOTINV]], float [[MIN]], float 1.000000e+00
+; CHECK-NEXT:    ret float [[R1]]
 ;
   %cmp2 = fcmp fast olt float %x, 255.0
   %min = select i1 %cmp2, float %x, float 255.0
@@ -41,9 +41,9 @@ define float @clamp_float_fast_ordered_s
 ; CHECK-LABEL: @clamp_float_fast_ordered_strict_minmax(
 ; CHECK-NEXT:    [[CMP2:%.*]] = fcmp fast ogt float [[X:%.*]], 1.000000e+00
 ; CHECK-NEXT:    [[MAX:%.*]] = select i1 [[CMP2]], float [[X]], float 1.000000e+00
-; CHECK-NEXT:    [[CMP1:%.*]] = fcmp fast ogt float [[X]], 2.550000e+02
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
-; CHECK-NEXT:    ret float [[R]]
+; CHECK-NEXT:    [[DOTINV:%.*]] = fcmp fast ole float [[MAX]], 2.550000e+02
+; CHECK-NEXT:    [[R1:%.*]] = select i1 [[DOTINV]], float [[MAX]], float 2.550000e+02
+; CHECK-NEXT:    ret float [[R1]]
 ;
   %cmp2 = fcmp fast ogt float %x, 1.0
   %max = select i1 %cmp2, float %x, float 1.0
@@ -58,9 +58,9 @@ define float @clamp_float_fast_ordered_n
 ; CHECK-LABEL: @clamp_float_fast_ordered_nonstrict_minmax(
 ; CHECK-NEXT:    [[CMP2:%.*]] = fcmp fast ogt float [[X:%.*]], 1.000000e+00
 ; CHECK-NEXT:    [[MAX:%.*]] = select i1 [[CMP2]], float [[X]], float 1.000000e+00
-; CHECK-NEXT:    [[CMP1:%.*]] = fcmp fast oge float [[X]], 2.550000e+02
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
-; CHECK-NEXT:    ret float [[R]]
+; CHECK-NEXT:    [[DOTINV:%.*]] = fcmp fast ole float [[MAX]], 2.550000e+02
+; CHECK-NEXT:    [[R1:%.*]] = select i1 [[DOTINV]], float [[MAX]], float 2.550000e+02
+; CHECK-NEXT:    ret float [[R1]]
 ;
   %cmp2 = fcmp fast ogt float %x, 1.0
   %max = select i1 %cmp2, float %x, float 1.0
@@ -78,9 +78,9 @@ define float @clamp_float_fast_unordered
 ; CHECK-LABEL: @clamp_float_fast_unordered_strict_maxmin(
 ; CHECK-NEXT:    [[CMP2_INV:%.*]] = fcmp fast oge float [[X:%.*]], 2.550000e+02
 ; CHECK-NEXT:    [[MIN:%.*]] = select i1 [[CMP2_INV]], float 2.550000e+02, float [[X]]
-; CHECK-NEXT:    [[CMP1:%.*]] = fcmp fast ult float [[X]], 1.000000e+00
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
-; CHECK-NEXT:    ret float [[R]]
+; CHECK-NEXT:    [[DOTINV:%.*]] = fcmp fast oge float [[MIN]], 1.000000e+00
+; CHECK-NEXT:    [[R1:%.*]] = select i1 [[DOTINV]], float [[MIN]], float 1.000000e+00
+; CHECK-NEXT:    ret float [[R1]]
 ;
   %cmp2 = fcmp fast ult float %x, 255.0
   %min = select i1 %cmp2, float %x, float 255.0
@@ -95,9 +95,9 @@ define float @clamp_float_fast_unordered
 ; CHECK-LABEL: @clamp_float_fast_unordered_nonstrict_maxmin(
 ; CHECK-NEXT:    [[CMP2_INV:%.*]] = fcmp fast oge float [[X:%.*]], 2.550000e+02
 ; CHECK-NEXT:    [[MIN:%.*]] = select i1 [[CMP2_INV]], float 2.550000e+02, float [[X]]
-; CHECK-NEXT:    [[CMP1:%.*]] = fcmp fast ule float [[X]], 1.000000e+00
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP1]], float 1.000000e+00, float [[MIN]]
-; CHECK-NEXT:    ret float [[R]]
+; CHECK-NEXT:    [[DOTINV:%.*]] = fcmp fast oge float [[MIN]], 1.000000e+00
+; CHECK-NEXT:    [[R1:%.*]] = select i1 [[DOTINV]], float [[MIN]], float 1.000000e+00
+; CHECK-NEXT:    ret float [[R1]]
 ;
   %cmp2 = fcmp fast ult float %x, 255.0
   %min = select i1 %cmp2, float %x, float 255.0
@@ -112,9 +112,9 @@ define float @clamp_float_fast_unordered
 ; CHECK-LABEL: @clamp_float_fast_unordered_strict_minmax(
 ; CHECK-NEXT:    [[CMP2_INV:%.*]] = fcmp fast ole float [[X:%.*]], 1.000000e+00
 ; CHECK-NEXT:    [[MAX:%.*]] = select i1 [[CMP2_INV]], float 1.000000e+00, float [[X]]
-; CHECK-NEXT:    [[CMP1:%.*]] = fcmp fast ugt float [[X]], 2.550000e+02
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
-; CHECK-NEXT:    ret float [[R]]
+; CHECK-NEXT:    [[DOTINV:%.*]] = fcmp fast ole float [[MAX]], 2.550000e+02
+; CHECK-NEXT:    [[R1:%.*]] = select i1 [[DOTINV]], float [[MAX]], float 2.550000e+02
+; CHECK-NEXT:    ret float [[R1]]
 ;
   %cmp2 = fcmp fast ugt float %x, 1.0
   %max = select i1 %cmp2, float %x, float 1.0
@@ -129,9 +129,9 @@ define float @clamp_float_fast_unordered
 ; CHECK-LABEL: @clamp_float_fast_unordered_nonstrict_minmax(
 ; CHECK-NEXT:    [[CMP2_INV:%.*]] = fcmp fast ole float [[X:%.*]], 1.000000e+00
 ; CHECK-NEXT:    [[MAX:%.*]] = select i1 [[CMP2_INV]], float 1.000000e+00, float [[X]]
-; CHECK-NEXT:    [[CMP1:%.*]] = fcmp fast uge float [[X]], 2.550000e+02
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[CMP1]], float 2.550000e+02, float [[MAX]]
-; CHECK-NEXT:    ret float [[R]]
+; CHECK-NEXT:    [[DOTINV:%.*]] = fcmp fast ole float [[MAX]], 2.550000e+02
+; CHECK-NEXT:    [[R1:%.*]] = select i1 [[DOTINV]], float [[MAX]], float 2.550000e+02
+; CHECK-NEXT:    ret float [[R1]]
 ;
   %cmp2 = fcmp fast ugt float %x, 1.0
   %max = select i1 %cmp2, float %x, float 1.0
@@ -143,13 +143,14 @@ define float @clamp_float_fast_unordered
 ; Some more checks with fast
 
 ; (X > 1.0) ? min(x, 255.0) : 1.0
+; That did not match because select was in inverse order.
 define float @clamp_test_1(float %x) {
 ; CHECK-LABEL: @clamp_test_1(
 ; CHECK-NEXT:    [[INNER_CMP_INV:%.*]] = fcmp fast oge float [[X:%.*]], 2.550000e+02
 ; CHECK-NEXT:    [[INNER_SEL:%.*]] = select i1 [[INNER_CMP_INV]], float 2.550000e+02, float [[X]]
-; CHECK-NEXT:    [[OUTER_CMP:%.*]] = fcmp fast ugt float [[X]], 1.000000e+00
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[OUTER_CMP]], float [[INNER_SEL]], float 1.000000e+00
-; CHECK-NEXT:    ret float [[R]]
+; CHECK-NEXT:    [[DOTINV:%.*]] = fcmp fast oge float [[INNER_SEL]], 1.000000e+00
+; CHECK-NEXT:    [[R1:%.*]] = select i1 [[DOTINV]], float [[INNER_SEL]], float 1.000000e+00
+; CHECK-NEXT:    ret float [[R1]]
 ;
   %inner_cmp = fcmp fast ult float %x, 255.0
   %inner_sel = select i1 %inner_cmp, float %x, float 255.0