[llvm] r324255 - [InstCombine] add unsigned saturation subtraction canonicalizations

Mon Feb 5 09:53:29 PST 2018

Author: spatel
Date: Mon Feb  5 09:53:29 2018
New Revision: 324255

URL: http://llvm.org/viewvc/llvm-project?rev=324255&view=rev
Log:
[InstCombine] add unsigned saturation subtraction canonicalizations

This is the instcombine part of unsigned saturation canonicalization.
Backend patches already commited: 
https://reviews.llvm.org/D37510
https://reviews.llvm.org/D37534

It converts unsigned saturated subtraction patterns to forms recognized 
by the backend:
(a > b) ? a - b : 0 -> ((a > b) ? a : b) - b)
(b < a) ? a - b : 0 -> ((a > b) ? a : b) - b)
(b > a) ? 0 : a - b -> ((a > b) ? a : b) - b)
(a < b) ? 0 : a - b -> ((a > b) ? a : b) - b)
((a > b) ? b - a : 0) -> - ((a > b) ? a : b) - b)
((b < a) ? b - a : 0) -> - ((a > b) ? a : b) - b)
((b > a) ? 0 : b - a) -> - ((a > b) ? a : b) - b)
((a < b) ? 0 : b - a) -> - ((a > b) ? a : b) - b)

Patch by Yulia Koval!

Differential Revision: https://reviews.llvm.org/D41480

Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp
    llvm/trunk/test/Transforms/InstCombine/unsigned_saturated_sub.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp?rev=324255&r1=324254&r2=324255&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp Mon Feb  5 09:53:29 2018
@@ -538,6 +538,59 @@ static Value *foldSelectICmpAndOr(const
   return Builder.CreateOr(V, Y);
 }
 
+/// Transform patterns such as: (a > b) ? a - b : 0
+/// into: ((a > b) ? a : b) - b)
+/// This produces a canonical max pattern that is more easily recognized by the
+/// backend and converted into saturated subtraction instructions if those
+/// exist.
+/// There are 8 commuted/swapped variants of this pattern.
+/// TODO: Also support a - UMIN(a,b) patterns.
+static Value *canonicalizeSaturatedSubtract(const ICmpInst *ICI,
+                                            const Value *TrueVal,
+                                            const Value *FalseVal,
+                                            InstCombiner::BuilderTy &Builder) {
+  ICmpInst::Predicate Pred = ICI->getPredicate();
+  if (!ICmpInst::isUnsigned(Pred))
+    return nullptr;
+
+  // (b > a) ? 0 : a - b -> (b <= a) ? a - b : 0
+  if (match(TrueVal, m_Zero())) {
+    Pred = ICmpInst::getInversePredicate(Pred);
+    std::swap(TrueVal, FalseVal);
+  }
+  if (!match(FalseVal, m_Zero()))
+    return nullptr;
+
+  Value *A = ICI->getOperand(0);
+  Value *B = ICI->getOperand(1);
+  if (Pred == ICmpInst::ICMP_ULE || Pred == ICmpInst::ICMP_ULT) {
+    // (b < a) ? a - b : 0 -> (a > b) ? a - b : 0
+    std::swap(A, B);
+    Pred = ICmpInst::getSwappedPredicate(Pred);
+  }
+
+  assert((Pred == ICmpInst::ICMP_UGE || Pred == ICmpInst::ICMP_UGT) &&
+         "Unexpected isUnsigned predicate!");
+
+  // Account for swapped form of subtraction: ((a > b) ? b - a : 0).
+  bool IsNegative = false;
+  if (match(TrueVal, m_Sub(m_Specific(B), m_Specific(A))))
+    IsNegative = true;
+  else if (!match(TrueVal, m_Sub(m_Specific(A), m_Specific(B))))
+    return nullptr;
+
+  // If sub is used anywhere else, we wouldn't be able to eliminate it
+  // afterwards.
+  if (!TrueVal->hasOneUse())
+    return nullptr;
+
+  // All checks passed, convert to canonical unsigned saturated subtraction
+  // form: sub(max()).
+  // (a > b) ? a - b : 0 -> ((a > b) ? a : b) - b)
+  Value *Max = Builder.CreateSelect(Builder.CreateICmp(Pred, A, B), A, B);
+  return IsNegative ? Builder.CreateSub(B, Max) : Builder.CreateSub(Max, B);
+}
+
 /// Attempt to fold a cttz/ctlz followed by a icmp plus select into a single
 /// call to cttz/ctlz with flag 'is_zero_undef' cleared.
 ///
@@ -860,10 +913,12 @@ Instruction *InstCombiner::foldSelectIns
   if (Value *V = foldSelectCttzCtlz(ICI, TrueVal, FalseVal, Builder))
     return replaceInstUsesWith(SI, V);
 
+  if (Value *V = canonicalizeSaturatedSubtract(ICI, TrueVal, FalseVal, Builder))
+    return replaceInstUsesWith(SI, V);
+
   return Changed ? &SI : nullptr;
 }
 
-
 /// SI is a select whose condition is a PHI node (but the two may be in
 /// different blocks). See if the true/false values (V) are live in all of the
 /// predecessor blocks of the PHI. For example, cases like this can't be mapped:

Modified: llvm/trunk/test/Transforms/InstCombine/unsigned_saturated_sub.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/unsigned_saturated_sub.ll?rev=324255&r1=324254&r2=324255&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/unsigned_saturated_sub.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/unsigned_saturated_sub.ll Mon Feb  5 09:53:29 2018
@@ -12,10 +12,10 @@ declare void @use(i64)
 
 define i64 @max_sub_ugt(i64 %a, i64 %b) {
 ; CHECK-LABEL: @max_sub_ugt(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[SUB:%.*]] = sub i64 [[A]], [[B]]
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i64 [[SUB]], i64 0
-; CHECK-NEXT:    ret i64 [[SEL]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[A]], i64 [[B]]
+; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]]
+; CHECK-NEXT:    ret i64 [[TMP3]]
 ;
   %cmp = icmp ugt i64 %a, %b
   %sub = sub i64 %a, %b
@@ -23,15 +23,30 @@ define i64 @max_sub_ugt(i64 %a, i64 %b)
   ret i64 %sel
 }
 
+; (a >= b) ? a - b : 0 -> ((a >= b) ? a : b) - b)
+
+define i64 @max_sub_uge(i64 %a, i64 %b) {
+; CHECK-LABEL: @max_sub_uge(
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]]
+; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]]
+; CHECK-NEXT:    ret i64 [[TMP3]]
+;
+  %cmp = icmp uge i64 %a, %b
+  %sub = sub i64 %a, %b
+  %sel = select i1 %cmp, i64 %sub ,i64 0
+  ret i64 %sel
+}
+
 ; Again, with vectors:
 ; (a > b) ? a - b : 0 -> ((a > b) ? a : b) - b)
 
 define <4 x i32> @max_sub_ugt_vec(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: @max_sub_ugt_vec(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[SUB:%.*]] = sub <4 x i32> [[A]], [[B]]
-; CHECK-NEXT:    [[SEL:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[SUB]], <4 x i32> zeroinitializer
-; CHECK-NEXT:    ret <4 x i32> [[SEL]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A]], <4 x i32> [[B]]
+; CHECK-NEXT:    [[TMP3:%.*]] = sub <4 x i32> [[TMP2]], [[B]]
+; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
 ;
   %cmp = icmp ugt <4 x i32> %a, %b
   %sub = sub <4 x i32> %a, %b
@@ -44,12 +59,12 @@ define <4 x i32> @max_sub_ugt_vec(<4 x i
 
 define i64 @max_sub_ult(i64 %a, i64 %b) {
 ; CHECK-LABEL: @max_sub_ult(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[B:%.*]], [[A:%.*]]
-; CHECK-NEXT:    [[SUB:%.*]] = sub i64 [[A]], [[B]]
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i64 [[SUB]], i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[A]], i64 [[B]]
+; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]]
 ; CHECK-NEXT:    [[EXTRASUB:%.*]] = sub i64 [[B]], [[A]]
 ; CHECK-NEXT:    call void @use(i64 [[EXTRASUB]])
-; CHECK-NEXT:    ret i64 [[SEL]]
+; CHECK-NEXT:    ret i64 [[TMP3]]
 ;
   %cmp = icmp ult i64 %b, %a
   %sub = sub i64 %a, %b
@@ -63,12 +78,12 @@ define i64 @max_sub_ult(i64 %a, i64 %b)
 
 define i64 @max_sub_ugt_sel_swapped(i64 %a, i64 %b) {
 ; CHECK-LABEL: @max_sub_ugt_sel_swapped(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[B:%.*]], [[A:%.*]]
-; CHECK-NEXT:    [[SUB:%.*]] = sub i64 [[A]], [[B]]
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i64 0, i64 [[SUB]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i64 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]]
+; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]]
 ; CHECK-NEXT:    [[EXTRASUB:%.*]] = sub i64 [[B]], [[A]]
 ; CHECK-NEXT:    call void @use(i64 [[EXTRASUB]])
-; CHECK-NEXT:    ret i64 [[SEL]]
+; CHECK-NEXT:    ret i64 [[TMP3]]
 ;
   %cmp = icmp ugt i64 %b, %a
   %sub = sub i64 %a, %b
@@ -82,10 +97,10 @@ define i64 @max_sub_ugt_sel_swapped(i64
 
 define i64 @max_sub_ult_sel_swapped(i64 %a, i64 %b) {
 ; CHECK-LABEL: @max_sub_ult_sel_swapped(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[SUB:%.*]] = sub i64 [[A]], [[B]]
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i64 0, i64 [[SUB]]
-; CHECK-NEXT:    ret i64 [[SEL]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]]
+; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]]
+; CHECK-NEXT:    ret i64 [[TMP3]]
 ;
   %cmp = icmp ult i64 %a, %b
   %sub = sub i64 %a, %b
@@ -97,12 +112,12 @@ define i64 @max_sub_ult_sel_swapped(i64
 
 define i64 @neg_max_sub_ugt(i64 %a, i64 %b) {
 ; CHECK-LABEL: @neg_max_sub_ugt(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[SUB:%.*]] = sub i64 [[B]], [[A]]
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i64 [[SUB]], i64 0
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[A]], i64 [[B]]
+; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[B]], [[TMP2]]
 ; CHECK-NEXT:    [[EXTRASUB:%.*]] = sub i64 [[A]], [[B]]
 ; CHECK-NEXT:    call void @use(i64 [[EXTRASUB]])
-; CHECK-NEXT:    ret i64 [[SEL]]
+; CHECK-NEXT:    ret i64 [[TMP3]]
 ;
   %cmp = icmp ugt i64 %a, %b
   %sub = sub i64 %b, %a
@@ -116,10 +131,10 @@ define i64 @neg_max_sub_ugt(i64 %a, i64
 
 define i64 @neg_max_sub_ult(i64 %a, i64 %b) {
 ; CHECK-LABEL: @neg_max_sub_ult(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[B:%.*]], [[A:%.*]]
-; CHECK-NEXT:    [[SUB:%.*]] = sub i64 [[B]], [[A]]
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i64 [[SUB]], i64 0
-; CHECK-NEXT:    ret i64 [[SEL]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[A]], i64 [[B]]
+; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[B]], [[TMP2]]
+; CHECK-NEXT:    ret i64 [[TMP3]]
 ;
   %cmp = icmp ult i64 %b, %a
   %sub = sub i64 %b, %a
@@ -131,10 +146,10 @@ define i64 @neg_max_sub_ult(i64 %a, i64
 
 define i64 @neg_max_sub_ugt_sel_swapped(i64 %a, i64 %b) {
 ; CHECK-LABEL: @neg_max_sub_ugt_sel_swapped(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i64 [[B:%.*]], [[A:%.*]]
-; CHECK-NEXT:    [[SUB:%.*]] = sub i64 [[B]], [[A]]
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i64 0, i64 [[SUB]]
-; CHECK-NEXT:    ret i64 [[SEL]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]]
+; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[B]], [[TMP2]]
+; CHECK-NEXT:    ret i64 [[TMP3]]
 ;
   %cmp = icmp ugt i64 %b, %a
   %sub = sub i64 %b, %a
@@ -146,12 +161,12 @@ define i64 @neg_max_sub_ugt_sel_swapped(
 
 define i64 @neg_max_sub_ult_sel_swapped(i64 %a, i64 %b) {
 ; CHECK-LABEL: @neg_max_sub_ult_sel_swapped(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[SUB:%.*]] = sub i64 [[B]], [[A]]
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i64 0, i64 [[SUB]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]]
+; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[B]], [[TMP2]]
 ; CHECK-NEXT:    [[EXTRASUB:%.*]] = sub i64 [[A]], [[B]]
 ; CHECK-NEXT:    call void @use(i64 [[EXTRASUB]])
-; CHECK-NEXT:    ret i64 [[SEL]]
+; CHECK-NEXT:    ret i64 [[TMP3]]
 ;
   %cmp = icmp ult i64 %a, %b
   %sub = sub i64 %b, %a