[llvm] r357103 - [InstCombine] Use uadd.sat and usub.sat for canonicalization

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 27 10:56:15 PDT 2019


Author: nikic
Date: Wed Mar 27 10:56:15 2019
New Revision: 357103

URL: http://llvm.org/viewvc/llvm-project?rev=357103&view=rev
Log:
[InstCombine] Use uadd.sat and usub.sat for canonicalization

Start using the uadd.sat and usub.sat intrinsics for the existing
canonicalizations. These intrinsics should optimize better than
expanded IR, have better handling in the X86 backend, and, as far
as we know, be no worse than expanded IR in other backends.

rL357012 already introduced use of uadd.sat for the add+umin pattern.

Differential Revision: https://reviews.llvm.org/D58872
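
As a sketch of the effect (the function below is illustrative, not part
of the patch; the updated tests verify the same pattern), running
opt -instcombine over the old compare+select idiom now yields the
intrinsic directly:

  define i32 @usub_sat_sketch(i32 %a, i32 %b) {
    %cmp = icmp ugt i32 %a, %b
    %sub = sub i32 %a, %b
    %sel = select i1 %cmp, i32 %sub, i32 0
    ret i32 %sel
  }

  ; after this patch InstCombine emits:
  ;   %1 = call i32 @llvm.usub.sat.i32(i32 %a, i32 %b)
  ;   ret i32 %1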

Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp
    llvm/trunk/test/Transforms/InstCombine/saturating-add-sub.ll
    llvm/trunk/test/Transforms/InstCombine/unsigned_saturated_sub.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp?rev=357103&r1=357102&r2=357103&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineSelect.cpp Wed Mar 27 10:56:15 2019
@@ -622,11 +622,7 @@ static Value *foldSelectICmpAndOr(const
   return Builder.CreateOr(V, Y);
 }
 
-/// Transform patterns such as: (a > b) ? a - b : 0
-/// into: ((a > b) ? a : b) - b)
-/// This produces a canonical max pattern that is more easily recognized by the
-/// backend and converted into saturated subtraction instructions if those
-/// exist.
+/// Transform patterns such as (a > b) ? a - b : 0 into usub.sat(a, b).
 /// There are 8 commuted/swapped variants of this pattern.
 /// TODO: Also support a - UMIN(a,b) patterns.
 static Value *canonicalizeSaturatedSubtract(const ICmpInst *ICI,
@@ -668,11 +664,12 @@ static Value *canonicalizeSaturatedSubtr
   if (!TrueVal->hasOneUse())
     return nullptr;
 
-  // All checks passed, convert to canonical unsigned saturated subtraction
-  // form: sub(max()).
-  // (a > b) ? a - b : 0 -> ((a > b) ? a : b) - b)
-  Value *Max = Builder.CreateSelect(Builder.CreateICmp(Pred, A, B), A, B);
-  return IsNegative ? Builder.CreateSub(B, Max) : Builder.CreateSub(Max, B);
+  // (a > b) ? a - b : 0 -> usub.sat(a, b)
+  // (a > b) ? b - a : 0 -> -usub.sat(a, b)
+  Value *Result = Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, A, B);
+  if (IsNegative)
+    Result = Builder.CreateNeg(Result);
+  return Result;
 }
 
 static Value *canonicalizeSaturatedAdd(ICmpInst *Cmp, Value *TVal, Value *FVal,
@@ -689,15 +686,16 @@ static Value *canonicalizeSaturatedAdd(I
   if (Pred == ICmpInst::ICMP_ULT &&
       match(TVal, m_Add(m_Value(X), m_APInt(C))) && X == Cmp0 &&
       match(FVal, m_AllOnes()) && match(Cmp1, m_APInt(CmpC)) && *CmpC == ~*C) {
-    // Commute compare predicate and select operands:
-    // (X u< ~C) ? (X + C) : -1 --> (X u> ~C) ? -1 : (X + C)
-    Value *NewCmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, X, Cmp1);
-    return Builder.CreateSelect(NewCmp, FVal, TVal);
+    // (X u< ~C) ? (X + C) : -1 --> uadd.sat(X, C)
+    return Builder.CreateBinaryIntrinsic(
+        Intrinsic::uadd_sat, X, ConstantInt::get(X->getType(), *C));
   }
 
   // Match unsigned saturated add of 2 variables with an unnecessary 'not'.
   // There are 8 commuted variants.
-  // Canonicalize -1 (saturated result) to true value of the select.
+  // Canonicalize -1 (saturated result) to true value of the select. Just
+  // swapping the compare operands is legal, because the selected value is the
+  // same in case of equality, so we can interchange u< and u<=.
   if (match(FVal, m_AllOnes())) {
     std::swap(TVal, FVal);
     std::swap(Cmp0, Cmp1);
@@ -717,24 +715,19 @@ static Value *canonicalizeSaturatedAdd(I
   Value *Y;
   if (match(Cmp0, m_Not(m_Value(X))) &&
       match(FVal, m_c_Add(m_Specific(X), m_Value(Y))) && Y == Cmp1) {
-    // Change the comparison to use the sum (false value of the select). That is
-    // a canonical pattern match form for uadd.with.overflow and eliminates a
-    // use of the 'not' op:
-    // (~X u< Y) ? -1 : (X + Y) --> ((X + Y) u< Y) ? -1 : (X + Y)
-    // (~X u< Y) ? -1 : (Y + X) --> ((Y + X) u< Y) ? -1 : (Y + X)
-    Value *NewCmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, FVal, Y);
-    return Builder.CreateSelect(NewCmp, TVal, FVal);
+    // (~X u< Y) ? -1 : (X + Y) --> uadd.sat(X, Y)
+    // (~X u< Y) ? -1 : (Y + X) --> uadd.sat(X, Y)
+    return Builder.CreateBinaryIntrinsic(Intrinsic::uadd_sat, X, Y);
   }
   // The 'not' op may be included in the sum but not the compare.
   X = Cmp0;
   Y = Cmp1;
   if (match(FVal, m_c_Add(m_Not(m_Specific(X)), m_Specific(Y)))) {
-    // Change the comparison to use the sum (false value of the select). That is
-    // a canonical pattern match form for uadd.with.overflow:
-    // (X u< Y) ? -1 : (~X + Y) --> ((~X + Y) u< Y) ? -1 : (~X + Y)
-    // (X u< Y) ? -1 : (Y + ~X) --> ((Y + ~X) u< Y) ? -1 : (Y + ~X)
-    Value *NewCmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, FVal, Y);
-    return Builder.CreateSelect(NewCmp, TVal, FVal);
+    // (X u< Y) ? -1 : (~X + Y) --> uadd.sat(~X, Y)
+    // (X u< Y) ? -1 : (Y + ~X) --> uadd.sat(Y, ~X)
+    BinaryOperator *BO = cast<BinaryOperator>(FVal);
+    return Builder.CreateBinaryIntrinsic(
+        Intrinsic::uadd_sat, BO->getOperand(0), BO->getOperand(1));
   }
 
   return nullptr;
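
A sketch of the constant case handled above (illustrative, not from the
patch; it mirrors the uadd_sat_constant_commute test below, with C = 42
and ~C = -43):

  define i32 @uadd_sat_const_sketch(i32 %x) {
    %a = add i32 %x, 42
    %c = icmp ult i32 %x, -43        ; X u< ~C
    %r = select i1 %c, i32 %a, i32 -1
    ret i32 %r
  }

  ; becomes: %r = call i32 @llvm.uadd.sat.i32(i32 %x, i32 42)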

Modified: llvm/trunk/test/Transforms/InstCombine/saturating-add-sub.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/saturating-add-sub.ll?rev=357103&r1=357102&r2=357103&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/saturating-add-sub.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/saturating-add-sub.ll Wed Mar 27 10:56:15 2019
@@ -893,10 +893,8 @@ define <2 x i8> @test_vector_usub_add_nu
 
 define i32 @uadd_sat(i32 %x, i32 %y) {
 ; CHECK-LABEL: @uadd_sat(
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 [[Y:%.*]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %notx = xor i32 %x, -1
   %a = add i32 %y, %x
@@ -908,10 +906,8 @@ define i32 @uadd_sat(i32 %x, i32 %y) {
 define i32 @uadd_sat_commute_add(i32 %xp, i32 %y) {
 ; CHECK-LABEL: @uadd_sat_commute_add(
 ; CHECK-NEXT:    [[X:%.*]] = urem i32 42, [[XP:%.*]]
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[X]], [[Y:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X]], i32 [[Y:%.*]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %x = urem i32 42, %xp ; thwart complexity-based-canonicalization
   %notx = xor i32 %x, -1
@@ -924,10 +920,8 @@ define i32 @uadd_sat_commute_add(i32 %xp
 define i32 @uadd_sat_ugt(i32 %x, i32 %yp) {
 ; CHECK-LABEL: @uadd_sat_ugt(
 ; CHECK-NEXT:    [[Y:%.*]] = sdiv i32 [[YP:%.*]], 2442
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[Y]], [[X:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 [[Y]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %y = sdiv i32 %yp, 2442 ; thwart complexity-based-canonicalization
   %notx = xor i32 %x, -1
@@ -941,10 +935,8 @@ define <2 x i32> @uadd_sat_ugt_commute_a
 ; CHECK-LABEL: @uadd_sat_ugt_commute_add(
 ; CHECK-NEXT:    [[Y:%.*]] = sdiv <2 x i32> [[YP:%.*]], <i32 2442, i32 4242>
 ; CHECK-NEXT:    [[X:%.*]] = srem <2 x i32> <i32 42, i32 43>, [[XP:%.*]]
-; CHECK-NEXT:    [[A:%.*]] = add <2 x i32> [[X]], [[Y]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i32> [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> <i32 -1, i32 -1>, <2 x i32> [[A]]
-; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[X]], <2 x i32> [[Y]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP1]]
 ;
   %y = sdiv <2 x i32> %yp, <i32 2442, i32 4242> ; thwart complexity-based-canonicalization
   %x = srem <2 x i32> <i32 42, i32 43>, %xp     ; thwart complexity-based-canonicalization
@@ -958,10 +950,8 @@ define <2 x i32> @uadd_sat_ugt_commute_a
 define i32 @uadd_sat_commute_select(i32 %x, i32 %yp) {
 ; CHECK-LABEL: @uadd_sat_commute_select(
 ; CHECK-NEXT:    [[Y:%.*]] = sdiv i32 [[YP:%.*]], 2442
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[Y]], [[X:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 [[Y]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %y = sdiv i32 %yp, 2442 ; thwart complexity-based-canonicalization
   %notx = xor i32 %x, -1
@@ -975,10 +965,8 @@ define i32 @uadd_sat_commute_select_comm
 ; CHECK-LABEL: @uadd_sat_commute_select_commute_add(
 ; CHECK-NEXT:    [[X:%.*]] = urem i32 42, [[XP:%.*]]
 ; CHECK-NEXT:    [[Y:%.*]] = sdiv i32 [[YP:%.*]], 2442
-; CHECK-NEXT:    [[A:%.*]] = add nsw i32 [[X]], [[Y]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %x = urem i32 42, %xp ; thwart complexity-based-canonicalization
   %y = sdiv i32 %yp, 2442 ; thwart complexity-based-canonicalization
@@ -991,10 +979,8 @@ define i32 @uadd_sat_commute_select_comm
 
 define <2 x i32> @uadd_sat_commute_select_ugt(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @uadd_sat_commute_select_ugt(
-; CHECK-NEXT:    [[A:%.*]] = add <2 x i32> [[Y:%.*]], [[X:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i32> [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> <i32 -1, i32 -1>, <2 x i32> [[A]]
-; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP1]]
 ;
   %notx = xor <2 x i32> %x, <i32 -1, i32 -1>
   %a = add <2 x i32> %y, %x
@@ -1006,10 +992,8 @@ define <2 x i32> @uadd_sat_commute_selec
 define i32 @uadd_sat_commute_select_ugt_commute_add(i32 %xp, i32 %y) {
 ; CHECK-LABEL: @uadd_sat_commute_select_ugt_commute_add(
 ; CHECK-NEXT:    [[X:%.*]] = srem i32 42, [[XP:%.*]]
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[X]], [[Y:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X]], i32 [[Y:%.*]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %x = srem i32 42, %xp   ; thwart complexity-based-canonicalization
   %notx = xor i32 %x, -1
@@ -1054,10 +1038,8 @@ define i32 @not_uadd_sat2(i32 %x, i32 %y
 define i32 @uadd_sat_not(i32 %x, i32 %y) {
 ; CHECK-LABEL: @uadd_sat_not(
 ; CHECK-NEXT:    [[NOTX:%.*]] = xor i32 [[X:%.*]], -1
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[NOTX]], [[Y:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[NOTX]], i32 [[Y:%.*]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %notx = xor i32 %x, -1
   %a = add i32 %notx, %y
@@ -1071,10 +1053,8 @@ define i32 @uadd_sat_not_commute_add(i32
 ; CHECK-NEXT:    [[X:%.*]] = srem i32 42, [[XP:%.*]]
 ; CHECK-NEXT:    [[Y:%.*]] = urem i32 42, [[YP:%.*]]
 ; CHECK-NEXT:    [[NOTX:%.*]] = xor i32 [[X]], -1
-; CHECK-NEXT:    [[A:%.*]] = add nsw i32 [[Y]], [[NOTX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[Y]], i32 [[NOTX]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %x = srem i32 42, %xp ; thwart complexity-based-canonicalization
   %y = urem i32 42, %yp ; thwart complexity-based-canonicalization
@@ -1088,10 +1068,8 @@ define i32 @uadd_sat_not_commute_add(i32
 define i32 @uadd_sat_not_ugt(i32 %x, i32 %y) {
 ; CHECK-LABEL: @uadd_sat_not_ugt(
 ; CHECK-NEXT:    [[NOTX:%.*]] = xor i32 [[X:%.*]], -1
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[NOTX]], [[Y:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[NOTX]], i32 [[Y:%.*]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %notx = xor i32 %x, -1
   %a = add i32 %notx, %y
@@ -1104,10 +1082,8 @@ define <2 x i32> @uadd_sat_not_ugt_commu
 ; CHECK-LABEL: @uadd_sat_not_ugt_commute_add(
 ; CHECK-NEXT:    [[Y:%.*]] = sdiv <2 x i32> [[YP:%.*]], <i32 2442, i32 4242>
 ; CHECK-NEXT:    [[NOTX:%.*]] = xor <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
-; CHECK-NEXT:    [[A:%.*]] = add <2 x i32> [[Y]], [[NOTX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i32> [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> <i32 -1, i32 -1>, <2 x i32> [[A]]
-; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[Y]], <2 x i32> [[NOTX]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP1]]
 ;
   %y = sdiv <2 x i32> %yp, <i32 2442, i32 4242> ; thwart complexity-based-canonicalization
   %notx = xor <2 x i32> %x, <i32 -1, i32 -1>
@@ -1120,10 +1096,8 @@ define <2 x i32> @uadd_sat_not_ugt_commu
 define i32 @uadd_sat_not_commute_select(i32 %x, i32 %y) {
 ; CHECK-LABEL: @uadd_sat_not_commute_select(
 ; CHECK-NEXT:    [[NOTX:%.*]] = xor i32 [[X:%.*]], -1
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[NOTX]], [[Y:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[NOTX]], i32 [[Y:%.*]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %notx = xor i32 %x, -1
   %a = add i32 %notx, %y
@@ -1136,10 +1110,8 @@ define i32 @uadd_sat_not_commute_select_
 ; CHECK-LABEL: @uadd_sat_not_commute_select_commute_add(
 ; CHECK-NEXT:    [[Y:%.*]] = sdiv i32 42, [[YP:%.*]]
 ; CHECK-NEXT:    [[NOTX:%.*]] = xor i32 [[X:%.*]], -1
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[Y]], [[NOTX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[Y]], i32 [[NOTX]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %y = sdiv i32 42, %yp ; thwart complexity-based-canonicalization
   %notx = xor i32 %x, -1
@@ -1154,10 +1126,8 @@ define <2 x i32> @uadd_sat_not_commute_s
 ; CHECK-NEXT:    [[X:%.*]] = urem <2 x i32> <i32 42, i32 -42>, [[XP:%.*]]
 ; CHECK-NEXT:    [[Y:%.*]] = srem <2 x i32> <i32 12, i32 412>, [[YP:%.*]]
 ; CHECK-NEXT:    [[NOTX:%.*]] = xor <2 x i32> [[X]], <i32 -1, i32 -1>
-; CHECK-NEXT:    [[A:%.*]] = add <2 x i32> [[Y]], [[NOTX]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i32> [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> <i32 -1, i32 -1>, <2 x i32> [[A]]
-; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[Y]], <2 x i32> [[NOTX]])
+; CHECK-NEXT:    ret <2 x i32> [[TMP1]]
 ;
   %x = urem <2 x i32> <i32 42, i32 -42>, %xp ; thwart complexity-based-canonicalization
   %y = srem <2 x i32> <i32 12, i32 412>, %yp ; thwart complexity-based-canonicalization
@@ -1171,10 +1141,8 @@ define <2 x i32> @uadd_sat_not_commute_s
 define i32 @uadd_sat_not_commute_select_ugt_commute_add(i32 %x, i32 %y) {
 ; CHECK-LABEL: @uadd_sat_not_commute_select_ugt_commute_add(
 ; CHECK-NEXT:    [[NOTX:%.*]] = xor i32 [[X:%.*]], -1
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[NOTX]], [[Y:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[A]], [[Y]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[NOTX]], i32 [[Y:%.*]])
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %notx = xor i32 %x, -1
   %a = add i32 %notx, %y
@@ -1198,10 +1166,8 @@ define i32 @uadd_sat_constant(i32 %x) {
 
 define i32 @uadd_sat_constant_commute(i32 %x) {
 ; CHECK-LABEL: @uadd_sat_constant_commute(
-; CHECK-NEXT:    [[A:%.*]] = add i32 [[X:%.*]], 42
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i32 [[X]], -43
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 -1, i32 [[A]]
-; CHECK-NEXT:    ret i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.uadd.sat.i32(i32 [[X:%.*]], i32 42)
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %a = add i32 %x, 42
   %c = icmp ult i32 %x, -43
@@ -1224,10 +1190,8 @@ define <4 x i32> @uadd_sat_constant_vec(
 
 define <4 x i32> @uadd_sat_constant_vec_commute(<4 x i32> %x) {
 ; CHECK-LABEL: @uadd_sat_constant_vec_commute(
-; CHECK-NEXT:    [[A:%.*]] = add <4 x i32> [[X:%.*]], <i32 42, i32 42, i32 42, i32 42>
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <4 x i32> [[X]], <i32 -43, i32 -43, i32 -43, i32 -43>
-; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> [[A]]
-; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> [[X:%.*]], <4 x i32> <i32 42, i32 42, i32 42, i32 42>)
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
   %a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
   %c = icmp ult <4 x i32> %x, <i32 -43, i32 -43, i32 -43, i32 -43>
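
For reference, the expanded uadd.sat form these tests start from is the
"unnecessary not" pattern matched in canonicalizeSaturatedAdd; a minimal
illustrative module (not part of the patch):

  define i32 @uadd_sat_ref(i32 %x, i32 %y) {
    %notx = xor i32 %x, -1           ; ~x
    %a = add i32 %y, %x
    %c = icmp ult i32 %notx, %y      ; (~X u< Y) iff X + Y overflows
    %r = select i1 %c, i32 -1, i32 %a
    ret i32 %r
  }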

Modified: llvm/trunk/test/Transforms/InstCombine/unsigned_saturated_sub.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/unsigned_saturated_sub.ll?rev=357103&r1=357102&r2=357103&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/unsigned_saturated_sub.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/unsigned_saturated_sub.ll Wed Mar 27 10:56:15 2019
@@ -1,21 +1,17 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt -instcombine -S < %s | FileCheck %s
 
-; Transforms for unsigned saturated subtraction idioms are tested here.
-; In all cases, we want to form a canonical min/max op (the compare and
-; select operands are the same), so that is recognized by the backend.
-; The backend recognition is tested in test/CodeGen/X86/psubus.ll.
+; Canonicalization of unsigned saturated subtraction idioms to
+; usub.sat() intrinsics is tested here.
 
 declare void @use(i64)
 
-; (a > b) ? a - b : 0 -> ((a > b) ? a : b) - b)
+; (a > b) ? a - b : 0 -> usub.sat(a, b)
 
 define i64 @max_sub_ugt(i64 %a, i64 %b) {
 ; CHECK-LABEL: @max_sub_ugt(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[A]], i64 [[B]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]]
-; CHECK-NEXT:    ret i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT:    ret i64 [[TMP1]]
 ;
   %cmp = icmp ugt i64 %a, %b
   %sub = sub i64 %a, %b
@@ -23,14 +19,12 @@ define i64 @max_sub_ugt(i64 %a, i64 %b)
   ret i64 %sel
 }
 
-; (a >= b) ? a - b : 0 -> ((a >= b) ? a : b) - b)
+; (a >= b) ? a - b : 0 -> usub.sat(a, b)
 
 define i64 @max_sub_uge(i64 %a, i64 %b) {
 ; CHECK-LABEL: @max_sub_uge(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]]
-; CHECK-NEXT:    ret i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT:    ret i64 [[TMP1]]
 ;
   %cmp = icmp uge i64 %a, %b
   %sub = sub i64 %a, %b
@@ -39,14 +33,12 @@ define i64 @max_sub_uge(i64 %a, i64 %b)
 }
 
 ; Again, with vectors:
-; (a > b) ? a - b : 0 -> ((a > b) ? a : b) - b)
+; (a > b) ? a - b : 0 -> usub.sat(a, b)
 
 define <4 x i32> @max_sub_ugt_vec(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: @max_sub_ugt_vec(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <4 x i32> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[A]], <4 x i32> [[B]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub <4 x i32> [[TMP2]], [[B]]
-; CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
+; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
 ;
   %cmp = icmp ugt <4 x i32> %a, %b
   %sub = sub <4 x i32> %a, %b
@@ -55,16 +47,14 @@ define <4 x i32> @max_sub_ugt_vec(<4 x i
 }
 
 ; Use extra ops to thwart icmp swapping canonicalization.
-; (b < a) ? a - b : 0 -> ((a > b) ? a : b) - b)
+; (b < a) ? a - b : 0 -> usub.sat(a, b)
 
 define i64 @max_sub_ult(i64 %a, i64 %b) {
 ; CHECK-LABEL: @max_sub_ult(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[B:%.*]], [[A:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[A]], i64 [[B]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
 ; CHECK-NEXT:    [[EXTRASUB:%.*]] = sub i64 [[B]], [[A]]
 ; CHECK-NEXT:    call void @use(i64 [[EXTRASUB]])
-; CHECK-NEXT:    ret i64 [[TMP3]]
+; CHECK-NEXT:    ret i64 [[TMP1]]
 ;
   %cmp = icmp ult i64 %b, %a
   %sub = sub i64 %a, %b
@@ -74,16 +64,14 @@ define i64 @max_sub_ult(i64 %a, i64 %b)
   ret i64 %sel
 }
 
-; (b > a) ? 0 : a - b -> ((a > b) ? a : b) - b)
+; (b > a) ? 0 : a - b -> usub.sat(a, b)
 
 define i64 @max_sub_ugt_sel_swapped(i64 %a, i64 %b) {
 ; CHECK-LABEL: @max_sub_ugt_sel_swapped(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i64 [[B:%.*]], [[A:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
 ; CHECK-NEXT:    [[EXTRASUB:%.*]] = sub i64 [[B]], [[A]]
 ; CHECK-NEXT:    call void @use(i64 [[EXTRASUB]])
-; CHECK-NEXT:    ret i64 [[TMP3]]
+; CHECK-NEXT:    ret i64 [[TMP1]]
 ;
   %cmp = icmp ugt i64 %b, %a
   %sub = sub i64 %a, %b
@@ -93,14 +81,12 @@ define i64 @max_sub_ugt_sel_swapped(i64
   ret i64 %sel
 }
 
-; (a < b) ? 0 : a - b -> ((a > b) ? a : b) - b)
+; (a < b) ? 0 : a - b -> usub.sat(a, b)
 
 define i64 @max_sub_ult_sel_swapped(i64 %a, i64 %b) {
 ; CHECK-LABEL: @max_sub_ult_sel_swapped(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[TMP2]], [[B]]
-; CHECK-NEXT:    ret i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT:    ret i64 [[TMP1]]
 ;
   %cmp = icmp ult i64 %a, %b
   %sub = sub i64 %a, %b
@@ -108,16 +94,15 @@ define i64 @max_sub_ult_sel_swapped(i64
   ret i64 %sel
 }
 
-; ((a > b) ? b - a : 0) -> (b - ((a > b) ? a : b))
+; ((a > b) ? b - a : 0) -> -usub.sat(a, b)
 
 define i64 @neg_max_sub_ugt(i64 %a, i64 %b) {
 ; CHECK-LABEL: @neg_max_sub_ugt(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[A]], i64 [[B]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[B]], [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i64 0, [[TMP1]]
 ; CHECK-NEXT:    [[EXTRASUB:%.*]] = sub i64 [[A]], [[B]]
 ; CHECK-NEXT:    call void @use(i64 [[EXTRASUB]])
-; CHECK-NEXT:    ret i64 [[TMP3]]
+; CHECK-NEXT:    ret i64 [[TMP2]]
 ;
   %cmp = icmp ugt i64 %a, %b
   %sub = sub i64 %b, %a
@@ -127,14 +112,13 @@ define i64 @neg_max_sub_ugt(i64 %a, i64
   ret i64 %sel
 }
 
-; ((b < a) ? b - a : 0) -> - ((a > b) ? a : b) - b)
+; ((b < a) ? b - a : 0) -> -usub.sat(a, b)
 
 define i64 @neg_max_sub_ult(i64 %a, i64 %b) {
 ; CHECK-LABEL: @neg_max_sub_ult(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt i64 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[A]], i64 [[B]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[B]], [[TMP2]]
-; CHECK-NEXT:    ret i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i64 0, [[TMP1]]
+; CHECK-NEXT:    ret i64 [[TMP2]]
 ;
   %cmp = icmp ult i64 %b, %a
   %sub = sub i64 %b, %a
@@ -142,14 +126,13 @@ define i64 @neg_max_sub_ult(i64 %a, i64
   ret i64 %sel
 }
 
-; ((b > a) ? 0 : b - a) -> - ((a > b) ? a : b) - b)
+; ((b > a) ? 0 : b - a) -> -usub.sat(a, b)
 
 define i64 @neg_max_sub_ugt_sel_swapped(i64 %a, i64 %b) {
 ; CHECK-LABEL: @neg_max_sub_ugt_sel_swapped(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[B]], [[TMP2]]
-; CHECK-NEXT:    ret i64 [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i64 0, [[TMP1]]
+; CHECK-NEXT:    ret i64 [[TMP2]]
 ;
   %cmp = icmp ugt i64 %b, %a
   %sub = sub i64 %b, %a
@@ -157,16 +140,15 @@ define i64 @neg_max_sub_ugt_sel_swapped(
   ret i64 %sel
 }
 
-; ((a < b) ? 0 : b - a) -> - ((a > b) ? a : b) - b)
+; ((a < b) ? 0 : b - a) -> -usub.sat(a, b)
 
 define i64 @neg_max_sub_ult_sel_swapped(i64 %a, i64 %b) {
 ; CHECK-LABEL: @neg_max_sub_ult_sel_swapped(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i64 [[B]], i64 [[A]]
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[B]], [[TMP2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.usub.sat.i64(i64 [[A:%.*]], i64 [[B:%.*]])
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i64 0, [[TMP1]]
 ; CHECK-NEXT:    [[EXTRASUB:%.*]] = sub i64 [[A]], [[B]]
 ; CHECK-NEXT:    call void @use(i64 [[EXTRASUB]])
-; CHECK-NEXT:    ret i64 [[TMP3]]
+; CHECK-NEXT:    ret i64 [[TMP2]]
 ;
   %cmp = icmp ult i64 %a, %b
   %sub = sub i64 %b, %a
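
And a sketch of the negated variants above (illustrative; the declare
is only needed so the module parses standalone):

  declare i64 @llvm.usub.sat.i64(i64, i64)

  define i64 @neg_usub_sat_sketch(i64 %a, i64 %b) {
    ; (b < a) ? b - a : 0  -->  -usub.sat(a, b)
    %sat = call i64 @llvm.usub.sat.i64(i64 %a, i64 %b)
    %neg = sub i64 0, %sat
    ret i64 %neg
  }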

More information about the llvm-commits mailing list