[llvm] 2a88d00 - [InstCombine] fold sub-of-umax to 0-usubsat

Tue Nov 9 09:46:11 PST 2021

Author: Sanjay Patel
Date: 2021-11-09T12:46:03-05:00
New Revision: 2a88d00cf250534f31c706bc832f0f6386c28ef3

URL: https://github.com/llvm/llvm-project/commit/2a88d00cf250534f31c706bc832f0f6386c28ef3
DIFF: https://github.com/llvm/llvm-project/commit/2a88d00cf250534f31c706bc832f0f6386c28ef3.diff

LOG: [InstCombine] fold sub-of-umax to 0-usubsat

Op0 - umax(X, Op0) --> 0 - usub.sat(X, Op0)

I'm not sure if this is really an improvement in IR because
we probably have better recognition/analysis for min/max,
but this lines up with the fold we do for the icmp+select
idiom and removes another diff from D98152.

This is similar to the previous fold in the code that was
added with:
83c2fb9f66be
baa6a851308d

https://alive2.llvm.org/ce/z/5MrVB9

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
    llvm/test/Transforms/InstCombine/sub-minmax.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 3e0cc549b2b5..8cc64950a32f 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2175,6 +2175,12 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
     return replaceInstUsesWith(
         I, Builder.CreateIntrinsic(Intrinsic::usub_sat, {Ty}, {X, Op1}));
 
+  // Op0 - umax(X, Op0) --> 0 - usub.sat(X, Op0)
+  if (match(Op1, m_OneUse(m_c_UMax(m_Value(X), m_Specific(Op0))))) {
+    Value *USub = Builder.CreateIntrinsic(Intrinsic::usub_sat, {Ty}, {X, Op0});
+    return BinaryOperator::CreateNeg(USub);
+  }
+
   // C - ctpop(X) => ctpop(~X) if C is bitwidth
   if (match(Op0, m_SpecificInt(Ty->getScalarSizeInBits())) &&
       match(Op1, m_OneUse(m_Intrinsic<Intrinsic::ctpop>(m_Value(X)))))

diff  --git a/llvm/test/Transforms/InstCombine/sub-minmax.ll b/llvm/test/Transforms/InstCombine/sub-minmax.ll
index 6b58b167302c..5b1100ddb9fb 100644
--- a/llvm/test/Transforms/InstCombine/sub-minmax.ll
+++ b/llvm/test/Transforms/InstCombine/sub-minmax.ll
@@ -534,8 +534,8 @@ define i8 @umax_sub_op0_use(i8 %x, i8 %y) {
 
 define i8 @umax_sub_op1(i8 %x, i8 %y) {
 ; CHECK-LABEL: @umax_sub_op1(
-; CHECK-NEXT:    [[U:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = sub i8 [[Y]], [[U]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call i8 @llvm.usub.sat.i8(i8 [[X:%.*]], i8 [[Y:%.*]])
+; CHECK-NEXT:    [[R:%.*]] = sub i8 0, [[TMP1]]
 ; CHECK-NEXT:    ret i8 [[R]]
 ;
   %u = call i8 @llvm.umax.i8(i8 %x, i8 %y)
@@ -545,8 +545,8 @@ define i8 @umax_sub_op1(i8 %x, i8 %y) {
 
 define <2 x i8> @umax_sub_op1_vec_commute(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @umax_sub_op1_vec_commute(
-; CHECK-NEXT:    [[U:%.*]] = call <2 x i8> @llvm.umax.v2i8(<2 x i8> [[Y:%.*]], <2 x i8> [[X:%.*]])
-; CHECK-NEXT:    [[R:%.*]] = sub <2 x i8> [[Y]], [[U]]
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i8> @llvm.usub.sat.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]])
+; CHECK-NEXT:    [[R:%.*]] = sub <2 x i8> zeroinitializer, [[TMP1]]
 ; CHECK-NEXT:    ret <2 x i8> [[R]]
 ;
   %u = call <2 x i8> @llvm.umax.v2i8(<2 x i8> %y, <2 x i8> %x)