[llvm] 9829f5e - [CVP] @llvm.[us]{min,max}() intrinsics handling

Sat Apr 10 14:34:12 PDT 2021

Author: Roman Lebedev
Date: 2021-04-11T00:33:47+03:00
New Revision: 9829f5e6b1bca9b61efc629770d28bb9014dec45

URL: https://github.com/llvm/llvm-project/commit/9829f5e6b1bca9b61efc629770d28bb9014dec45
DIFF: https://github.com/llvm/llvm-project/commit/9829f5e6b1bca9b61efc629770d28bb9014dec45.diff

LOG: [CVP] @llvm.[us]{min,max}() intrinsics handling

If we can tell that one specific argument is always the one taken,
bypass the intrinsic and use that argument directly.

Notably, we are indeed fine with a non-strict predicate
(if the operands are equal, either one is a valid result):
* UL: https://alive2.llvm.org/ce/z/69qVW9 https://alive2.llvm.org/ce/z/kNFTKf
      https://alive2.llvm.org/ce/z/AvaPw2 https://alive2.llvm.org/ce/z/oxo53i
* UG: https://alive2.llvm.org/ce/z/wxHeGH https://alive2.llvm.org/ce/z/Lf76qx
* SL: https://alive2.llvm.org/ce/z/hkeTGS https://alive2.llvm.org/ce/z/eR_b-W
* SG: https://alive2.llvm.org/ce/z/wEqRm7 https://alive2.llvm.org/ce/z/FpAsVr

Much like with all other comparison handling in CVP,
while we could sort-of handle two Value's,
at least for plain ICmpInst it does not appear to be worthwhile.

This only fires 78 times on test-suite + dt + rs,
but we don't canonicalize to these yet. (only SCEV produces them)

Added: 
    

Modified: 
    llvm/include/llvm/Analysis/LazyValueInfo.h
    llvm/lib/Analysis/LazyValueInfo.cpp
    llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
    llvm/test/Transforms/CorrelatedValuePropagation/min-max.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/LazyValueInfo.h b/llvm/include/llvm/Analysis/LazyValueInfo.h
index d5a407352284..57f732cc854b 100644

--- a/llvm/include/llvm/Analysis/LazyValueInfo.h
+++ b/llvm/include/llvm/Analysis/LazyValueInfo.h
@@ -77,6 +77,14 @@ class LazyValueInfo {
   Tristate getPredicateAt(unsigned Pred, Value *V, Constant *C,
                           Instruction *CxtI, bool UseBlockValue);
 
+  /// Determine whether the comparison `LHS Pred RHS` is known to be true or
+  /// false at the specified instruction. Although both operands are plain
+  /// Values, at least one must be a Constant; otherwise Unknown is returned.
+  /// \p Pred is a CmpInst predicate.
+  /// If \p UseBlockValue is true, the block value is also taken into account.
+  Tristate getPredicateAt(unsigned Pred, Value *LHS, Value *RHS,
+                          Instruction *CxtI, bool UseBlockValue);
+
   /// Determine whether the specified value is known to be a constant at the
   /// specified instruction. Return null if not.
   Constant *getConstant(Value *V, Instruction *CxtI);

diff  --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp
index 6bf03e884c93..24631d7da2fe 100644
--- a/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -1812,6 +1812,24 @@ LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C,
   return Unknown;
 }
 
+LazyValueInfo::Tristate LazyValueInfo::getPredicateAt(unsigned P, Value *LHS,
+                                                      Value *RHS,
+                                                      Instruction *CxtI,
+                                                      bool UseBlockValue) {
+  CmpInst::Predicate Pred = (CmpInst::Predicate)P;
+  // Forward to the (Value, Constant) overload, with the constant on the right.
+  if (auto *C = dyn_cast<Constant>(RHS))
+    return getPredicateAt(P, LHS, C, CxtI, UseBlockValue);
+  if (auto *C = dyn_cast<Constant>(LHS))
+    return getPredicateAt(CmpInst::getSwappedPredicate(Pred), RHS, C, CxtI,
+                          UseBlockValue);
+
+  // Neither operand is a Constant. We could still try to answer by fetching
+  // both constant ranges and applying ConstantRange::icmp(), but so far that
+  // has not appeared to be profitable, so conservatively report Unknown.
+  return LazyValueInfo::Unknown;
+}
+
 void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
                                BasicBlock *NewSucc) {
   if (PImpl) {

diff  --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index f7137d281810..819d8a3e6d2c 100644
--- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -87,6 +87,7 @@ STATISTIC(NumOverflows, "Number of overflow checks removed");
 STATISTIC(NumSaturating,
     "Number of saturating arithmetics converted to normal arithmetics");
 STATISTIC(NumNonNull, "Number of function pointer arguments marked non-null");
+STATISTIC(NumMinMax, "Number of llvm.[us]{min,max} intrinsics removed");
 
 namespace {
 
@@ -499,6 +500,19 @@ static void processAbsIntrinsic(IntrinsicInst *II, LazyValueInfo *LVI) {
     processBinOp(BO, LVI);
 }
 
+// Bypass a min/max intrinsic that is known to always pick one of its operands.
+static void processMinMaxIntrinsic(MinMaxIntrinsic *MM, LazyValueInfo *LVI) {
+  CmpInst::Predicate Pred = CmpInst::getNonStrictPredicate(MM->getPredicate());
+  LazyValueInfo::Tristate Result = LVI->getPredicateAt(
+      Pred, MM->getLHS(), MM->getRHS(), MM, /*UseBlockValue=*/true);
+  if (Result == LazyValueInfo::Unknown)
+    return;
+
+  ++NumMinMax;
+  MM->replaceAllUsesWith(MM->getOperand(!Result)); // True: op 0, False: op 1
+  MM->eraseFromParent();
+}
+
 // Rewrite this with.overflow intrinsic as non-overflowing.
 static void processOverflowIntrinsic(WithOverflowInst *WO, LazyValueInfo *LVI) {
   IRBuilder<> B(WO);
@@ -550,6 +564,11 @@ static bool processCallSite(CallBase &CB, LazyValueInfo *LVI) {
     return true;
   }
 
+  if (auto *MM = dyn_cast<MinMaxIntrinsic>(&CB)) {
+    processMinMaxIntrinsic(MM, LVI);
+    return true;
+  }
+
   if (auto *WO = dyn_cast<WithOverflowInst>(&CB)) {
     if (WO->getLHS()->getType()->isIntegerTy() && willNotOverflow(WO, LVI)) {
       processOverflowIntrinsic(WO, LVI);

diff  --git a/llvm/test/Transforms/CorrelatedValuePropagation/min-max.ll b/llvm/test/Transforms/CorrelatedValuePropagation/min-max.ll
index c5053833e748..7fc3755c2630 100644
--- a/llvm/test/Transforms/CorrelatedValuePropagation/min-max.ll
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/min-max.ll
@@ -60,8 +60,7 @@ define i8 @test5(i8 %x) {
 ; CHECK-LABEL: @test5(
 ; CHECK-NEXT:    [[LIM:%.*]] = icmp ule i8 [[X:%.*]], 42
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[LIM]])
-; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.umin.i8(i8 [[X]], i8 42)
-; CHECK-NEXT:    ret i8 [[R]]
+; CHECK-NEXT:    ret i8 [[X]]
 ;
   %lim = icmp ule i8 %x, 42
   call void @llvm.assume(i1 %lim)
@@ -109,8 +108,7 @@ define i8 @test9(i8 %x) {
 ; CHECK-LABEL: @test9(
 ; CHECK-NEXT:    [[LIM:%.*]] = icmp uge i8 [[X:%.*]], 42
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[LIM]])
-; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.umax.i8(i8 [[X]], i8 42)
-; CHECK-NEXT:    ret i8 [[R]]
+; CHECK-NEXT:    ret i8 [[X]]
 ;
   %lim = icmp uge i8 %x, 42
   call void @llvm.assume(i1 %lim)
@@ -158,8 +156,7 @@ define i8 @test13(i8 %x) {
 ; CHECK-LABEL: @test13(
 ; CHECK-NEXT:    [[LIM:%.*]] = icmp sle i8 [[X:%.*]], 42
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[LIM]])
-; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.smin.i8(i8 [[X]], i8 42)
-; CHECK-NEXT:    ret i8 [[R]]
+; CHECK-NEXT:    ret i8 [[X]]
 ;
   %lim = icmp sle i8 %x, 42
   call void @llvm.assume(i1 %lim)
@@ -207,8 +204,7 @@ define i8 @test17(i8 %x) {
 ; CHECK-LABEL: @test17(
 ; CHECK-NEXT:    [[LIM:%.*]] = icmp sge i8 [[X:%.*]], 42
 ; CHECK-NEXT:    call void @llvm.assume(i1 [[LIM]])
-; CHECK-NEXT:    [[R:%.*]] = call i8 @llvm.smax.i8(i8 [[X]], i8 42)
-; CHECK-NEXT:    ret i8 [[R]]
+; CHECK-NEXT:    ret i8 [[X]]
 ;
   %lim = icmp sge i8 %x, 42
   call void @llvm.assume(i1 %lim)