[llvm] 2cd23eb - [instcombine] Fold overflow check using umulo to comparison

Fri Jun 25 10:25:53 PDT 2021

Author: Philip Reames
Date: 2021-06-25T10:25:45-07:00
New Revision: 2cd23eb2438238b1297ff7b4368d673c449ff24f

URL: https://github.com/llvm/llvm-project/commit/2cd23eb2438238b1297ff7b4368d673c449ff24f
DIFF: https://github.com/llvm/llvm-project/commit/2cd23eb2438238b1297ff7b4368d673c449ff24f.diff

LOG: [instcombine] Fold overflow check using umulo to comparison

If we have a umul.with.overflow where the multiply result is not used and one of the operands is a constant, we can perform the overflow check cheaper with a comparison then by performing the multiply and extracting the overflow flag.

(Noticed when looking at the conditions SCEV emits for overflow checks.)

Differential Revision: https://reviews.llvm.org/D104665

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
    llvm/test/Transforms/InstCombine/umulo.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 67aca4e345add..2dee94392c218 100644

--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -3083,13 +3083,36 @@ Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
         return BinaryOperator::Create(BinOp, LHS, RHS);
       }
 
-      // If the normal result of the add is dead, and the RHS is a constant,
-      // we can transform this into a range comparison.
-      // overflow = uadd a, -4  -->  overflow = icmp ugt a, 3
-      if (WO->getIntrinsicID() == Intrinsic::uadd_with_overflow)
+      assert(*EV.idx_begin() == 1 &&
+             "unexpected extract index for overflow inst");
+
+      // If the normal result of the computation is dead, and the RHS is a
+      // constant, we can transform this into a range comparison for many cases.
+      // TODO: We can generalize these for non-constant rhs when the newly
+      // formed expressions are known to simplify.  Constants are merely one
+      // such case.
+      // TODO: Handle vector splats.
+      switch (WO->getIntrinsicID()) {
+      default:
+        break;
+      case Intrinsic::uadd_with_overflow:
+        // overflow = uadd a, -4  -->  overflow = icmp ugt a, 3
         if (ConstantInt *CI = dyn_cast<ConstantInt>(WO->getRHS()))
           return new ICmpInst(ICmpInst::ICMP_UGT, WO->getLHS(),
                               ConstantExpr::getNot(CI));
+        break;
+      case Intrinsic::umul_with_overflow:
+        // overflow for umul a, C  --> a > UINT_MAX udiv C
+        // (unless C == 0, in which case no overflow ever occurs)
+        if (ConstantInt *CI = dyn_cast<ConstantInt>(WO->getRHS())) {
+          assert(!CI->isZero() && "handled by instruction simplify");
+          auto UMax = APInt::getMaxValue(CI->getType()->getBitWidth());
+          auto *Op =
+            ConstantExpr::getUDiv(ConstantInt::get(CI->getType(), UMax), CI);
+          return new ICmpInst(ICmpInst::ICMP_UGT, WO->getLHS(), Op);
+        }
+        break;
+      };
     }
   }
   if (LoadInst *L = dyn_cast<LoadInst>(Agg))

diff  --git a/llvm/test/Transforms/InstCombine/umulo.ll b/llvm/test/Transforms/InstCombine/umulo.ll
index cce455e842881..3fb0b9443a904 100644
--- a/llvm/test/Transforms/InstCombine/umulo.ll
+++ b/llvm/test/Transforms/InstCombine/umulo.ll
@@ -35,8 +35,7 @@ define i1 @test_constant1(i8 %a) {
 
 define i1 @test_constant2(i8 %a) {
 ; CHECK-LABEL: @test_constant2(
-; CHECK-NEXT:    [[RES:%.*]] = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[A:%.*]], i8 2)
-; CHECK-NEXT:    [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[RES]], 1
+; CHECK-NEXT:    [[OVERFLOW:%.*]] = icmp slt i8 [[A:%.*]], 0
 ; CHECK-NEXT:    ret i1 [[OVERFLOW]]
 ;
   %res = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %a, i8 2)
@@ -46,8 +45,7 @@ define i1 @test_constant2(i8 %a) {
 
 define i1 @test_constant3(i8 %a) {
 ; CHECK-LABEL: @test_constant3(
-; CHECK-NEXT:    [[RES:%.*]] = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[A:%.*]], i8 3)
-; CHECK-NEXT:    [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[RES]], 1
+; CHECK-NEXT:    [[OVERFLOW:%.*]] = icmp ugt i8 [[A:%.*]], 85
 ; CHECK-NEXT:    ret i1 [[OVERFLOW]]
 ;
   %res = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %a, i8 3)
@@ -57,8 +55,7 @@ define i1 @test_constant3(i8 %a) {
 
 define i1 @test_constant4(i8 %a) {
 ; CHECK-LABEL: @test_constant4(
-; CHECK-NEXT:    [[RES:%.*]] = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[A:%.*]], i8 4)
-; CHECK-NEXT:    [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[RES]], 1
+; CHECK-NEXT:    [[OVERFLOW:%.*]] = icmp ugt i8 [[A:%.*]], 63
 ; CHECK-NEXT:    ret i1 [[OVERFLOW]]
 ;
   %res = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %a, i8 4)
@@ -69,8 +66,7 @@ define i1 @test_constant4(i8 %a) {
 
 define i1 @test_constant127(i8 %a) {
 ; CHECK-LABEL: @test_constant127(
-; CHECK-NEXT:    [[RES:%.*]] = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[A:%.*]], i8 127)
-; CHECK-NEXT:    [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[RES]], 1
+; CHECK-NEXT:    [[OVERFLOW:%.*]] = icmp ugt i8 [[A:%.*]], 2
 ; CHECK-NEXT:    ret i1 [[OVERFLOW]]
 ;
   %res = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %a, i8 127)
@@ -80,8 +76,7 @@ define i1 @test_constant127(i8 %a) {
 
 define i1 @test_constant128(i8 %a) {
 ; CHECK-LABEL: @test_constant128(
-; CHECK-NEXT:    [[RES:%.*]] = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[A:%.*]], i8 -128)
-; CHECK-NEXT:    [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[RES]], 1
+; CHECK-NEXT:    [[OVERFLOW:%.*]] = icmp ugt i8 [[A:%.*]], 1
 ; CHECK-NEXT:    ret i1 [[OVERFLOW]]
 ;
   %res = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %a, i8 128)
@@ -91,8 +86,7 @@ define i1 @test_constant128(i8 %a) {
 
 define i1 @test_constant255(i8 %a) {
 ; CHECK-LABEL: @test_constant255(
-; CHECK-NEXT:    [[RES:%.*]] = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[A:%.*]], i8 -1)
-; CHECK-NEXT:    [[OVERFLOW:%.*]] = extractvalue { i8, i1 } [[RES]], 1
+; CHECK-NEXT:    [[OVERFLOW:%.*]] = icmp ugt i8 [[A:%.*]], 1
 ; CHECK-NEXT:    ret i1 [[OVERFLOW]]
 ;
   %res = tail call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %a, i8 255)