[llvm] 6c716c8 - [InstCombine] add more folds for unsigned overflow checks
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 29 13:10:21 PST 2021
Author: Sanjay Patel
Date: 2021-12-29T15:53:56-05:00
New Revision: 6c716c8589506cec407f01b0cd60005a3e346cf0
URL: https://github.com/llvm/llvm-project/commit/6c716c8589506cec407f01b0cd60005a3e346cf0
DIFF: https://github.com/llvm/llvm-project/commit/6c716c8589506cec407f01b0cd60005a3e346cf0.diff
LOG: [InstCombine] add more folds for unsigned overflow checks
((Op1 + C) & C) u< Op1 --> Op1 != 0
((Op1 + C) & C) u>= Op1 --> Op1 == 0
Op0 u> ((Op0 + C) & C) --> Op0 != 0
Op0 u<= ((Op0 + C) & C) --> Op0 == 0
https://alive2.llvm.org/ce/z/iUfXJN
https://alive2.llvm.org/ce/z/caAtjj
; Alive2 source pattern: ((x + y) & y) u< x, valid only when y is a
; low-bit mask (all set bits contiguous from bit 0, e.g. 0x01ff).
define i1 @src(i8 %x, i8 %y) {
; the add/mask must be with a low-bit mask (0x01ff...)
; Encode that precondition: y is a low-bit mask iff y + 1 is a power of
; two (y == 2^k - 1), i.e. ctpop(y + 1) == 1; llvm.assume feeds this
; fact to the verifier/optimizer.
%y1 = add i8 %y, 1
%pop = call i8 @llvm.ctpop.i8(i8 %y1)
%ismask = icmp eq i8 %pop, 1
call void @llvm.assume(i1 %ismask)
; The pattern being folded: ((%x + %y) & %y) u< %x
%a = add i8 %x, %y
%m = and i8 %a, %y
%r = icmp ult i8 %m, %x
ret i1 %r
}
; Alive2 target: under the low-bit-mask precondition on %y, the compare
; reduces to a zero-test of %x (%y is no longer needed).
define i1 @tgt(i8 %x, i8 %y) {
%r = icmp ne i8 %x, 0
ret i1 %r
}
I suspect this can be generalized in some way, but this
is the pattern I'm seeing in a motivating test based on
issue #52851.
Added:
Modified:
llvm/include/llvm/IR/PatternMatch.h
llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/PatternMatch.h b/llvm/include/llvm/IR/PatternMatch.h
index 320deb80bb1f8..f9f4f16038619 100644
--- a/llvm/include/llvm/IR/PatternMatch.h
+++ b/llvm/include/llvm/IR/PatternMatch.h
@@ -589,6 +589,9 @@ struct is_lowbit_mask {
inline cst_pred_ty<is_lowbit_mask> m_LowBitMask() {
return cst_pred_ty<is_lowbit_mask>();
}
+inline api_pred_ty<is_lowbit_mask> m_LowBitMask(const APInt *&V) {
+ return V;
+}
struct icmp_pred_with_threshold {
ICmpInst::Predicate Pred;
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 59e131bd3b6a2..5b6728e466fc0 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -3949,6 +3949,33 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
(Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE))
return new ICmpInst(Pred, X, Builder.CreateNot(Op0));
+ {
+ // Similar to above: an unsigned overflow comparison may use offset + mask:
+ // ((Op1 + C) & C) u< Op1 --> Op1 != 0
+ // ((Op1 + C) & C) u>= Op1 --> Op1 == 0
+ // Op0 u> ((Op0 + C) & C) --> Op0 != 0
+ // Op0 u<= ((Op0 + C) & C) --> Op0 == 0
+ BinaryOperator *BO;
+ const APInt *C;
+ if ((Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_UGE) &&
+ match(Op0, m_And(m_BinOp(BO), m_LowBitMask(C))) &&
+ match(BO, m_Add(m_Specific(Op1), m_SpecificIntAllowUndef(*C)))) {
+ CmpInst::Predicate NewPred =
+ Pred == ICmpInst::ICMP_ULT ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
+ Constant *Zero = ConstantInt::getNullValue(Op1->getType());
+ return new ICmpInst(NewPred, Op1, Zero);
+ }
+
+ if ((Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULE) &&
+ match(Op1, m_And(m_BinOp(BO), m_LowBitMask(C))) &&
+ match(BO, m_Add(m_Specific(Op0), m_SpecificIntAllowUndef(*C)))) {
+ CmpInst::Predicate NewPred =
+ Pred == ICmpInst::ICMP_UGT ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
+ Constant *Zero = ConstantInt::getNullValue(Op1->getType());
+ return new ICmpInst(NewPred, Op0, Zero);
+ }
+ }
+
bool NoOp0WrapProblem = false, NoOp1WrapProblem = false;
if (BO0 && isa<OverflowingBinaryOperator>(BO0))
NoOp0WrapProblem =
diff --git a/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check.ll b/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check.ll
index 11ec67bd67527..e76fcbad61c82 100644
--- a/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check.ll
+++ b/llvm/test/Transforms/InstCombine/unsigned-add-lack-of-overflow-check.ll
@@ -200,9 +200,7 @@ define i1 @n15_wrong_pred7(i8 %x, i8 %y) {
define i1 @low_bitmask_ult(i8 %x) {
; CHECK-LABEL: @low_bitmask_ult(
-; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], 31
-; CHECK-NEXT: [[M:%.*]] = and i8 [[A]], 31
-; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[M]], [[X]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[X:%.*]], 0
; CHECK-NEXT: ret i1 [[R]]
;
%a = add i8 %x, 31
@@ -213,9 +211,7 @@ define i1 @low_bitmask_ult(i8 %x) {
define <2 x i1> @low_bitmask_uge(<2 x i8> %x) {
; CHECK-LABEL: @low_bitmask_uge(
-; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[X:%.*]], <i8 15, i8 undef>
-; CHECK-NEXT: [[M:%.*]] = and <2 x i8> [[A]], <i8 15, i8 15>
-; CHECK-NEXT: [[R:%.*]] = icmp uge <2 x i8> [[M]], [[X]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[X:%.*]], zeroinitializer
; CHECK-NEXT: ret <2 x i1> [[R]]
;
%a = add <2 x i8> %x, <i8 15, i8 undef>
@@ -227,9 +223,7 @@ define <2 x i1> @low_bitmask_uge(<2 x i8> %x) {
define i1 @low_bitmask_ugt(i8 %px) {
; CHECK-LABEL: @low_bitmask_ugt(
; CHECK-NEXT: [[X:%.*]] = mul i8 [[PX:%.*]], [[PX]]
-; CHECK-NEXT: [[A:%.*]] = add i8 [[X]], 127
-; CHECK-NEXT: [[M:%.*]] = and i8 [[A]], 127
-; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[M]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[X]], 0
; CHECK-NEXT: ret i1 [[R]]
;
%x = mul i8 %px, %px
@@ -242,9 +236,7 @@ define i1 @low_bitmask_ugt(i8 %px) {
define <2 x i1> @low_bitmask_ule(<2 x i8> %px) {
; CHECK-LABEL: @low_bitmask_ule(
; CHECK-NEXT: [[X:%.*]] = mul <2 x i8> [[PX:%.*]], [[PX]]
-; CHECK-NEXT: [[A:%.*]] = add <2 x i8> [[X]], <i8 3, i8 3>
-; CHECK-NEXT: [[M:%.*]] = and <2 x i8> [[A]], <i8 3, i8 3>
-; CHECK-NEXT: [[R:%.*]] = icmp ule <2 x i8> [[X]], [[M]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[X]], zeroinitializer
; CHECK-NEXT: ret <2 x i1> [[R]]
;
%x = mul <2 x i8> %px, %px
@@ -259,7 +251,7 @@ define i1 @low_bitmask_ult_use(i8 %x) {
; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], 7
; CHECK-NEXT: [[M:%.*]] = and i8 [[A]], 7
; CHECK-NEXT: call void @use8(i8 [[M]])
-; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[M]], [[X]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[X]], 0
; CHECK-NEXT: ret i1 [[R]]
;
%a = add i8 %x, 7
@@ -274,8 +266,7 @@ define i1 @low_bitmask_ugt_use(i8 %px) {
; CHECK-NEXT: [[X:%.*]] = mul i8 [[PX:%.*]], [[PX]]
; CHECK-NEXT: [[A:%.*]] = add i8 [[X]], 3
; CHECK-NEXT: call void @use8(i8 [[A]])
-; CHECK-NEXT: [[M:%.*]] = and i8 [[A]], 3
-; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[M]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[X]], 0
; CHECK-NEXT: ret i1 [[R]]
;
%x = mul i8 %px, %px
@@ -286,6 +277,8 @@ define i1 @low_bitmask_ugt_use(i8 %px) {
ret i1 %r
}
+; negative test - need same low bitmask
+
define i1 @low_bitmask_ult_wrong_mask1(i8 %x) {
; CHECK-LABEL: @low_bitmask_ult_wrong_mask1(
; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], 30
@@ -299,6 +292,8 @@ define i1 @low_bitmask_ult_wrong_mask1(i8 %x) {
ret i1 %r
}
+; negative test - need same low bitmask
+
define i1 @low_bitmask_uge_wrong_mask2(i8 %x) {
; CHECK-LABEL: @low_bitmask_uge_wrong_mask2(
; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], 31
@@ -312,6 +307,8 @@ define i1 @low_bitmask_uge_wrong_mask2(i8 %x) {
ret i1 %r
}
+; negative test - predicate mandates operand order
+
define i1 @low_bitmask_ugt_swapped(i8 %x) {
; CHECK-LABEL: @low_bitmask_ugt_swapped(
; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], 127
@@ -325,6 +322,8 @@ define i1 @low_bitmask_ugt_swapped(i8 %x) {
ret i1 %r
}
+; negative test - unsigned preds only
+
define i1 @low_bitmask_sgt(i8 %px) {
; CHECK-LABEL: @low_bitmask_sgt(
; CHECK-NEXT: [[X:%.*]] = mul i8 [[PX:%.*]], [[PX]]
@@ -340,6 +339,8 @@ define i1 @low_bitmask_sgt(i8 %px) {
ret i1 %r
}
+; negative test - specific operand must match
+
define i1 @low_bitmask_ult_specific_op(i8 %x, i8 %y) {
; CHECK-LABEL: @low_bitmask_ult_specific_op(
; CHECK-NEXT: [[A:%.*]] = add i8 [[X:%.*]], 31
More information about the llvm-commits
mailing list