[llvm] 4220ef2 - [InstCombine] Add fold for redundant sign bits count comparison

Sat Jul 30 02:07:13 PDT 2022

Author: Alexander Shaposhnikov
Date: 2022-07-30T09:06:53Z
New Revision: 4220ef2be1c911f92b48a895fdd18e4077b322f5

URL: https://github.com/llvm/llvm-project/commit/4220ef2be1c911f92b48a895fdd18e4077b322f5
DIFF: https://github.com/llvm/llvm-project/commit/4220ef2be1c911f92b48a895fdd18e4077b322f5.diff

LOG: [InstCombine] Add fold for redundant sign bits count comparison

For power-of-2 C:
((X s>> ShiftC) ^ X) u< C --> (X + C) u< (C << 1)
((X s>> ShiftC) ^ X) u> (C - 1) --> (X + C) u> ((C << 1) - 1)

(https://github.com/llvm/llvm-project/issues/56479)

Test plan:
0/ ninja check-llvm check-clang + bootstrap LLVM/Clang
1/ https://alive2.llvm.org/ce/z/eEUfx3

Differential revision: https://reviews.llvm.org/D130433

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
    llvm/lib/Transforms/InstCombine/InstCombineInternal.h
    llvm/test/Transforms/InstCombine/icmp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index fc3b115e09bd..4ac04fb826fc 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1597,6 +1597,9 @@ Instruction *InstCombinerImpl::foldICmpTruncConstant(ICmpInst &Cmp,
 Instruction *InstCombinerImpl::foldICmpXorConstant(ICmpInst &Cmp,
                                                    BinaryOperator *Xor,
                                                    const APInt &C) {
+  if (Instruction *I = foldICmpXorShiftConst(Cmp, Xor, C))
+    return I;
+
   Value *X = Xor->getOperand(0);
   Value *Y = Xor->getOperand(1);
   const APInt *XorC;
@@ -1660,6 +1663,37 @@ Instruction *InstCombinerImpl::foldICmpXorConstant(ICmpInst &Cmp,
   return nullptr;
 }
 
+/// For power-of-2 C:
+/// ((X s>> ShiftC) ^ X) u< C --> (X + C) u< (C << 1)
+/// ((X s>> ShiftC) ^ X) u> (C - 1) --> (X + C) u> ((C << 1) - 1)
+Instruction *InstCombinerImpl::foldICmpXorShiftConst(ICmpInst &Cmp,
+                                                     BinaryOperator *Xor,
+                                                     const APInt &C) {
+  CmpInst::Predicate Pred = Cmp.getPredicate();
+  APInt PowerOf2;
+  if (Pred == ICmpInst::ICMP_ULT)
+    PowerOf2 = C;
+  else if (Pred == ICmpInst::ICMP_UGT && !C.isMaxValue())
+    PowerOf2 = C + 1;
+  else
+    return nullptr;
+  if (!PowerOf2.isPowerOf2())
+    return nullptr;
+  Value *X;
+  const APInt *ShiftC;
+  if (!match(Xor, m_OneUse(m_c_Xor(m_Value(X),
+                                   m_AShr(m_Deferred(X), m_APInt(ShiftC))))))
+    return nullptr;
+  uint64_t Shift = ShiftC->getLimitedValue();
+  Type *XType = X->getType();
+  if (Shift == 0 || PowerOf2.isMinSignedValue())
+    return nullptr;
+  Value *Add = Builder.CreateAdd(X, ConstantInt::get(XType, PowerOf2));
+  APInt Bound =
+      Pred == ICmpInst::ICMP_ULT ? PowerOf2 << 1 : ((PowerOf2 << 1) - 1);
+  return new ICmpInst(Pred, Add, ConstantInt::get(XType, Bound));
+}
+
 /// Fold icmp (and (sh X, Y), C2), C1.
 Instruction *InstCombinerImpl::foldICmpAndShift(ICmpInst &Cmp,
                                                 BinaryOperator *And,

diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 664226ec187b..cb533908e3fe 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -716,6 +716,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
                                      const APInt &C1);
   Instruction *foldICmpAndShift(ICmpInst &Cmp, BinaryOperator *And,
                                 const APInt &C1, const APInt &C2);
+  Instruction *foldICmpXorShiftConst(ICmpInst &Cmp, BinaryOperator *Xor,
+                                     const APInt &C);
   Instruction *foldICmpShrConstConst(ICmpInst &I, Value *ShAmt, const APInt &C1,
                                      const APInt &C2);
   Instruction *foldICmpShlConstConst(ICmpInst &I, Value *ShAmt, const APInt &C1,

diff  --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll
index ef8da2ff6fb0..1cfbeb6ed1d3 100644
--- a/llvm/test/Transforms/InstCombine/icmp.ll
+++ b/llvm/test/Transforms/InstCombine/icmp.ll
@@ -4113,9 +4113,8 @@ define i1 @signbit_true_logic_uses_commute(i64 %x) {
 
 define i1 @redundant_sign_bit_count_ult_1_2(i32 %x) {
 ; CHECK-LABEL: @redundant_sign_bit_count_ult_1_2(
-; CHECK-NEXT:    [[Y:%.*]] = ashr i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[Z:%.*]] = xor i32 [[Y]], [[X]]
-; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[Z]], 4
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[X:%.*]], 4
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[TMP1]], 8
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %y = ashr i32 %x, 1
@@ -4126,9 +4125,8 @@ define i1 @redundant_sign_bit_count_ult_1_2(i32 %x) {
 
 define i1 @redundant_sign_bit_count_ult_1_30(i32 %x) {
 ; CHECK-LABEL: @redundant_sign_bit_count_ult_1_30(
-; CHECK-NEXT:    [[Y:%.*]] = ashr i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[Z:%.*]] = xor i32 [[Y]], [[X]]
-; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[Z]], 1073741824
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[X:%.*]], 1073741824
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt i32 [[TMP1]], -1
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %y = ashr i32 %x, 1
@@ -4139,9 +4137,8 @@ define i1 @redundant_sign_bit_count_ult_1_30(i32 %x) {
 
 define i1 @redundant_sign_bit_count_ult_31_2(i32 %x) {
 ; CHECK-LABEL: @redundant_sign_bit_count_ult_31_2(
-; CHECK-NEXT:    [[Y:%.*]] = ashr i32 [[X:%.*]], 31
-; CHECK-NEXT:    [[Z:%.*]] = xor i32 [[Y]], [[X]]
-; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[Z]], 4
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[X:%.*]], 4
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[TMP1]], 8
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %y = ashr i32 %x, 31
@@ -4152,9 +4149,8 @@ define i1 @redundant_sign_bit_count_ult_31_2(i32 %x) {
 
 define i1 @redundant_sign_bit_count_ult_31_30(i32 %x) {
 ; CHECK-LABEL: @redundant_sign_bit_count_ult_31_30(
-; CHECK-NEXT:    [[Y:%.*]] = ashr i32 [[X:%.*]], 31
-; CHECK-NEXT:    [[Z:%.*]] = xor i32 [[Y]], [[X]]
-; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[Z]], 1073741824
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[X:%.*]], 1073741824
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt i32 [[TMP1]], -1
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %y = ashr i32 %x, 31
@@ -4169,8 +4165,8 @@ define i1 @redundant_sign_bit_count_ult_31_30_extra_use_ashr(i32 %x) {
 ; CHECK-LABEL: @redundant_sign_bit_count_ult_31_30_extra_use_ashr(
 ; CHECK-NEXT:    [[Y:%.*]] = ashr i32 [[X:%.*]], 31
 ; CHECK-NEXT:    call void @use_i32(i32 [[Y]])
-; CHECK-NEXT:    [[Z:%.*]] = xor i32 [[Y]], [[X]]
-; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[Z]], 1073741824
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[X]], 1073741824
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt i32 [[TMP1]], -1
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %y = ashr i32 %x, 31
@@ -4224,9 +4220,8 @@ define i1 @wrong_shift_opcode_i8(i8 %x) {
 define i1 @redundant_sign_bit_count_ult_31_30_commute(i32 %xsrc) {
 ; CHECK-LABEL: @redundant_sign_bit_count_ult_31_30_commute(
 ; CHECK-NEXT:    [[X:%.*]] = mul i32 [[XSRC:%.*]], 13
-; CHECK-NEXT:    [[Y:%.*]] = ashr i32 [[X]], 31
-; CHECK-NEXT:    [[Z:%.*]] = xor i32 [[X]], [[Y]]
-; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[Z]], 1073741824
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[X]], 1073741824
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt i32 [[TMP1]], -1
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %x = mul i32 %xsrc, 13 ; thwart complexity-based canonicalization
@@ -4238,9 +4233,8 @@ define i1 @redundant_sign_bit_count_ult_31_30_commute(i32 %xsrc) {
 
 define i1 @redundant_sign_bit_count_i8(i8 %x) {
 ; CHECK-LABEL: @redundant_sign_bit_count_i8(
-; CHECK-NEXT:    [[Y:%.*]] = ashr i8 [[X:%.*]], 5
-; CHECK-NEXT:    [[Z:%.*]] = xor i8 [[Y]], [[X]]
-; CHECK-NEXT:    [[C:%.*]] = icmp ult i8 [[Z]], 2
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[X:%.*]], 2
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i8 [[TMP1]], 4
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %y = ashr i8 %x, 5
@@ -4252,9 +4246,8 @@ define i1 @redundant_sign_bit_count_i8(i8 %x) {
 define <2 x i1> @redundant_sign_bit_count_ult_31_30_vector(<2 x i32> %xsrc) {
 ; CHECK-LABEL: @redundant_sign_bit_count_ult_31_30_vector(
 ; CHECK-NEXT:    [[X:%.*]] = mul <2 x i32> [[XSRC:%.*]], <i32 13, i32 13>
-; CHECK-NEXT:    [[Y:%.*]] = ashr <2 x i32> [[X]], <i32 31, i32 31>
-; CHECK-NEXT:    [[Z:%.*]] = xor <2 x i32> [[X]], [[Y]]
-; CHECK-NEXT:    [[C:%.*]] = icmp ult <2 x i32> [[Z]], <i32 1073741824, i32 1073741824>
+; CHECK-NEXT:    [[TMP1:%.*]] = add <2 x i32> [[X]], <i32 1073741824, i32 1073741824>
+; CHECK-NEXT:    [[C:%.*]] = icmp sgt <2 x i32> [[TMP1]], <i32 -1, i32 -1>
 ; CHECK-NEXT:    ret <2 x i1> [[C]]
 ;
   %x = mul <2 x i32> %xsrc, <i32 13, i32 13> ; thwart complexity-based canonicalization
@@ -4266,9 +4259,8 @@ define <2 x i1> @redundant_sign_bit_count_ult_31_30_vector(<2 x i32> %xsrc) {
 
 define i1 @redundant_sign_bit_count_ugt_1_2(i32 %x) {
 ; CHECK-LABEL: @redundant_sign_bit_count_ugt_1_2(
-; CHECK-NEXT:    [[Y:%.*]] = ashr i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[Z:%.*]] = xor i32 [[Y]], [[X]]
-; CHECK-NEXT:    [[C:%.*]] = icmp ugt i32 [[Z]], 3
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[X:%.*]], -4
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[TMP1]], -8
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %y = ashr i32 %x, 1
@@ -4279,9 +4271,8 @@ define i1 @redundant_sign_bit_count_ugt_1_2(i32 %x) {
 
 define i1 @redundant_sign_bit_count_ugt_1_30(i32 %x) {
 ; CHECK-LABEL: @redundant_sign_bit_count_ugt_1_30(
-; CHECK-NEXT:    [[Y:%.*]] = ashr i32 [[X:%.*]], 1
-; CHECK-NEXT:    [[Z:%.*]] = xor i32 [[Y]], [[X]]
-; CHECK-NEXT:    [[C:%.*]] = icmp ugt i32 [[Z]], 1073741823
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[X:%.*]], 1073741824
+; CHECK-NEXT:    [[C:%.*]] = icmp slt i32 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %y = ashr i32 %x, 1
@@ -4292,9 +4283,8 @@ define i1 @redundant_sign_bit_count_ugt_1_30(i32 %x) {
 
 define i1 @redundant_sign_bit_count_ugt_31_2(i32 %x) {
 ; CHECK-LABEL: @redundant_sign_bit_count_ugt_31_2(
-; CHECK-NEXT:    [[Y:%.*]] = ashr i32 [[X:%.*]], 31
-; CHECK-NEXT:    [[Z:%.*]] = xor i32 [[Y]], [[X]]
-; CHECK-NEXT:    [[C:%.*]] = icmp ugt i32 [[Z]], 3
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[X:%.*]], -4
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[TMP1]], -8
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %y = ashr i32 %x, 31
@@ -4305,9 +4295,8 @@ define i1 @redundant_sign_bit_count_ugt_31_2(i32 %x) {
 
 define i1 @redundant_sign_bit_count_ugt_31_30(i32 %x) {
 ; CHECK-LABEL: @redundant_sign_bit_count_ugt_31_30(
-; CHECK-NEXT:    [[Y:%.*]] = ashr i32 [[X:%.*]], 31
-; CHECK-NEXT:    [[Z:%.*]] = xor i32 [[Y]], [[X]]
-; CHECK-NEXT:    [[C:%.*]] = icmp ugt i32 [[Z]], 1073741823
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[X:%.*]], 1073741824
+; CHECK-NEXT:    [[C:%.*]] = icmp slt i32 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[C]]
 ;
   %y = ashr i32 %x, 31