[llvm] 8fce94f - [InstCombine] canonicalize icmp with trunc op into mask and cmp, part 2

Tue Nov 16 06:29:00 PST 2021

Author: Sanjay Patel
Date: 2021-11-16T09:27:30-05:00
New Revision: 8fce94f91610bc4614b1fe0bff8f143ea2b54742

URL: https://github.com/llvm/llvm-project/commit/8fce94f91610bc4614b1fe0bff8f143ea2b54742
DIFF: https://github.com/llvm/llvm-project/commit/8fce94f91610bc4614b1fe0bff8f143ea2b54742.diff

LOG: [InstCombine] canonicalize icmp with trunc op into mask and cmp, part 2

If C is a high-bit mask:
(trunc X) u< C --> (X & C) != C (are any masked-high-bits clear?)

If C is a low-bit mask:
(trunc X) u> C --> (X & ~C) != 0 (are any masked-high-bits set?)

If C is the bitwise-not of a power-of-2 (exactly one clear bit):
(trunc X) u> C --> (X & (C+1)) == C+1 (are all masked-high-bits set?)

This extends the fold added with:
acabad9ff6bf (https://alive2.llvm.org/ce/z/aFr7qV)

Using decomposeBitTestICmp() to generalize this is a planned follow-up, but that requires removing an inverse fold.

Here are Alive2 generalizations for these folds:
https://alive2.llvm.org/ce/z/u-ZpC_ (ult, the previous patch)
https://alive2.llvm.org/ce/z/YsuAu2 (ult, this patch)
https://alive2.llvm.org/ce/z/ekktQP (ugt, low bitmask)
https://alive2.llvm.org/ce/z/pJY9wR (ugt, one clear bit)

Differential Revision: https://reviews.llvm.org/D112634

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
    llvm/test/Transforms/InstCombine/icmp-trunc.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index b697ddd37aa0..7a9e177f19da 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -4627,16 +4627,39 @@ static Instruction *foldICmpWithTrunc(ICmpInst &ICmp,
   unsigned SrcBits = X->getType()->getScalarSizeInBits();
   if (Pred == ICmpInst::ICMP_ULT) {
     if (C->isPowerOf2()) {
-      // If C is a power-of-2:
+      // If C is a power-of-2 (one set bit):
       // (trunc X) u< C --> (X & -C) == 0 (are all masked-high-bits clear?)
       Constant *MaskC = ConstantInt::get(X->getType(), (-*C).zext(SrcBits));
       Value *And = Builder.CreateAnd(X, MaskC);
       Constant *Zero = ConstantInt::getNullValue(X->getType());
       return new ICmpInst(ICmpInst::ICMP_EQ, And, Zero);
     }
-    // TODO: Handle C is negative-power-of-2.
+    // If C is a negative power-of-2 (high-bit mask):
+    // (trunc X) u< C --> (X & C) != C (are any masked-high-bits clear?)
+    if (C->isNegatedPowerOf2()) {
+      Constant *MaskC = ConstantInt::get(X->getType(), C->zext(SrcBits));
+      Value *And = Builder.CreateAnd(X, MaskC);
+      return new ICmpInst(ICmpInst::ICMP_NE, And, MaskC);
+    }
+  }
+
+  if (Pred == ICmpInst::ICMP_UGT) {
+    // If C is a low-bit-mask (C+1 is a power-of-2):
+    // (trunc X) u> C --> (X & ~C) != 0 (are any masked-high-bits set?)
+    if (C->isMask()) {
+      Constant *MaskC = ConstantInt::get(X->getType(), (~*C).zext(SrcBits));
+      Value *And = Builder.CreateAnd(X, MaskC);
+      Constant *Zero = ConstantInt::getNullValue(X->getType());
+      return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
+    }
+    // If C is not-of-power-of-2 (one clear bit):
+    // (trunc X) u> C --> (X & (C+1)) == C+1 (are all masked-high-bits set?)
+    if ((~*C).isPowerOf2()) {
+      Constant *MaskC = ConstantInt::get(X->getType(), (*C + 1).zext(SrcBits));
+      Value *And = Builder.CreateAnd(X, MaskC);
+      return new ICmpInst(ICmpInst::ICMP_EQ, And, MaskC);
+    }
   }
-  // TODO: Handle ugt.
 
   return nullptr;
 }

diff  --git a/llvm/test/Transforms/InstCombine/icmp-trunc.ll b/llvm/test/Transforms/InstCombine/icmp-trunc.ll
index 02e30c620461..54e4a9d0e5dc 100644
--- a/llvm/test/Transforms/InstCombine/icmp-trunc.ll
+++ b/llvm/test/Transforms/InstCombine/icmp-trunc.ll
@@ -71,8 +71,8 @@ define i1 @PR52260(i32 %x) {
 
 define i1 @ult_192(i32 %x) {
 ; CHECK-LABEL: @ult_192(
-; CHECK-NEXT:    [[T:%.*]] = trunc i32 [[X:%.*]] to i8
-; CHECK-NEXT:    [[R:%.*]] = icmp ult i8 [[T]], -64
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 192
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i32 [[TMP1]], 192
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %t = trunc i32 %x to i8
@@ -82,8 +82,8 @@ define i1 @ult_192(i32 %x) {
 
 define <2 x i1> @ult_2044_splat(<2 x i16> %x) {
 ; CHECK-LABEL: @ult_2044_splat(
-; CHECK-NEXT:    [[T:%.*]] = trunc <2 x i16> [[X:%.*]] to <2 x i11>
-; CHECK-NEXT:    [[R:%.*]] = icmp ult <2 x i11> [[T]], <i11 -4, i11 -4>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], <i16 2044, i16 2044>
+; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i16> [[TMP1]], <i16 2044, i16 2044>
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %t = trunc <2 x i16> %x to <2 x i11>
@@ -91,6 +91,8 @@ define <2 x i1> @ult_2044_splat(<2 x i16> %x) {
   ret <2 x i1> %r
 }
 
+; negative test - need high-bit-mask constant
+
 define i1 @ult_96(i32 %x) {
 ; CHECK-LABEL: @ult_96(
 ; CHECK-NEXT:    [[T:%.*]] = trunc i32 [[X:%.*]] to i8
@@ -102,6 +104,8 @@ define i1 @ult_96(i32 %x) {
   ret i1 %r
 }
 
+; negative test - no extra use allowed
+
 define i1 @ult_192_use(i32 %x) {
 ; CHECK-LABEL: @ult_192_use(
 ; CHECK-NEXT:    [[T:%.*]] = trunc i32 [[X:%.*]] to i8
@@ -117,8 +121,8 @@ define i1 @ult_192_use(i32 %x) {
 
 define i1 @ugt_3(i32 %x) {
 ; CHECK-LABEL: @ugt_3(
-; CHECK-NEXT:    [[T:%.*]] = trunc i32 [[X:%.*]] to i8
-; CHECK-NEXT:    [[R:%.*]] = icmp ugt i8 [[T]], 3
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 252
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i32 [[TMP1]], 0
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %t = trunc i32 %x to i8
@@ -128,8 +132,8 @@ define i1 @ugt_3(i32 %x) {
 
 define <2 x i1> @ugt_7_splat(<2 x i16> %x) {
 ; CHECK-LABEL: @ugt_7_splat(
-; CHECK-NEXT:    [[T:%.*]] = trunc <2 x i16> [[X:%.*]] to <2 x i11>
-; CHECK-NEXT:    [[R:%.*]] = icmp ugt <2 x i11> [[T]], <i11 7, i11 7>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], <i16 2040, i16 2040>
+; CHECK-NEXT:    [[R:%.*]] = icmp ne <2 x i16> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %t = trunc <2 x i16> %x to <2 x i11>
@@ -137,6 +141,8 @@ define <2 x i1> @ugt_7_splat(<2 x i16> %x) {
   ret <2 x i1> %r
 }
 
+; negative test - need low-bit-mask constant
+
 define i1 @ugt_4(i32 %x) {
 ; CHECK-LABEL: @ugt_4(
 ; CHECK-NEXT:    [[T:%.*]] = trunc i32 [[X:%.*]] to i8
@@ -148,6 +154,8 @@ define i1 @ugt_4(i32 %x) {
   ret i1 %r
 }
 
+; negative test - no extra use allowed
+
 define i1 @ugt_3_use(i32 %x) {
 ; CHECK-LABEL: @ugt_3_use(
 ; CHECK-NEXT:    [[T:%.*]] = trunc i32 [[X:%.*]] to i8
@@ -163,8 +171,8 @@ define i1 @ugt_3_use(i32 %x) {
 
 define i1 @ugt_253(i32 %x) {
 ; CHECK-LABEL: @ugt_253(
-; CHECK-NEXT:    [[T:%.*]] = trunc i32 [[X:%.*]] to i8
-; CHECK-NEXT:    [[R:%.*]] = icmp ugt i8 [[T]], -3
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 254
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i32 [[TMP1]], 254
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %t = trunc i32 %x to i8
@@ -174,8 +182,8 @@ define i1 @ugt_253(i32 %x) {
 
 define <2 x i1> @ugt_2043_splat(<2 x i16> %x) {
 ; CHECK-LABEL: @ugt_2043_splat(
-; CHECK-NEXT:    [[T:%.*]] = trunc <2 x i16> [[X:%.*]] to <2 x i11>
-; CHECK-NEXT:    [[R:%.*]] = icmp ugt <2 x i11> [[T]], <i11 -5, i11 -5>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], <i16 2044, i16 2044>
+; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i16> [[TMP1]], <i16 2044, i16 2044>
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %t = trunc <2 x i16> %x to <2 x i11>
@@ -183,6 +191,8 @@ define <2 x i1> @ugt_2043_splat(<2 x i16> %x) {
   ret <2 x i1> %r
 }
 
+; negative test - need not-of-power-of-2 constant
+
 define i1 @ugt_252(i32 %x) {
 ; CHECK-LABEL: @ugt_252(
 ; CHECK-NEXT:    [[T:%.*]] = trunc i32 [[X:%.*]] to i8
@@ -194,6 +204,8 @@ define i1 @ugt_252(i32 %x) {
   ret i1 %r
 }
 
+; negative test - no extra use allowed
+
 define i1 @ugt_253_use(i32 %x) {
 ; CHECK-LABEL: @ugt_253_use(
 ; CHECK-NEXT:    [[T:%.*]] = trunc i32 [[X:%.*]] to i8