[llvm] a041c4e - [InstCombine] fold zext of masked bit set/clear

Tue Dec 31 09:48:48 PST 2019

Author: Sanjay Patel
Date: 2019-12-31T12:35:10-05:00
New Revision: a041c4ec6f7aa659b235cb67e9231a05e0a33b7d

URL: https://github.com/llvm/llvm-project/commit/a041c4ec6f7aa659b235cb67e9231a05e0a33b7d
DIFF: https://github.com/llvm/llvm-project/commit/a041c4ec6f7aa659b235cb67e9231a05e0a33b7d.diff

LOG: [InstCombine] fold zext of masked bit set/clear

This does not solve PR17101, but it is one of the
underlying diffs noted here:
https://bugs.llvm.org/show_bug.cgi?id=17101#c8

We could ease the one-use checks for the 'set'
(no 'not' op) half of the transform, but I do not
know if that asymmetry would make things better
or worse.

Proofs:
https://rise4fun.com/Alive/uVB

  Name: masked bit set
  %sh1 = shl i32 1, %y
  %and = and i32 %sh1, %x
  %cmp = icmp ne i32 %and, 0
  %r = zext i1 %cmp to i32
  =>
  %s = lshr i32 %x, %y
  %r = and i32 %s, 1

  Name: masked bit clear
  %sh1 = shl i32 1, %y
  %and = and i32 %sh1, %x
  %cmp = icmp eq i32 %and, 0
  %r = zext i1 %cmp to i32
  =>
  %xn = xor i32 %x, -1
  %s = lshr i32 %xn, %y
  %r = and i32 %s, 1

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
    llvm/test/Transforms/InstCombine/zext.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 3ba56bbe53e0..b9be41840ffa 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -922,10 +922,24 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *Cmp, ZExtInst &Zext,
     }
   }
 
-  // icmp ne A, B is equal to xor A, B when A and B only really have one bit.
-  // It is also profitable to transform icmp eq into not(xor(A, B)) because that
-  // may lead to additional simplifications.
   if (Cmp->isEquality() && Zext.getType() == Cmp->getOperand(0)->getType()) {
+    // Test if a bit is clear/set using a shifted-one mask:
+    // zext (icmp eq (and X, (1 << ShAmt)), 0) --> and (lshr (not X), ShAmt), 1
+    // zext (icmp ne (and X, (1 << ShAmt)), 0) --> and (lshr X, ShAmt), 1
+    Value *X, *ShAmt;
+    if (Cmp->hasOneUse() && match(Cmp->getOperand(1), m_ZeroInt()) &&
+        match(Cmp->getOperand(0),
+              m_OneUse(m_c_And(m_Shl(m_One(), m_Value(ShAmt)), m_Value(X))))) {
+      if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
+        X = Builder.CreateNot(X);
+      Value *Lshr = Builder.CreateLShr(X, ShAmt);
+      Value *And1 = Builder.CreateAnd(Lshr, ConstantInt::get(X->getType(), 1));
+      return replaceInstUsesWith(Zext, And1);
+    }
+
+    // icmp ne A, B is equal to xor A, B when A and B only really have one bit.
+    // It is also profitable to transform icmp eq into not(xor(A, B)) because
+    // that may lead to additional simplifications.
     if (IntegerType *ITy = dyn_cast<IntegerType>(Zext.getType())) {
       Value *LHS = Cmp->getOperand(0);
       Value *RHS = Cmp->getOperand(1);

diff  --git a/llvm/test/Transforms/InstCombine/zext.ll b/llvm/test/Transforms/InstCombine/zext.ll
index 1dbb9ffd7e08..9351f5cea4dd 100644
--- a/llvm/test/Transforms/InstCombine/zext.ll
+++ b/llvm/test/Transforms/InstCombine/zext.ll
@@ -177,11 +177,9 @@ declare void @use32(i32)
 
 define i32 @masked_bit_set(i32 %x, i32 %y) {
 ; CHECK-LABEL: @masked_bit_set(
-; CHECK-NEXT:    [[SH1:%.*]] = shl i32 1, [[Y:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SH1]], [[X:%.*]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[AND]], 0
-; CHECK-NEXT:    [[R:%.*]] = zext i1 [[CMP]] to i32
-; CHECK-NEXT:    ret i32 [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 1
+; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
   %sh1 = shl i32 1, %y
   %and = and i32 %sh1, %x
@@ -192,11 +190,10 @@ define i32 @masked_bit_set(i32 %x, i32 %y) {
 
 define <2 x i32> @masked_bit_clear(<2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @masked_bit_clear(
-; CHECK-NEXT:    [[SH1:%.*]] = shl <2 x i32> <i32 1, i32 1>, [[Y:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[SH1]], [[X:%.*]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[AND]], zeroinitializer
-; CHECK-NEXT:    [[R:%.*]] = zext <2 x i1> [[CMP]] to <2 x i32>
-; CHECK-NEXT:    ret <2 x i32> [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[X:%.*]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr <2 x i32> [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i32> [[TMP2]], <i32 1, i32 1>
+; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
 ;
   %sh1 = shl <2 x i32> <i32 1, i32 1>, %y
   %and = and <2 x i32> %sh1, %x
@@ -208,11 +205,9 @@ define <2 x i32> @masked_bit_clear(<2 x i32> %x, <2 x i32> %y) {
 define <2 x i32> @masked_bit_set_commute(<2 x i32> %px, <2 x i32> %y) {
 ; CHECK-LABEL: @masked_bit_set_commute(
 ; CHECK-NEXT:    [[X:%.*]] = srem <2 x i32> <i32 42, i32 3>, [[PX:%.*]]
-; CHECK-NEXT:    [[SH1:%.*]] = shl <2 x i32> <i32 1, i32 1>, [[Y:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[X]], [[SH1]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne <2 x i32> [[AND]], zeroinitializer
-; CHECK-NEXT:    [[R:%.*]] = zext <2 x i1> [[CMP]] to <2 x i32>
-; CHECK-NEXT:    ret <2 x i32> [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[X]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], <i32 1, i32 1>
+; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
 ;
   %x = srem <2 x i32> <i32 42, i32 3>, %px ; thwart complexity-based canonicalization
   %sh1 = shl <2 x i32> <i32 1, i32 1>, %y
@@ -225,11 +220,10 @@ define <2 x i32> @masked_bit_set_commute(<2 x i32> %px, <2 x i32> %y) {
 define i32 @masked_bit_clear_commute(i32 %px, i32 %y) {
 ; CHECK-LABEL: @masked_bit_clear_commute(
 ; CHECK-NEXT:    [[X:%.*]] = srem i32 42, [[PX:%.*]]
-; CHECK-NEXT:    [[SH1:%.*]] = shl i32 1, [[Y:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[X]], [[SH1]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[AND]], 0
-; CHECK-NEXT:    [[R:%.*]] = zext i1 [[CMP]] to i32
-; CHECK-NEXT:    ret i32 [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[X]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP1]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], 1
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %x = srem i32 42, %px ; thwart complexity-based canonicalization
   %sh1 = shl i32 1, %y
@@ -243,10 +237,9 @@ define i32 @masked_bit_set_use1(i32 %x, i32 %y) {
 ; CHECK-LABEL: @masked_bit_set_use1(
 ; CHECK-NEXT:    [[SH1:%.*]] = shl i32 1, [[Y:%.*]]
 ; CHECK-NEXT:    call void @use32(i32 [[SH1]])
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SH1]], [[X:%.*]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[AND]], 0
-; CHECK-NEXT:    [[R:%.*]] = zext i1 [[CMP]] to i32
-; CHECK-NEXT:    ret i32 [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X:%.*]], [[Y]]
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 1
+; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
   %sh1 = shl i32 1, %y
   call void @use32(i32 %sh1)
@@ -256,6 +249,8 @@ define i32 @masked_bit_set_use1(i32 %x, i32 %y) {
   ret i32 %r
 }
 
+; Negative test
+
 define i32 @masked_bit_set_use2(i32 %x, i32 %y) {
 ; CHECK-LABEL: @masked_bit_set_use2(
 ; CHECK-NEXT:    [[SH1:%.*]] = shl i32 1, [[Y:%.*]]
@@ -273,6 +268,8 @@ define i32 @masked_bit_set_use2(i32 %x, i32 %y) {
   ret i32 %r
 }
 
+; Negative test
+
 define i32 @masked_bit_set_use3(i32 %x, i32 %y) {
 ; CHECK-LABEL: @masked_bit_set_use3(
 ; CHECK-NEXT:    [[SH1:%.*]] = shl i32 1, [[Y:%.*]]
@@ -294,10 +291,10 @@ define i32 @masked_bit_clear_use1(i32 %x, i32 %y) {
 ; CHECK-LABEL: @masked_bit_clear_use1(
 ; CHECK-NEXT:    [[SH1:%.*]] = shl i32 1, [[Y:%.*]]
 ; CHECK-NEXT:    call void @use32(i32 [[SH1]])
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[SH1]], [[X:%.*]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[AND]], 0
-; CHECK-NEXT:    [[R:%.*]] = zext i1 [[CMP]] to i32
-; CHECK-NEXT:    ret i32 [[R]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[X:%.*]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP1]], [[Y]]
+; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], 1
+; CHECK-NEXT:    ret i32 [[TMP3]]
 ;
   %sh1 = shl i32 1, %y
   call void @use32(i32 %sh1)
@@ -307,6 +304,8 @@ define i32 @masked_bit_clear_use1(i32 %x, i32 %y) {
   ret i32 %r
 }
 
+; Negative test
+
 define i32 @masked_bit_clear_use2(i32 %x, i32 %y) {
 ; CHECK-LABEL: @masked_bit_clear_use2(
 ; CHECK-NEXT:    [[SH1:%.*]] = shl i32 1, [[Y:%.*]]
@@ -324,6 +323,8 @@ define i32 @masked_bit_clear_use2(i32 %x, i32 %y) {
   ret i32 %r
 }
 
+; Negative test
+
 define i32 @masked_bit_clear_use3(i32 %x, i32 %y) {
 ; CHECK-LABEL: @masked_bit_clear_use3(
 ; CHECK-NEXT:    [[SH1:%.*]] = shl i32 1, [[Y:%.*]]
@@ -341,6 +342,8 @@ define i32 @masked_bit_clear_use3(i32 %x, i32 %y) {
   ret i32 %r
 }
 
+; Negative test
+
 define i32 @masked_bits_set(i32 %x, i32 %y) {
 ; CHECK-LABEL: @masked_bits_set(
 ; CHECK-NEXT:    [[SH1:%.*]] = shl i32 3, [[Y:%.*]]
@@ -356,6 +359,8 @@ define i32 @masked_bits_set(i32 %x, i32 %y) {
   ret i32 %r
 }
 
+; Negative test
+
 define i32 @div_bit_set(i32 %x, i32 %y) {
 ; CHECK-LABEL: @div_bit_set(
 ; CHECK-NEXT:    [[SH1:%.*]] = shl i32 1, [[Y:%.*]]
@@ -371,6 +376,8 @@ define i32 @div_bit_set(i32 %x, i32 %y) {
   ret i32 %r
 }
 
+; Negative test
+
 define i32 @masked_bit_set_nonzero_cmp(i32 %x, i32 %y) {
 ; CHECK-LABEL: @masked_bit_set_nonzero_cmp(
 ; CHECK-NEXT:    [[SH1:%.*]] = shl i32 1, [[Y:%.*]]
@@ -386,6 +393,8 @@ define i32 @masked_bit_set_nonzero_cmp(i32 %x, i32 %y) {
   ret i32 %r
 }
 
+; Negative test
+
 define i32 @masked_bit_wrong_pred(i32 %x, i32 %y) {
 ; CHECK-LABEL: @masked_bit_wrong_pred(
 ; CHECK-NEXT:    [[SH1:%.*]] = shl i32 1, [[Y:%.*]]