[llvm] [InstCombine] Extend Fold of Zero-extended Bit Test (PR #102100)
Marius Kamp via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 7 04:04:54 PDT 2024
https://github.com/mskamp updated https://github.com/llvm/llvm-project/pull/102100
From f065a8ee6d327b43e9f41d968764187c81cf384f Mon Sep 17 00:00:00 2001
From: Marius Kamp <msk at posteo.org>
Date: Sun, 4 Aug 2024 18:38:07 +0200
Subject: [PATCH 1/2] [InstCombine] Add Tests for Zero-extended Bit Tests; NFC
---
llvm/test/Transforms/InstCombine/zext.ll | 110 +++++++++++++++++++++++
1 file changed, 110 insertions(+)
diff --git a/llvm/test/Transforms/InstCombine/zext.ll b/llvm/test/Transforms/InstCombine/zext.ll
index 88cd9c70af40d..699d7d03e7da7 100644
--- a/llvm/test/Transforms/InstCombine/zext.ll
+++ b/llvm/test/Transforms/InstCombine/zext.ll
@@ -454,6 +454,116 @@ define i32 @zext_or_masked_bit_test_uses(i32 %a, i32 %b, i32 %x) {
ret i32 %z
}
+define i16 @zext_masked_bit_zero_to_smaller_bitwidth(i32 %a, i32 %b) {
+; CHECK-LABEL: @zext_masked_bit_zero_to_smaller_bitwidth(
+; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[B:%.*]]
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], [[A:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[Z:%.*]] = zext i1 [[CMP]] to i16
+; CHECK-NEXT: ret i16 [[Z]]
+;
+ %shl = shl i32 1, %b
+ %and = and i32 %shl, %a
+ %cmp = icmp eq i32 %and, 0
+ %z = zext i1 %cmp to i16
+ ret i16 %z
+}
+
+define <4 x i16> @zext_masked_bit_zero_to_smaller_bitwidth_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @zext_masked_bit_zero_to_smaller_bitwidth_v4i32(
+; CHECK-NEXT: [[SHL:%.*]] = shl nuw <4 x i32> <i32 1, i32 1, i32 1, i32 1>, [[B:%.*]]
+; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[SHL]], [[A:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[AND]], zeroinitializer
+; CHECK-NEXT: [[Z:%.*]] = zext <4 x i1> [[CMP]] to <4 x i16>
+; CHECK-NEXT: ret <4 x i16> [[Z]]
+;
+ %shl = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %b
+ %and = and <4 x i32> %shl, %a
+ %cmp = icmp eq <4 x i32> %and, <i32 0, i32 0, i32 0, i32 0>
+ %z = zext <4 x i1> %cmp to <4 x i16>
+ ret <4 x i16> %z
+}
+
+; Negative test
+define i16 @zext_masked_bit_zero_to_smaller_bitwidth_multi_use_shl(i32 %a, i32 %b) {
+; CHECK-LABEL: @zext_masked_bit_zero_to_smaller_bitwidth_multi_use_shl(
+; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[B:%.*]]
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], [[A:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[Z:%.*]] = zext i1 [[CMP]] to i16
+; CHECK-NEXT: call void @use32(i32 [[SHL]])
+; CHECK-NEXT: ret i16 [[Z]]
+;
+ %shl = shl i32 1, %b
+ %and = and i32 %shl, %a
+ %cmp = icmp eq i32 %and, 0
+ %z = zext i1 %cmp to i16
+ call void @use32(i32 %shl)
+ ret i16 %z
+}
+
+define i16 @zext_masked_bit_nonzero_to_smaller_bitwidth(i32 %a, i32 %b) {
+; CHECK-LABEL: @zext_masked_bit_nonzero_to_smaller_bitwidth(
+; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[B:%.*]]
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], [[A:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: [[Z:%.*]] = zext i1 [[CMP]] to i16
+; CHECK-NEXT: ret i16 [[Z]]
+;
+ %shl = shl i32 1, %b
+ %and = and i32 %shl, %a
+ %cmp = icmp ne i32 %and, 0
+ %z = zext i1 %cmp to i16
+ ret i16 %z
+}
+
+define i16 @zext_masked_bit_nonzero_to_smaller_bitwidth_multi_use_shl(i32 %a, i32 %b) {
+; CHECK-LABEL: @zext_masked_bit_nonzero_to_smaller_bitwidth_multi_use_shl(
+; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[B:%.*]]
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], [[A:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: [[Z:%.*]] = zext i1 [[CMP]] to i16
+; CHECK-NEXT: call void @use32(i32 [[SHL]])
+; CHECK-NEXT: ret i16 [[Z]]
+;
+ %shl = shl i32 1, %b
+ %and = and i32 %shl, %a
+ %cmp = icmp ne i32 %and, 0
+ %z = zext i1 %cmp to i16
+ call void @use32(i32 %shl)
+ ret i16 %z
+}
+
+define i64 @zext_masked_bit_zero_to_larger_bitwidth(i32 %a, i32 %b) {
+; CHECK-LABEL: @zext_masked_bit_zero_to_larger_bitwidth(
+; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[B:%.*]]
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], [[A:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
+; CHECK-NEXT: [[Z:%.*]] = zext i1 [[CMP]] to i64
+; CHECK-NEXT: ret i64 [[Z]]
+;
+ %shl = shl i32 1, %b
+ %and = and i32 %shl, %a
+ %cmp = icmp eq i32 %and, 0
+ %z = zext i1 %cmp to i64
+ ret i64 %z
+}
+
+define <4 x i64> @zext_masked_bit_zero_to_larger_bitwidth_v4i32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @zext_masked_bit_zero_to_larger_bitwidth_v4i32(
+; CHECK-NEXT: [[SHL:%.*]] = shl nuw <4 x i32> <i32 1, i32 1, i32 1, i32 1>, [[B:%.*]]
+; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[SHL]], [[A:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[AND]], zeroinitializer
+; CHECK-NEXT: [[Z:%.*]] = zext <4 x i1> [[CMP]] to <4 x i64>
+; CHECK-NEXT: ret <4 x i64> [[Z]]
+;
+ %shl = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %b
+ %and = and <4 x i32> %shl, %a
+ %cmp = icmp eq <4 x i32> %and, <i32 0, i32 0, i32 0, i32 0>
+ %z = zext <4 x i1> %cmp to <4 x i64>
+ ret <4 x i64> %z
+}
+
define i32 @notneg_zext_wider(i8 %x) {
; CHECK-LABEL: @notneg_zext_wider(
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i8 [[X:%.*]], -1
From 94de5cdcf72f046f6deb6ff1ea17b8ed243b5147 Mon Sep 17 00:00:00 2001
From: Marius Kamp <msk at posteo.org>
Date: Mon, 5 Aug 2024 19:55:12 +0200
Subject: [PATCH 2/2] [InstCombine] Extend Fold of Zero-extended Bit Test
Previously, (zext (icmp ne (and X, (1 << ShAmt)), 0)) was folded only
if the bit widths of X and the result were equal. Use a trunc or zext
instruction to support other bit widths as well.
This is a follow-up to commit 533190acdb9d2ed774f96a998b5c03be3df4f857,
which introduced a regression: (zext (icmp ne (and (lshr X ShAmt) 1) 0))
was no longer folded to (zext/trunc (and (lshr X ShAmt) 1)), because
that commit canonicalizes (icmp ne (and (lshr X ShAmt) 1) 0) to
(icmp ne (and X (1 << ShAmt)) 0). The change introduced by this commit
restores the fold.
Alive proof: https://alive2.llvm.org/ce/z/MFkNXs
Relates to issue #86813.
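As a minimal IR sketch of the restored fold (function and value names
here are hypothetical, not taken from the patch): the bit-test chain
below folded before commit 533190a and, with this change, should again
fold to an lshr/trunc/and sequence, matching the updated
zext_masked_bit_nonzero_to_smaller_bitwidth expectations further down.

  define i16 @bit_test_narrow(i32 %x, i32 %amt) {
    %shr = lshr i32 %x, %amt      ; extract the tested bit
    %bit = and i32 %shr, 1
    %cmp = icmp ne i32 %bit, 0    ; canonicalized since 533190a to
    %z = zext i1 %cmp to i16      ;   (icmp ne (and %x, (shl 1, %amt)), 0)
    ret i16 %z
  }

  ; Expected result after this patch (roughly):
  ;   %shr = lshr i32 %x, %amt
  ;   %tr  = trunc i32 %shr to i16
  ;   %z   = and i16 %tr, 1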
---
.../InstCombine/InstCombineCasts.cpp | 19 +++++---
llvm/test/Transforms/InstCombine/zext.ll | 45 +++++++++----------
2 files changed, 35 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 97ee845548e28..5c9faa9449f53 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -985,7 +985,7 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp,
}
}
- if (Cmp->isEquality() && Zext.getType() == Cmp->getOperand(0)->getType()) {
+ if (Cmp->isEquality()) {
// Test if a bit is clear/set using a shifted-one mask:
// zext (icmp eq (and X, (1 << ShAmt)), 0) --> and (lshr (not X), ShAmt), 1
// zext (icmp ne (and X, (1 << ShAmt)), 0) --> and (lshr X, ShAmt), 1
@@ -993,11 +993,18 @@ Instruction *InstCombinerImpl::transformZExtICmp(ICmpInst *Cmp,
if (Cmp->hasOneUse() && match(Cmp->getOperand(1), m_ZeroInt()) &&
match(Cmp->getOperand(0),
m_OneUse(m_c_And(m_Shl(m_One(), m_Value(ShAmt)), m_Value(X))))) {
- if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
- X = Builder.CreateNot(X);
- Value *Lshr = Builder.CreateLShr(X, ShAmt);
- Value *And1 = Builder.CreateAnd(Lshr, ConstantInt::get(X->getType(), 1));
- return replaceInstUsesWith(Zext, And1);
+ auto *And = cast<BinaryOperator>(Cmp->getOperand(0));
+ Value *Shift = And->getOperand(X == And->getOperand(0) ? 1 : 0);
+ if (Zext.getType() == And->getType() ||
+ Cmp->getPredicate() != ICmpInst::ICMP_EQ || Shift->hasOneUse()) {
+ if (Cmp->getPredicate() == ICmpInst::ICMP_EQ)
+ X = Builder.CreateNot(X);
+ Value *Lshr = Builder.CreateLShr(X, ShAmt);
+ Value *And1 =
+ Builder.CreateAnd(Lshr, ConstantInt::get(X->getType(), 1));
+ return replaceInstUsesWith(
+ Zext, Builder.CreateZExtOrTrunc(And1, Zext.getType()));
+ }
}
}
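A note on the new guard: the one shape it skips is an eq test at a
different bit width whose shl mask has additional uses, since there the
fold would net extra instructions (the shl stays alive, and the eq case
adds a not on top of lshr, trunc, and and). A rough sketch of that
skipped shape, mirroring the negative test
zext_masked_bit_zero_to_smaller_bitwidth_multi_use_shl (names
hypothetical):

  %shl = shl i32 1, %b          ; kept alive by the extra use below
  %and = and i32 %shl, %a
  %cmp = icmp eq i32 %and, 0
  %z = zext i1 %cmp to i16      ; folding would emit xor, lshr, trunc,
  call void @use32(i32 %shl)    ;   and and, while %shl still remains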
diff --git a/llvm/test/Transforms/InstCombine/zext.ll b/llvm/test/Transforms/InstCombine/zext.ll
index 699d7d03e7da7..1a5d829d50165 100644
--- a/llvm/test/Transforms/InstCombine/zext.ll
+++ b/llvm/test/Transforms/InstCombine/zext.ll
@@ -456,10 +456,10 @@ define i32 @zext_or_masked_bit_test_uses(i32 %a, i32 %b, i32 %x) {
define i16 @zext_masked_bit_zero_to_smaller_bitwidth(i32 %a, i32 %b) {
; CHECK-LABEL: @zext_masked_bit_zero_to_smaller_bitwidth(
-; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[B:%.*]]
-; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], [[A:%.*]]
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
-; CHECK-NEXT: [[Z:%.*]] = zext i1 [[CMP]] to i16
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A:%.*]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
+; CHECK-NEXT: [[Z:%.*]] = and i16 [[TMP3]], 1
; CHECK-NEXT: ret i16 [[Z]]
;
%shl = shl i32 1, %b
@@ -471,10 +471,10 @@ define i16 @zext_masked_bit_zero_to_smaller_bitwidth(i32 %a, i32 %b) {
define <4 x i16> @zext_masked_bit_zero_to_smaller_bitwidth_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @zext_masked_bit_zero_to_smaller_bitwidth_v4i32(
-; CHECK-NEXT: [[SHL:%.*]] = shl nuw <4 x i32> <i32 1, i32 1, i32 1, i32 1>, [[B:%.*]]
-; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[SHL]], [[A:%.*]]
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[AND]], zeroinitializer
-; CHECK-NEXT: [[Z:%.*]] = zext <4 x i1> [[CMP]] to <4 x i16>
+; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[A:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
+; CHECK-NEXT: [[Z:%.*]] = and <4 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1>
; CHECK-NEXT: ret <4 x i16> [[Z]]
;
%shl = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %b
@@ -504,10 +504,9 @@ define i16 @zext_masked_bit_zero_to_smaller_bitwidth_multi_use_shl(i32 %a, i32 %
define i16 @zext_masked_bit_nonzero_to_smaller_bitwidth(i32 %a, i32 %b) {
; CHECK-LABEL: @zext_masked_bit_nonzero_to_smaller_bitwidth(
-; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[B:%.*]]
-; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], [[A:%.*]]
-; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
-; CHECK-NEXT: [[Z:%.*]] = zext i1 [[CMP]] to i16
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+; CHECK-NEXT: [[Z:%.*]] = and i16 [[TMP2]], 1
; CHECK-NEXT: ret i16 [[Z]]
;
%shl = shl i32 1, %b
@@ -520,9 +519,9 @@ define i16 @zext_masked_bit_nonzero_to_smaller_bitwidth(i32 %a, i32 %b) {
define i16 @zext_masked_bit_nonzero_to_smaller_bitwidth_multi_use_shl(i32 %a, i32 %b) {
; CHECK-LABEL: @zext_masked_bit_nonzero_to_smaller_bitwidth_multi_use_shl(
; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[B:%.*]]
-; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], [[A:%.*]]
-; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
-; CHECK-NEXT: [[Z:%.*]] = zext i1 [[CMP]] to i16
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[A:%.*]], [[B]]
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
+; CHECK-NEXT: [[Z:%.*]] = and i16 [[TMP2]], 1
; CHECK-NEXT: call void @use32(i32 [[SHL]])
; CHECK-NEXT: ret i16 [[Z]]
;
@@ -536,10 +535,10 @@ define i16 @zext_masked_bit_nonzero_to_smaller_bitwidth_multi_use_shl(i32 %a, i3
define i64 @zext_masked_bit_zero_to_larger_bitwidth(i32 %a, i32 %b) {
; CHECK-LABEL: @zext_masked_bit_zero_to_larger_bitwidth(
-; CHECK-NEXT: [[SHL:%.*]] = shl nuw i32 1, [[B:%.*]]
-; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], [[A:%.*]]
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0
-; CHECK-NEXT: [[Z:%.*]] = zext i1 [[CMP]] to i64
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[A:%.*]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 1
+; CHECK-NEXT: [[Z:%.*]] = zext nneg i32 [[TMP3]] to i64
; CHECK-NEXT: ret i64 [[Z]]
;
%shl = shl i32 1, %b
@@ -551,10 +550,10 @@ define i64 @zext_masked_bit_zero_to_larger_bitwidth(i32 %a, i32 %b) {
define <4 x i64> @zext_masked_bit_zero_to_larger_bitwidth_v4i32(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @zext_masked_bit_zero_to_larger_bitwidth_v4i32(
-; CHECK-NEXT: [[SHL:%.*]] = shl nuw <4 x i32> <i32 1, i32 1, i32 1, i32 1>, [[B:%.*]]
-; CHECK-NEXT: [[AND:%.*]] = and <4 x i32> [[SHL]], [[A:%.*]]
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i32> [[AND]], zeroinitializer
-; CHECK-NEXT: [[Z:%.*]] = zext <4 x i1> [[CMP]] to <4 x i64>
+; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> [[A:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], [[B:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[Z:%.*]] = zext nneg <4 x i32> [[TMP3]] to <4 x i64>
; CHECK-NEXT: ret <4 x i64> [[Z]]
;
%shl = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %b