[llvm] goldsteinn/cmp eq p2 neg p2 (PR #94867)

via llvm-commits llvm-commits at lists.llvm.org
Sat Jun 8 12:34:04 PDT 2024


https://github.com/goldsteinn created https://github.com/llvm/llvm-project/pull/94867

- **[InstCombine] Add tests for combining `(icmp eq/ne (and X, P2), (and X, -P2))`; NFC**
- **[InstCombine] Add combines for `(icmp eq/ne (and X, P2), (and X, -P2))`**


>From d8d186e6a892baa912ee7c27c9307dc373182229 Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Sat, 8 Jun 2024 14:22:35 -0500
Subject: [PATCH 1/2] [InstCombine] Add tests for combining `(icmp eq/ne (and
 X, P2), (and X, -P2))`; NFC

---
 .../Transforms/InstCombine/and-compare.ll     | 157 ++++++++++++++++++
 1 file changed, 157 insertions(+)

diff --git a/llvm/test/Transforms/InstCombine/and-compare.ll b/llvm/test/Transforms/InstCombine/and-compare.ll
index 14379ebf3a905..e7b24bb0fc3f1 100644
--- a/llvm/test/Transforms/InstCombine/and-compare.ll
+++ b/llvm/test/Transforms/InstCombine/and-compare.ll
@@ -4,6 +4,8 @@
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
+declare void @use.i8(i8)
+
 ; Should be optimized to one and.
 define i1 @test1(i32 %a, i32 %b) {
 ; CHECK-LABEL: @test1(
@@ -75,3 +77,158 @@ define <2 x i1> @test3vec(<2 x i64> %A) {
   ret <2 x i1> %cmp
 }
 
+define i1 @test_eq_p2(i8 %x, i8 %yy) {
+; CHECK-LABEL: @test_eq_p2(
+; CHECK-NEXT:    [[Y:%.*]] = shl nuw nsw i8 1, [[YY:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i8 [[Y]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i8 [[Y]], [[TMP1]]
+; CHECK-NEXT:    [[R:%.*]] = icmp uge i8 [[TMP2]], [[X:%.*]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %y = shl nsw i8 1, %yy
+  %neg_y = sub i8 0, %y
+  %and_x_neg_y = and i8 %x, %neg_y
+  %and_x_y = and i8 %x, %y
+
+  %r = icmp eq i8 %and_x_y, %and_x_neg_y
+  ret i1 %r
+}
+
+define i1 @test_eq_p2_2(i8 %x, i8 %yy) {
+; CHECK-LABEL: @test_eq_p2_2(
+; CHECK-NEXT:    [[Y:%.*]] = shl nuw nsw i8 1, [[YY:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i8 [[Y]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i8 [[Y]], [[TMP1]]
+; CHECK-NEXT:    [[R:%.*]] = icmp uge i8 [[TMP2]], [[X:%.*]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %y = shl nsw i8 1, %yy
+  %neg_y = sub i8 0, %y
+  %and_x_neg_y = and i8 %x, %neg_y
+  %and_x_y = and i8 %x, %y
+
+  %r = icmp eq i8 %and_x_neg_y, %and_x_y
+  ret i1 %r
+}
+
+define i1 @test_eq_p2_fail_maybe_zero(i8 %x, i8 %yy) {
+; CHECK-LABEL: @test_eq_p2_fail_maybe_zero(
+; CHECK-NEXT:    [[Y:%.*]] = shl i8 2, [[YY:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[Y]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i8 [[Y]], [[TMP1]]
+; CHECK-NEXT:    [[R:%.*]] = icmp uge i8 [[TMP2]], [[X:%.*]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %yyy = shl nsw i8 1, %yy
+  %y = add i8 %yyy, %yyy
+  %neg_y = sub nsw i8 0, %y
+  %and_x_neg_y = and i8 %x, %neg_y
+  %and_x_y = and i8 %x, %y
+
+  %r = icmp eq i8 %and_x_neg_y, %and_x_y
+  ret i1 %r
+}
+
+define i1 @test_eq_p2_fail_maybe_int_min(i8 %x, i8 %yy) {
+; CHECK-LABEL: @test_eq_p2_fail_maybe_int_min(
+; CHECK-NEXT:    [[Y:%.*]] = shl nuw i8 1, [[YY:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[Y]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i8 [[Y]], [[TMP1]]
+; CHECK-NEXT:    [[R:%.*]] = icmp uge i8 [[TMP2]], [[X:%.*]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %y = shl i8 1, %yy
+  %neg_y = sub i8 0, %y
+  %and_x_neg_y = and i8 %x, %neg_y
+  %and_x_y = and i8 %x, %y
+
+  %r = icmp eq i8 %and_x_y, %and_x_neg_y
+  ret i1 %r
+}
+
+define i1 @test_eq_p2_fail_multiuse(i8 %x, i8 %yy) {
+; CHECK-LABEL: @test_eq_p2_fail_multiuse(
+; CHECK-NEXT:    [[Y:%.*]] = shl nuw nsw i8 1, [[YY:%.*]]
+; CHECK-NEXT:    [[NEG_Y:%.*]] = sub nsw i8 0, [[Y]]
+; CHECK-NEXT:    [[AND_X_NEG_Y:%.*]] = and i8 [[NEG_Y]], [[X:%.*]]
+; CHECK-NEXT:    [[AND_X_Y:%.*]] = and i8 [[Y]], [[X]]
+; CHECK-NEXT:    call void @use.i8(i8 [[AND_X_Y]])
+; CHECK-NEXT:    [[R:%.*]] = icmp eq i8 [[AND_X_Y]], [[AND_X_NEG_Y]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %y = shl nsw i8 1, %yy
+  %neg_y = sub i8 0, %y
+  %and_x_neg_y = and i8 %x, %neg_y
+  %and_x_y = and i8 %x, %y
+  call void @use.i8(i8 %and_x_y)
+  %r = icmp eq i8 %and_x_y, %and_x_neg_y
+  ret i1 %r
+}
+
+define i1 @test_ne_cp2(i8 %x, i8 %yy) {
+; CHECK-LABEL: @test_ne_cp2(
+; CHECK-NEXT:    [[AND_X_NEG_Y:%.*]] = and i8 [[X:%.*]], -16
+; CHECK-NEXT:    [[AND_X_Y:%.*]] = and i8 [[X]], 16
+; CHECK-NEXT:    call void @use.i8(i8 [[AND_X_NEG_Y]])
+; CHECK-NEXT:    call void @use.i8(i8 [[AND_X_Y]])
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[AND_X_NEG_Y]], [[AND_X_Y]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %and_x_neg_y = and i8 %x, -16
+  %and_x_y = and i8 %x, 16
+  call void @use.i8(i8 %and_x_neg_y)
+  call void @use.i8(i8 %and_x_y)
+  %r = icmp ne i8 %and_x_neg_y, %and_x_y
+  ret i1 %r
+}
+
+define i1 @test_ne_cp2_2(i8 %x, i8 %yy) {
+; CHECK-LABEL: @test_ne_cp2_2(
+; CHECK-NEXT:    [[AND_X_NEG_Y:%.*]] = and i8 [[X:%.*]], -4
+; CHECK-NEXT:    [[AND_X_Y:%.*]] = and i8 [[X]], 4
+; CHECK-NEXT:    call void @use.i8(i8 [[AND_X_NEG_Y]])
+; CHECK-NEXT:    call void @use.i8(i8 [[AND_X_Y]])
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[AND_X_Y]], [[AND_X_NEG_Y]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %and_x_neg_y = and i8 %x, -4
+  %and_x_y = and i8 %x, 4
+  call void @use.i8(i8 %and_x_neg_y)
+  call void @use.i8(i8 %and_x_y)
+  %r = icmp ne i8 %and_x_y, %and_x_neg_y
+  ret i1 %r
+}
+
+define i1 @test_ne_cp2_other_fail(i8 %x, i8 %yy) {
+; CHECK-LABEL: @test_ne_cp2_other_fail(
+; CHECK-NEXT:    [[AND_X_NEG_Y:%.*]] = and i8 [[X:%.*]], -17
+; CHECK-NEXT:    [[AND_X_Y:%.*]] = and i8 [[X]], 16
+; CHECK-NEXT:    call void @use.i8(i8 [[AND_X_NEG_Y]])
+; CHECK-NEXT:    call void @use.i8(i8 [[AND_X_Y]])
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[AND_X_NEG_Y]], [[AND_X_Y]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %and_x_neg_y = and i8 %x, -17
+  %and_x_y = and i8 %x, 16
+  call void @use.i8(i8 %and_x_neg_y)
+  call void @use.i8(i8 %and_x_y)
+  %r = icmp ne i8 %and_x_neg_y, %and_x_y
+  ret i1 %r
+}
+
+define i1 @test_ne_cp2_other_fail2(i8 %x, i8 %yy) {
+; CHECK-LABEL: @test_ne_cp2_other_fail2(
+; CHECK-NEXT:    [[AND_X_NEG_Y:%.*]] = and i8 [[X:%.*]], -16
+; CHECK-NEXT:    [[AND_X_Y:%.*]] = and i8 [[X]], 17
+; CHECK-NEXT:    call void @use.i8(i8 [[AND_X_NEG_Y]])
+; CHECK-NEXT:    call void @use.i8(i8 [[AND_X_Y]])
+; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[AND_X_NEG_Y]], [[AND_X_Y]]
+; CHECK-NEXT:    ret i1 [[R]]
+;
+  %and_x_neg_y = and i8 %x, -16
+  %and_x_y = and i8 %x, 17
+  call void @use.i8(i8 %and_x_neg_y)
+  call void @use.i8(i8 %and_x_y)
+  %r = icmp ne i8 %and_x_neg_y, %and_x_y
+  ret i1 %r
+}

>From 970c0d69e8ebd55d2b21cd5ec215ee9c63c5495f Mon Sep 17 00:00:00 2001
From: Noah Goldstein <goldstein.w.n at gmail.com>
Date: Sat, 8 Jun 2024 14:22:37 -0500
Subject: [PATCH 2/2] [InstCombine] Add combines for `(icmp eq/ne (and X, P2),
 (and X, -P2))`

`(icmp eq/ne (and X, P2), (and X, -P2))`
    -> `(icmp ult/uge X, P2 * 2)`

If `P2` is constant, we can perform this fold profitably even if the
`and` ops are multi-use.

If `P2` is not constant, then we only perform the fold if both `and`
ops are single-use. In that case the fold still saves an instruction
compared to the `xor` + `and` variant.

NB: This came up in some of the diffs resulting from #94648

Proofs: https://alive2.llvm.org/ce/z/mfd3G9
---
 .../InstCombine/InstCombineCompares.cpp       | 38 ++++++++++++++++---
 .../Transforms/InstCombine/and-compare.ll     | 16 +++-----
 2 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 4203147bc6a54..e79913500f320 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -5548,8 +5548,8 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) {
   }
 
   // (X&Z) == (Y&Z) -> (X^Y) & Z == 0
-  if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) &&
-      match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) {
+  if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
+      match(Op1, m_And(m_Value(C), m_Value(D)))) {
     Value *X = nullptr, *Y = nullptr, *Z = nullptr;
 
     if (A == C) {
@@ -5570,10 +5570,36 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) {
       Z = B;
     }
 
-    if (X) { // Build (X^Y) & Z
-      Op1 = Builder.CreateXor(X, Y);
-      Op1 = Builder.CreateAnd(Op1, Z);
-      return new ICmpInst(Pred, Op1, Constant::getNullValue(Op1->getType()));
+    if (X) {
+      // (X&P2) == (X&-P2)
+      //    -> X u< P2*2
+      // (X&P2) != (X&-P2)
+      //    -> X u>= P2*2
+      // iff P2 is not INT_MIN
+      const APInt *CP2;
+      ICmpInst::Predicate P2Pred =
+          Pred == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGE;
+      if (match(X, m_APInt(CP2)) && match(Y, m_SpecificInt(-*CP2)) &&
+          (CP2->isPowerOf2() || CP2->isNegatedPowerOf2()) &&
+          !CP2->isMinSignedValue()) {
+        APInt CMask = CP2->isPowerOf2() ? *CP2 : -*CP2;
+        return new ICmpInst(P2Pred, Z,
+                            ConstantInt::get(Z->getType(), CMask + CMask));
+      }
+
+      if (Op0->hasOneUse() && Op1->hasOneUse()) {
+        // nsw neg precludes INT_MIN
+        if (match(X, m_NSWNeg(m_Specific(Y))))
+          std::swap(X, Y);
+        if (match(Y, m_NSWNeg(m_Specific(X))) &&
+            isKnownToBeAPowerOfTwo(X, /*OrZero=*/false, 0, &I))
+          return new ICmpInst(P2Pred, Z, Builder.CreateAdd(X, X));
+
+        // Build (X^Y) & Z
+        Op1 = Builder.CreateXor(X, Y);
+        Op1 = Builder.CreateAnd(Op1, Z);
+        return new ICmpInst(Pred, Op1, Constant::getNullValue(Op1->getType()));
+      }
     }
   }
 
diff --git a/llvm/test/Transforms/InstCombine/and-compare.ll b/llvm/test/Transforms/InstCombine/and-compare.ll
index e7b24bb0fc3f1..daadbf36d7577 100644
--- a/llvm/test/Transforms/InstCombine/and-compare.ll
+++ b/llvm/test/Transforms/InstCombine/and-compare.ll
@@ -79,10 +79,8 @@ define <2 x i1> @test3vec(<2 x i64> %A) {
 
 define i1 @test_eq_p2(i8 %x, i8 %yy) {
 ; CHECK-LABEL: @test_eq_p2(
-; CHECK-NEXT:    [[Y:%.*]] = shl nuw nsw i8 1, [[YY:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i8 [[Y]], -1
-; CHECK-NEXT:    [[TMP2:%.*]] = xor i8 [[Y]], [[TMP1]]
-; CHECK-NEXT:    [[R:%.*]] = icmp uge i8 [[TMP2]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i8 2, [[YY:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ugt i8 [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %y = shl nsw i8 1, %yy
@@ -96,10 +94,8 @@ define i1 @test_eq_p2(i8 %x, i8 %yy) {
 
 define i1 @test_eq_p2_2(i8 %x, i8 %yy) {
 ; CHECK-LABEL: @test_eq_p2_2(
-; CHECK-NEXT:    [[Y:%.*]] = shl nuw nsw i8 1, [[YY:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = add nsw i8 [[Y]], -1
-; CHECK-NEXT:    [[TMP2:%.*]] = xor i8 [[Y]], [[TMP1]]
-; CHECK-NEXT:    [[R:%.*]] = icmp uge i8 [[TMP2]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP1:%.*]] = shl i8 2, [[YY:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ugt i8 [[TMP1]], [[X:%.*]]
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %y = shl nsw i8 1, %yy
@@ -171,7 +167,7 @@ define i1 @test_ne_cp2(i8 %x, i8 %yy) {
 ; CHECK-NEXT:    [[AND_X_Y:%.*]] = and i8 [[X]], 16
 ; CHECK-NEXT:    call void @use.i8(i8 [[AND_X_NEG_Y]])
 ; CHECK-NEXT:    call void @use.i8(i8 [[AND_X_Y]])
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[AND_X_NEG_Y]], [[AND_X_Y]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ugt i8 [[X]], 31
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %and_x_neg_y = and i8 %x, -16
@@ -188,7 +184,7 @@ define i1 @test_ne_cp2_2(i8 %x, i8 %yy) {
 ; CHECK-NEXT:    [[AND_X_Y:%.*]] = and i8 [[X]], 4
 ; CHECK-NEXT:    call void @use.i8(i8 [[AND_X_NEG_Y]])
 ; CHECK-NEXT:    call void @use.i8(i8 [[AND_X_Y]])
-; CHECK-NEXT:    [[R:%.*]] = icmp ne i8 [[AND_X_Y]], [[AND_X_NEG_Y]]
+; CHECK-NEXT:    [[R:%.*]] = icmp ugt i8 [[X]], 7
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %and_x_neg_y = and i8 %x, -4



More information about the llvm-commits mailing list