[llvm] 8a7e547 - [InstCombine] Canonicalize `(X +/- Y) & Y` into `~X & Y` when Y is a power of 2 (#67915)

Thu Oct 12 02:18:16 PDT 2023

Author: Yingwei Zheng
Date: 2023-10-12T17:18:12+08:00
New Revision: 8a7e5477982bcbfd46495e8e5e404fe43ad22d97

URL: https://github.com/llvm/llvm-project/commit/8a7e5477982bcbfd46495e8e5e404fe43ad22d97
DIFF: https://github.com/llvm/llvm-project/commit/8a7e5477982bcbfd46495e8e5e404fe43ad22d97.diff

LOG: [InstCombine] Canonicalize `(X +/- Y) & Y` into `~X & Y` when Y is a power of 2 (#67915)

This patch canonicalizes the pattern `(X +/- Y) & Y` into `~X & Y` when `Y` is known to be a power of 2 or zero.
This reduces the number of patterns that need to be matched in #67836 and exposes more optimization opportunities.
Alive2: https://alive2.llvm.org/ce/z/LBpvRF

Added: 
    

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
    llvm/test/Transforms/InstCombine/and.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index cbdab3e9c5fb91d..4322cc96f5a2b6c 100644

--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2250,6 +2250,14 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
     return SelectInst::Create(Cmp, ConstantInt::getNullValue(Ty), Y);
   }
 
+  // Canonicalize:
+  // (X +/- Y) & Y --> ~X & Y when Y is a power of 2 or zero.
+  if (match(&I, m_c_And(m_Value(Y), m_OneUse(m_CombineOr(
+                                        m_c_Add(m_Value(X), m_Deferred(Y)),
+                                        m_Sub(m_Value(X), m_Deferred(Y)))))) &&
+      isKnownToBeAPowerOfTwo(Y, /*OrZero*/ true, /*Depth*/ 0, &I))
+    return BinaryOperator::CreateAnd(Builder.CreateNot(X), Y);
+
   const APInt *C;
   if (match(Op1, m_APInt(C))) {
     const APInt *XorC;

diff  --git a/llvm/test/Transforms/InstCombine/and.ll b/llvm/test/Transforms/InstCombine/and.ll
index 48e21a581b7d329..95b1b0e73ea5c7a 100644
--- a/llvm/test/Transforms/InstCombine/and.ll
+++ b/llvm/test/Transforms/InstCombine/and.ll
@@ -1595,8 +1595,8 @@ define <2 x i8> @flip_masked_bit_uniform(<2 x i8> %A) {
 
 define <2 x i8> @flip_masked_bit_undef(<2 x i8> %A) {
 ; CHECK-LABEL: @flip_masked_bit_undef(
-; CHECK-NEXT:    [[B:%.*]] = add <2 x i8> [[A:%.*]], <i8 16, i8 undef>
-; CHECK-NEXT:    [[C:%.*]] = and <2 x i8> [[B]], <i8 16, i8 undef>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i8> [[A:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[C:%.*]] = and <2 x i8> [[TMP1]], <i8 16, i8 undef>
 ; CHECK-NEXT:    ret <2 x i8> [[C]]
 ;
   %B = add <2 x i8> %A, <i8 16, i8 undef>
@@ -1606,8 +1606,8 @@ define <2 x i8> @flip_masked_bit_undef(<2 x i8> %A) {
 
 define <2 x i8> @flip_masked_bit_nonuniform(<2 x i8> %A) {
 ; CHECK-LABEL: @flip_masked_bit_nonuniform(
-; CHECK-NEXT:    [[B:%.*]] = add <2 x i8> [[A:%.*]], <i8 16, i8 4>
-; CHECK-NEXT:    [[C:%.*]] = and <2 x i8> [[B]], <i8 16, i8 4>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i8> [[A:%.*]], <i8 -1, i8 -1>
+; CHECK-NEXT:    [[C:%.*]] = and <2 x i8> [[TMP1]], <i8 16, i8 4>
 ; CHECK-NEXT:    ret <2 x i8> [[C]]
 ;
   %B = add <2 x i8> %A, <i8 16, i8 4>
@@ -2553,8 +2553,8 @@ define i32 @canonicalize_and_add_power2_or_zero(i32 %x, i32 %y) {
 ; CHECK-NEXT:    [[P2:%.*]] = and i32 [[NY]], [[Y]]
 ; CHECK-NEXT:    call void @use32(i32 [[P2]])
 ; CHECK-NEXT:    [[X2:%.*]] = mul i32 [[X:%.*]], [[X]]
-; CHECK-NEXT:    [[VAL:%.*]] = add i32 [[X2]], [[P2]]
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[VAL]], [[P2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[X2]], -1
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[P2]], [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[AND]]
 ;
   %ny = sub i32 0, %y
@@ -2572,8 +2572,8 @@ define i32 @canonicalize_and_sub_power2_or_zero(i32 %x, i32 %y) {
 ; CHECK-NEXT:    [[NY:%.*]] = sub i32 0, [[Y:%.*]]
 ; CHECK-NEXT:    [[P2:%.*]] = and i32 [[NY]], [[Y]]
 ; CHECK-NEXT:    call void @use32(i32 [[P2]])
-; CHECK-NEXT:    [[VAL:%.*]] = sub i32 [[X:%.*]], [[P2]]
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[VAL]], [[P2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[X:%.*]], -1
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[P2]], [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[AND]]
 ;
   %ny = sub i32 0, %y
@@ -2590,8 +2590,8 @@ define i32 @canonicalize_and_add_power2_or_zero_commuted1(i32 %x, i32 %y) {
 ; CHECK-NEXT:    [[NY:%.*]] = sub i32 0, [[Y:%.*]]
 ; CHECK-NEXT:    [[P2:%.*]] = and i32 [[NY]], [[Y]]
 ; CHECK-NEXT:    call void @use32(i32 [[P2]])
-; CHECK-NEXT:    [[VAL:%.*]] = add i32 [[P2]], [[X:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[VAL]], [[P2]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[X:%.*]], -1
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[P2]], [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[AND]]
 ;
   %ny = sub i32 0, %y
@@ -2609,8 +2609,8 @@ define i32 @canonicalize_and_add_power2_or_zero_commuted2(i32 %x, i32 %y) {
 ; CHECK-NEXT:    [[P2:%.*]] = and i32 [[NY]], [[Y]]
 ; CHECK-NEXT:    call void @use32(i32 [[P2]])
 ; CHECK-NEXT:    [[X2:%.*]] = mul i32 [[X:%.*]], [[X]]
-; CHECK-NEXT:    [[VAL:%.*]] = add i32 [[X2]], [[P2]]
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[P2]], [[VAL]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[X2]], -1
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[P2]], [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[AND]]
 ;
   %ny = sub i32 0, %y
@@ -2628,8 +2628,8 @@ define i32 @canonicalize_and_add_power2_or_zero_commuted3(i32 %x, i32 %y) {
 ; CHECK-NEXT:    [[NY:%.*]] = sub i32 0, [[Y:%.*]]
 ; CHECK-NEXT:    [[P2:%.*]] = and i32 [[NY]], [[Y]]
 ; CHECK-NEXT:    call void @use32(i32 [[P2]])
-; CHECK-NEXT:    [[VAL:%.*]] = add i32 [[P2]], [[X:%.*]]
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[P2]], [[VAL]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[X:%.*]], -1
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[P2]], [[TMP1]]
 ; CHECK-NEXT:    ret i32 [[AND]]
 ;
   %ny = sub i32 0, %y