[llvm] [InstCombine] Change (add x, c) to (xor x, c) (PR #75129)

Thu Dec 14 04:00:48 PST 2023

https://github.com/Peter9606 updated https://github.com/llvm/llvm-project/pull/75129

>From f84e38579deda4d1f2b6c54b98bcb3617ed82bb6 Mon Sep 17 00:00:00 2001
From: Peter Han <fujun.han at iluvatar.com>
Date: Thu, 14 Dec 2023 13:48:58 +0800
Subject: [PATCH 1/2] [InstCombine][NFC]Pre-commit test for add-constant to
 xor-constant.

Signed-off-by: Peter Han <fujun.han at iluvatar.com>
---
 llvm/test/Transforms/InstCombine/and.ll     | 48 ++++++++++++++++++++-
 llvm/test/Transforms/InstCombine/pr75129.ll | 41 ++++++++++++++++++
 2 files changed, 87 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/Transforms/InstCombine/pr75129.ll

diff --git a/llvm/test/Transforms/InstCombine/and.ll b/llvm/test/Transforms/InstCombine/and.ll
index 79857f3efbc18b..b3ecd3c2b8dcb5 100644
--- a/llvm/test/Transforms/InstCombine/and.ll
+++ b/llvm/test/Transforms/InstCombine/and.ll
@@ -395,8 +395,8 @@ define i8 @test27(i8 %A) {
 
 define i32 @ashr_lowmask(i32 %x) {
 ; CHECK-LABEL: @ashr_lowmask(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X:%.*]], 24
-; CHECK-NEXT:    ret i32 [[TMP1]]
+; CHECK-NEXT:    [[A:%.*]] = lshr i32 [[X:%.*]], 24
+; CHECK-NEXT:    ret i32 [[A]]
 ;
   %a = ashr i32 %x, 24
   %r = and i32 %a, 255
@@ -2711,3 +2711,47 @@ define i32 @canonicalize_and_sub_power2_or_zero_multiuse_nofold(i32 %x, i32 %y)
   %and = and i32 %val, %p2
   ret i32 %and
 }
+
+define i32 @add_constant_equal_with_the_top_bit_of_demandedbits_pass(i32 %x) {
+; CHECK-LABEL: @add_constant_equal_with_the_top_bit_of_demandedbits_pass(
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X:%.*]], 16
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[ADD]], 24
+; CHECK-NEXT:    ret i32 [[AND]]
+;
+  %add = add i32 %x, 16
+  %and = and i32 %add, 24
+  ret i32 %and
+}
+
+define <2 x i16> @add_constant_equal_with_the_top_bit_of_demandedbits_pass_vector(<2 x i16> %x) {
+; CHECK-LABEL: @add_constant_equal_with_the_top_bit_of_demandedbits_pass_vector(
+; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i16> [[X:%.*]], <i16 16, i16 16>
+; CHECK-NEXT:    [[AND:%.*]] = and <2 x i16> [[ADD]], <i16 24, i16 24>
+; CHECK-NEXT:    ret <2 x i16> [[AND]]
+;
+  %add = add <2 x i16> %x, <i16 16, i16 16>
+  %and = and <2 x i16> %add, <i16 24, i16 24>
+  ret <2 x i16> %and
+}
+
+define i32 @add_constant_equal_with_the_top_bit_of_demandedbits_fail1(i32 %x) {
+; CHECK-LABEL: @add_constant_equal_with_the_top_bit_of_demandedbits_fail1(
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X:%.*]], 8
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[ADD]], 24
+; CHECK-NEXT:    ret i32 [[AND]]
+;
+  %add = add i32 %x, 8
+  %and = and i32 %add, 24
+  ret i32 %and
+}
+
+define i32 @add_constant_equal_with_the_top_bit_of_demandedbits_fail2(i32 %x) {
+; CHECK-LABEL: @add_constant_equal_with_the_top_bit_of_demandedbits_fail2(
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X:%.*]], 24
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[ADD]], 24
+; CHECK-NEXT:    ret i32 [[AND]]
+;
+  %add = add i32 %x, 24
+  %and = and i32 %add, 24
+  ret i32 %and
+}
diff --git a/llvm/test/Transforms/InstCombine/pr75129.ll b/llvm/test/Transforms/InstCombine/pr75129.ll
new file mode 100644
index 00000000000000..c9e0fbc0ca0648
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr75129.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+; Pattern:
+;   (or (and (add x, half_c1), c3), (and x, c2))
+; IFF:
+;   c1, c2, c3 are constants
+;   c1 is pow2
+;   c2 < c1
+;   c3 == (c1 - 1) ^ c2
+;   half_c1 == (lshr c1, 1)
+;   (c1 >> 1) & c3 == (c1 >> 1)
+;   x is known to be less than c1
+; Could be transformed into:
+;   (xor x, half_c1)
+; The above transformation is valid because, in
+; (and (add x, half_c1), c3), half_c1 equals the top bit
+; of the demanded mask indicated by (and ..., c3)
+
+define i16 @or_and_add_and() {
+; CHECK-LABEL: @or_and_add_and(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X:%.*]] = call i16 @dummy(), !range [[RNG0:![0-9]+]]
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i16 [[X]], 32
+; CHECK-NEXT:    [[AND1:%.*]] = and i16 [[ADD]], 48
+; CHECK-NEXT:    [[AND2:%.*]] = and i16 [[X]], 15
+; CHECK-NEXT:    [[OR:%.*]] = or disjoint i16 [[AND1]], [[AND2]]
+; CHECK-NEXT:    ret i16 [[OR]]
+;
+entry:
+  %x = call i16 @dummy(), !range !0
+  %add = add i16 32, %x
+  %and1 = and i16 %add, 48
+  %and2 = and i16 %x, 15
+  %or = or i16 %and1, %and2
+  ret i16 %or
+}
+
+declare i16 @dummy()
+
+!0 = !{i16 0, i16 64}

>From 229586608bf86243327293611dcc49ba9518ba3a Mon Sep 17 00:00:00 2001
From: Peter Han <fujun.han at iluvatar.com>
Date: Thu, 14 Dec 2023 19:59:51 +0800
Subject: [PATCH 2/2] [InstCombine]Change (add x, c) to (xor x, c) iff c is
 constant and c equals the top bit of the demanded bits.

Signed-off-by: Peter Han <fujun.han at iluvatar.com>
---
 .../InstCombine/InstCombineSimplifyDemanded.cpp           | 8 ++++++++
 llvm/test/Transforms/InstCombine/and.ll                   | 8 ++++----
 llvm/test/Transforms/InstCombine/pr75129.ll               | 5 +----
 3 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 046ce9d1207e8e..18d4c2dc308e2e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -552,6 +552,14 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
     if (DemandedFromOps.isSubsetOf(LHSKnown.Zero))
       return I->getOperand(1);
 
+    // (add X, C) --> (xor X, C) iff C equals the top bit of the DemandedMask
+    {
+      const APInt *C;
+      if (match(I->getOperand(1), m_APInt(C)) &&
+          C->isOneBitSet(DemandedMask.getActiveBits() - 1))
+        return Builder.CreateXor(I->getOperand(0), ConstantInt::get(VTy, *C));
+    }
+
     // Otherwise just compute the known bits of the result.
     bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
     Known = KnownBits::computeForAddSub(true, NSW, LHSKnown, RHSKnown);
diff --git a/llvm/test/Transforms/InstCombine/and.ll b/llvm/test/Transforms/InstCombine/and.ll
index b3ecd3c2b8dcb5..cb611764ec755b 100644
--- a/llvm/test/Transforms/InstCombine/and.ll
+++ b/llvm/test/Transforms/InstCombine/and.ll
@@ -2714,8 +2714,8 @@ define i32 @canonicalize_and_sub_power2_or_zero_multiuse_nofold(i32 %x, i32 %y)
 
 define i32 @add_constant_equal_with_the_top_bit_of_demandedbits_pass(i32 %x) {
 ; CHECK-LABEL: @add_constant_equal_with_the_top_bit_of_demandedbits_pass(
-; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[X:%.*]], 16
-; CHECK-NEXT:    [[AND:%.*]] = and i32 [[ADD]], 24
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[X:%.*]], 24
+; CHECK-NEXT:    [[AND:%.*]] = xor i32 [[TMP1]], 16
 ; CHECK-NEXT:    ret i32 [[AND]]
 ;
   %add = add i32 %x, 16
@@ -2725,8 +2725,8 @@ define i32 @add_constant_equal_with_the_top_bit_of_demandedbits_pass(i32 %x) {
 
 define <2 x i16> @add_constant_equal_with_the_top_bit_of_demandedbits_pass_vector(<2 x i16> %x) {
 ; CHECK-LABEL: @add_constant_equal_with_the_top_bit_of_demandedbits_pass_vector(
-; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i16> [[X:%.*]], <i16 16, i16 16>
-; CHECK-NEXT:    [[AND:%.*]] = and <2 x i16> [[ADD]], <i16 24, i16 24>
+; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i16> [[X:%.*]], <i16 24, i16 24>
+; CHECK-NEXT:    [[AND:%.*]] = xor <2 x i16> [[TMP1]], <i16 16, i16 16>
 ; CHECK-NEXT:    ret <2 x i16> [[AND]]
 ;
   %add = add <2 x i16> %x, <i16 16, i16 16>
diff --git a/llvm/test/Transforms/InstCombine/pr75129.ll b/llvm/test/Transforms/InstCombine/pr75129.ll
index c9e0fbc0ca0648..c1912514b91bd1 100644
--- a/llvm/test/Transforms/InstCombine/pr75129.ll
+++ b/llvm/test/Transforms/InstCombine/pr75129.ll
@@ -21,10 +21,7 @@ define i16 @or_and_add_and() {
 ; CHECK-LABEL: @or_and_add_and(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[X:%.*]] = call i16 @dummy(), !range [[RNG0:![0-9]+]]
-; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i16 [[X]], 32
-; CHECK-NEXT:    [[AND1:%.*]] = and i16 [[ADD]], 48
-; CHECK-NEXT:    [[AND2:%.*]] = and i16 [[X]], 15
-; CHECK-NEXT:    [[OR:%.*]] = or disjoint i16 [[AND1]], [[AND2]]
+; CHECK-NEXT:    [[OR:%.*]] = xor i16 [[X]], 32
 ; CHECK-NEXT:    ret i16 [[OR]]
 ;
 entry: