[llvm] [InstCombine] Simplify a complex OR to XOR (PR #75129)

Mon Dec 11 22:46:29 PST 2023

https://github.com/Peter9606 updated https://github.com/llvm/llvm-project/pull/75129

>From 237caa9e2bc6d72a7e64f9eeb94ea0b2d196bc9e Mon Sep 17 00:00:00 2001
From: Peter Han <fujun.han at iluvatar.com>
Date: Tue, 12 Dec 2023 08:21:07 +0800
Subject: [PATCH 1/2] [InstCombine][NFC] Pre-commit test for an OR pattern that
 can be simplified to XOR.

Signed-off-by: Peter Han <fujun.han at iluvatar.com>
---
 ...-and-add-constant-constant-and-constant.ll | 130 ++++++++++++++++++
 1 file changed, 130 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/or-and-add-constant-constant-and-constant.ll

diff --git a/llvm/test/Transforms/InstCombine/or-and-add-constant-constant-and-constant.ll b/llvm/test/Transforms/InstCombine/or-and-add-constant-constant-and-constant.ll
new file mode 100644
index 00000000000000..e244a5a5e4a75a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/or-and-add-constant-constant-and-constant.ll
@@ -0,0 +1,130 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+; Pattern:
+;   (or (and (add x, half_c1), c3), (and x, c2))
+; IFF:
+;   c1, c2, c3 are constants
+;   c1 is pow2
+;   c2 < c1
+;   c3 == (c1 - 1) ^ c2
+;   half_c1 == (lshr c1, 1)
+;   (c1 >> 1) & c3 == (c1 >> 1)
+;   x is known to be less than c1
+; Could be transformed into:
+;   (xor x, half_c1)
+; Proof: https://alive2.llvm.org/ce/z/Lfax3w
+
+define i16 @pass.i16() { ; scalar positive case: every precondition holds
+; CHECK-LABEL: @pass.i16(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X:%.*]] = call i16 @dummy.i16(), !range [[RNG0:![0-9]+]]
+; CHECK-NEXT:    [[OR:%.*]] = xor i16 [[X]], 32
+; CHECK-NEXT:    ret i16 [[OR]]
+;
+entry:
+  %x = call i16 @dummy.i16(), !range !0 ; !0 bounds x to [0, 64), i.e. x < c1
+  %add = add i16 %x, 32 ; half_c1 = 32, so c1 = 64
+  %and1 = and i16 %add, 48 ; c3 = 48 == (64 - 1) ^ 15, and 32 & 48 == 32
+  %and2 = and i16 %x, 15 ; c2 = 15 < c1
+  %or = or i16 %and1, %and2 ; expected to become (xor %x, 32)
+  ret i16 %or
+}
+
+define <2 x i16> @pass.v2i16() { ; vector positive case with splat constants
+; CHECK-LABEL: @pass.v2i16(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X:%.*]] = call <2 x i16> @dummy.v2i16(), !range [[RNG0]]
+; CHECK-NEXT:    [[OR:%.*]] = xor <2 x i16> [[X]], <i16 32, i16 32>
+; CHECK-NEXT:    ret <2 x i16> [[OR]]
+;
+entry:
+  %x = call <2 x i16> @dummy.v2i16(), !range !0 ; same [0, 64) range as the scalar test
+  %add = add <2 x i16> %x, <i16 32, i16 32> ; half_c1 = splat 32, c1 = 64
+  %and1 = and <2 x i16> %add, <i16 48, i16 48> ; c3 = splat 48
+  %and2 = and <2 x i16> %x, <i16 15, i16 15> ; c2 = splat 15
+  %or = or <2 x i16> %and1, %and2 ; expected to become (xor %x, splat 32)
+  ret <2 x i16> %or
+}
+
+
+define i16 @fail.i16.1() { ; negative case: x has no known upper bound
+; CHECK-LABEL: @fail.i16.1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X:%.*]] = call i16 @dummy.i16()
+; CHECK-NEXT:    [[ADD:%.*]] = add i16 [[X]], 32
+; CHECK-NEXT:    [[AND1:%.*]] = and i16 [[ADD]], 48
+; CHECK-NEXT:    [[AND2:%.*]] = and i16 [[X]], 15
+; CHECK-NEXT:    [[OR:%.*]] = or disjoint i16 [[AND1]], [[AND2]]
+; CHECK-NEXT:    ret i16 [[OR]]
+;
+entry:
+  %x = call i16 @dummy.i16() ; no !range here, so "x < c1" cannot be proven
+  %add = add i16 %x, 32 ; half_c1 = 32, c1 = 64
+  %and1 = and i16 %add, 48 ; c3 = 48
+  %and2 = and i16 %x, 15 ; c2 = 15
+  %or = or i16 %and1, %and2 ; must stay an or
+  ret i16 %or
+}
+
+define i16 @fail.i16.2() { ; negative case: the add constant does not give a pow2 c1
+; CHECK-LABEL: @fail.i16.2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X:%.*]] = call i16 @dummy.i16(), !range [[RNG0]]
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i16 [[X]], 31
+; CHECK-NEXT:    [[AND1:%.*]] = and i16 [[ADD]], 48
+; CHECK-NEXT:    [[AND2:%.*]] = and i16 [[X]], 15
+; CHECK-NEXT:    [[OR:%.*]] = or disjoint i16 [[AND1]], [[AND2]]
+; CHECK-NEXT:    ret i16 [[OR]]
+;
+entry:
+  %x = call i16 @dummy.i16(), !range !0 ; x in [0, 64)
+  %add = add i16 %x, 31 ; 31 << 1 = 62 is not a power of two
+  %and1 = and i16 %add, 48 ; c3 = 48
+  %and2 = and i16 %x, 15 ; c2 = 15
+  %or = or i16 %and1, %and2 ; must stay an or
+  ret i16 %or
+}
+
+define i16 @fail.i16.3() { ; negative case: c2 does not complement c3
+; CHECK-LABEL: @fail.i16.3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X:%.*]] = call i16 @dummy.i16(), !range [[RNG0]]
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i16 [[X]], 32
+; CHECK-NEXT:    [[AND1:%.*]] = and i16 [[ADD]], 48
+; CHECK-NEXT:    [[AND2:%.*]] = and i16 [[X]], 32
+; CHECK-NEXT:    [[OR:%.*]] = or i16 [[AND1]], [[AND2]]
+; CHECK-NEXT:    ret i16 [[OR]]
+;
+entry:
+  %x = call i16 @dummy.i16(), !range !0 ; x in [0, 64)
+  %add = add i16 %x, 32 ; half_c1 = 32, c1 = 64
+  %and1 = and i16 %add, 48 ; c3 = 48
+  %and2 = and i16 %x, 32 ; c2 = 32, but (64 - 1) ^ 32 = 31 != c3
+  %or = or i16 %and1, %and2 ; must stay an or
+  ret i16 %or
+}
+
+define i16 @fail.i16.4() { ; negative case: c3 has an extra low bit set
+; CHECK-LABEL: @fail.i16.4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[X:%.*]] = call i16 @dummy.i16(), !range [[RNG0]]
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i16 [[X]], 32
+; CHECK-NEXT:    [[AND1:%.*]] = and i16 [[ADD]], 49
+; CHECK-NEXT:    [[AND2:%.*]] = and i16 [[X]], 15
+; CHECK-NEXT:    [[OR:%.*]] = or i16 [[AND1]], [[AND2]]
+; CHECK-NEXT:    ret i16 [[OR]]
+;
+entry:
+  %x = call i16 @dummy.i16(), !range !0 ; x in [0, 64)
+  %add = add i16 %x, 32 ; half_c1 = 32, c1 = 64
+  %and1 = and i16 %add, 49 ; c3 = 49, but (64 - 1) ^ 15 = 48
+  %and2 = and i16 %x, 15 ; c2 = 15
+  %or = or i16 %and1, %and2 ; must stay an or
+  ret i16 %or
+}
+
+declare i16 @dummy.i16() ; opaque source of the scalar x operand
+declare <2 x i16> @dummy.v2i16() ; opaque source of the vector x operand
+
+!0 = !{i16 0, i16 64} ; half-open range [0, 64): the value is u< c1 = 64

>From 3cdf54e3f39fbcd62fb1c610a27f23c24db0d0b7 Mon Sep 17 00:00:00 2001
From: Peter Han <fujun.han at iluvatar.com>
Date: Tue, 12 Dec 2023 14:45:28 +0800
Subject: [PATCH 2/2] [InstCombine]Simplify a complex OR to XOR

The following pattern:
  (or (and (add x, half_c1), c3), (and x, c2))
  IFF
    c1, c2, c3 are constants
    c1 is pow2
    c2 < c1
    c3 == (c1 - 1) ^ c2
    half_c1 == (lshr c1, 1)
    (c1 >> 1) & c3 == (c1 >> 1)
    x is known to be less than c1
can be simplified into:
  (xor x, half_c1)

Proof: https://alive2.llvm.org/ce/z/Lfax3w

Signed-off-by: Peter Han <fujun.han at iluvatar.com>
---
 .../InstCombine/InstCombineAndOrXor.cpp       | 20 ++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 6002f599ca71ab..9fbd4f7b53ecd3 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3343,6 +3343,25 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
   if (Instruction *X = foldComplexAndOrPatterns(I, Builder))
     return X;
 
+  // Fold (or (and (add X, HalfC1), C3), (and X, C2)) --> (xor X, HalfC1)
+  // iff C1 = HalfC1 << 1 is a power of 2, C2 u< C1, C3 == (C1 - 1) ^ C2,
+  // (HalfC1 & C3) == HalfC1, and X is known to be u< C1. Both 'and's must
+  // read the same X, hence m_Deferred (a second m_Value would rebind X).
+  Type *Ty = I.getType();
+  {
+    Value *X = nullptr;
+    const APInt *HalfC1 = nullptr, *C2 = nullptr, *C3 = nullptr;
+    if (match(&I, m_c_Or(m_And(m_Add(m_Value(X), m_APInt(HalfC1)), m_APInt(C3)),
+                         m_And(m_Deferred(X), m_APInt(C2))))) {
+      const APInt C1 = HalfC1->shl(1);
+      KnownBits KnownX = computeKnownBits(X, 0, &I);
+      if (C1.isPowerOf2() && C2->ult(C1) && (*C3 == (*C2 ^ (C1 - 1))) &&
+          ((*HalfC1 & *C3) == *HalfC1) && KnownX.getMaxValue().ult(C1)) {
+        return BinaryOperator::CreateXor(X, ConstantInt::get(Ty, *HalfC1));
+      }
+    }
+  }
+
   // (A&B)|(A&C) -> A&(B|C) etc
   if (Value *V = foldUsingDistributiveLaws(I))
     return replaceInstUsesWith(I, V);
@@ -3351,7 +3370,6 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
     return replaceInstUsesWith(I, V);
 
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
-  Type *Ty = I.getType();
   if (Ty->isIntOrIntVectorTy(1)) {
     if (auto *SI0 = dyn_cast<SelectInst>(Op0)) {
       if (auto *R =