[llvm] a52e780 - [GlobalISel] Combine (xor (and x, y), y) -> (and (not x), y)

Jessica Paquette via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 28 10:08:26 PDT 2020


Author: Jessica Paquette
Date: 2020-09-28T10:08:14-07:00
New Revision: a52e78012a548c231fb8cba81861f6ffb2246726

URL: https://github.com/llvm/llvm-project/commit/a52e78012a548c231fb8cba81861f6ffb2246726
DIFF: https://github.com/llvm/llvm-project/commit/a52e78012a548c231fb8cba81861f6ffb2246726.diff

LOG: [GlobalISel] Combine (xor (and x, y), y) -> (and (not x), y)

When we see this:

```
%and = G_AND %x, %y
%xor = G_XOR %and, %y
```

Produce this:

```
%not = G_XOR %x, -1
%new_and = G_AND %not, %y
```

as long as we are guaranteed to eliminate the original G_AND.

Also matches all commuted forms. E.g.

```
%and = G_AND %y, %x
%xor = G_XOR %y, %and
```

will be matched as well.
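
Not part of the patch, but as a quick sanity check of the bitwise identity the combine relies on, here is a minimal standalone C++ sketch that exhaustively verifies (x & y) ^ y == (~x) & y over all 8-bit value pairs:

```
// Standalone sanity check (not part of this patch): exhaustively verify the
// bitwise identity behind the combine, (x & y) ^ y == (~x) & y, on all
// 8-bit value pairs.
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  for (unsigned X = 0; X <= 0xFF; ++X) {
    for (unsigned Y = 0; Y <= 0xFF; ++Y) {
      uint8_t LHS = static_cast<uint8_t>((X & Y) ^ Y); // (xor (and x, y), y)
      uint8_t RHS = static_cast<uint8_t>(~X & Y);      // (and (not x), y)
      assert(LHS == RHS && "identity must hold for the combine to be valid");
    }
  }
  std::puts("(x & y) ^ y == (~x) & y holds for all 8-bit pairs");
  return 0;
}
```

Since the identity holds bit-for-bit, the rewrite is equally valid for vectors, which the fold_vector test below exercises.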

Differential Revision: https://reviews.llvm.org/D88104

Added: 
    llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-xor-of-and-with-same-reg.mir

Modified: 
    llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
    llvm/include/llvm/Target/GlobalISel/Combine.td
    llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
    llvm/lib/Target/AArch64/AArch64Combine.td

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 8ee3b545815b..2eab322df8b0 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -415,6 +415,14 @@ class CombinerHelper {
   bool matchNotCmp(MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate);
   bool applyNotCmp(MachineInstr &MI, SmallVectorImpl<Register> &RegsToNegate);
 
+  /// Fold (xor (and x, y), y) -> (and (not x), y)
+  ///{
+  bool matchXorOfAndWithSameReg(MachineInstr &MI,
+                                std::pair<Register, Register> &MatchInfo);
+  bool applyXorOfAndWithSameReg(MachineInstr &MI,
+                                std::pair<Register, Register> &MatchInfo);
+  ///}
+
   /// Try to transform \p MI by using all of the above
   /// combine functions. Returns true if changed.
   bool tryCombine(MachineInstr &MI);

diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index d3ccbb404949..316f6f6885c9 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -480,6 +480,16 @@ def mul_by_neg_one: GICombineRule <
   (apply [{ return Helper.applyCombineMulByNegativeOne(*${root}); }])
 >;
 
+// Fold (xor (and x, y), y) -> (and (not x), y)
+def xor_of_and_with_same_reg_matchinfo :
+    GIDefMatchData<"std::pair<Register, Register>">;
+def xor_of_and_with_same_reg: GICombineRule <
+  (defs root:$root, xor_of_and_with_same_reg_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_XOR):$root,
+         [{ return Helper.matchXorOfAndWithSameReg(*${root}, ${matchinfo}); }]),
+  (apply [{ return Helper.applyXorOfAndWithSameReg(*${root}, ${matchinfo}); }])
+>;
+
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                      undef_to_negative_one,
@@ -515,4 +525,4 @@ def all_combines : GICombineGroup<[trivial_combines, ptr_add_immed_chain,
     not_cmp_fold, opt_brcond_by_inverting_cond,
     unmerge_merge, fabs_fabs_fold, unmerge_cst, unmerge_dead_to_trunc,
     unmerge_zext_to_zext, trunc_ext_fold, trunc_shl,
-    constant_fp_op]>;
+    constant_fp_op, xor_of_and_with_same_reg]>;

diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 65a71c4e350f..2024d87c60a0 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2720,6 +2720,52 @@ bool CombinerHelper::applyNotCmp(MachineInstr &MI,
   return true;
 }
 
+bool CombinerHelper::matchXorOfAndWithSameReg(
+    MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
+  // Match (xor (and x, y), y) (or any of its commuted cases)
+  assert(MI.getOpcode() == TargetOpcode::G_XOR);
+  Register &X = MatchInfo.first;
+  Register &Y = MatchInfo.second;
+  Register AndReg = MI.getOperand(1).getReg();
+  Register SharedReg = MI.getOperand(2).getReg();
+
+  // Find a G_AND on either side of the G_XOR.
+  // Look for one of
+  //
+  // (xor (and x, y), SharedReg)
+  // (xor SharedReg, (and x, y))
+  if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
+    std::swap(AndReg, SharedReg);
+    if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
+      return false;
+  }
+
+  // Only do this if we'll eliminate the G_AND.
+  if (!MRI.hasOneNonDBGUse(AndReg))
+    return false;
+
+  // We can combine if SharedReg is the same as either the LHS or RHS of the
+  // G_AND.
+  if (Y != SharedReg)
+    std::swap(X, Y);
+  return Y == SharedReg;
+}
+
+bool CombinerHelper::applyXorOfAndWithSameReg(
+    MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
+  // Fold (xor (and x, y), y) -> (and (not x), y)
+  Builder.setInstrAndDebugLoc(MI);
+  Register X, Y;
+  std::tie(X, Y) = MatchInfo;
+  auto Not = Builder.buildNot(MRI.getType(X), X);
+  Observer.changingInstr(MI);
+  MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
+  MI.getOperand(1).setReg(Not->getOperand(0).getReg());
+  MI.getOperand(2).setReg(Y);
+  Observer.changedInstr(MI);
+  return true;
+}
+
 bool CombinerHelper::tryCombine(MachineInstr &MI) {
   if (tryCombineCopy(MI))
     return true;

diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index e493c216d858..de1639672871 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -89,6 +89,6 @@ def AArch64PostLegalizerCombinerHelper
                        [copy_prop, erase_undef_store, combines_for_extload,
                         sext_trunc_sextload, shuffle_vector_pseudos,
                         hoist_logic_op_with_same_opcode_hands,
-                        and_trivial_mask, vashr_vlshr_imm]> {
+                        and_trivial_mask, vashr_vlshr_imm, xor_of_and_with_same_reg]> {
   let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
 }

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-xor-of-and-with-same-reg.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-xor-of-and-with-same-reg.mir
new file mode 100644
index 000000000000..e8254c03ab63
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-xor-of-and-with-same-reg.mir
@@ -0,0 +1,177 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+
+# RUN: llc -mtriple aarch64 -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+---
+name:            fold_scalar
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $w0, $w1
+    ; (xor (and x, y), y) -> (and (not x), y)
+    ; CHECK-LABEL: name: fold_scalar
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %x:_(s32) = COPY $w0
+    ; CHECK: %y:_(s32) = COPY $w1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR %x, [[C]]
+    ; CHECK: %xor:_(s32) = G_AND [[XOR]], %y
+    ; CHECK: $w0 = COPY %xor(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %and:_(s32) = G_AND %x, %y
+    %xor:_(s32) = G_XOR %and, %y
+    $w0 = COPY %xor(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            fold_vector
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0, $x1
+    ; Vector edition
+    ; CHECK-LABEL: name: fold_vector
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: %x:_(<2 x s32>) = COPY $x0
+    ; CHECK: %y:_(<2 x s32>) = COPY $x1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
+    ; CHECK: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR %x, [[BUILD_VECTOR]]
+    ; CHECK: %xor:_(<2 x s32>) = G_AND [[XOR]], %y
+    ; CHECK: $x0 = COPY %xor(<2 x s32>)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %x:_(<2 x s32>) = COPY $x0
+    %y:_(<2 x s32>) = COPY $x1
+    %and:_(<2 x s32>) = G_AND %x, %y
+    %xor:_(<2 x s32>) = G_XOR %and, %y
+    $x0 = COPY %xor(<2 x s32>)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            fold_commuted_and
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $w0, $w1
+    ; (xor (and y, x), y) -> (and (not x), y)
+    ; CHECK-LABEL: name: fold_commuted_and
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %x:_(s32) = COPY $w0
+    ; CHECK: %y:_(s32) = COPY $w1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR %x, [[C]]
+    ; CHECK: %xor:_(s32) = G_AND [[XOR]], %y
+    ; CHECK: $w0 = COPY %xor(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %and:_(s32) = G_AND %y, %x
+    %xor:_(s32) = G_XOR %and, %y
+    $w0 = COPY %xor(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            fold_commuted_xor
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $w0, $w1
+    ; (xor y, (and x, y)) -> (and (not x), y)
+    ; CHECK-LABEL: name: fold_commuted_xor
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %x:_(s32) = COPY $w0
+    ; CHECK: %y:_(s32) = COPY $w1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR %x, [[C]]
+    ; CHECK: %xor:_(s32) = G_AND [[XOR]], %y
+    ; CHECK: $w0 = COPY %xor(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %and:_(s32) = G_AND %x, %y
+    %xor:_(s32) = G_XOR %y, %and
+    $w0 = COPY %xor(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            fold_commuted_xor_and
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $w0, $w1
+    ; (xor y, (and x, y)) -> (and (not x), y)
+    ; CHECK-LABEL: name: fold_commuted_xor_and
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %x:_(s32) = COPY $w0
+    ; CHECK: %y:_(s32) = COPY $w1
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; CHECK: [[XOR:%[0-9]+]]:_(s32) = G_XOR %x, [[C]]
+    ; CHECK: %xor:_(s32) = G_AND [[XOR]], %y
+    ; CHECK: $w0 = COPY %xor(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %and:_(s32) = G_AND %y, %x
+    %xor:_(s32) = G_XOR %y, %and
+    $w0 = COPY %xor(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            dont_fold_different_regs
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $w0, $w1, $w2
+    ; The G_AND does not share any registers with the G_XOR
+    ; CHECK-LABEL: name: dont_fold_different_regs
+    ; CHECK: liveins: $w0, $w1, $w2
+    ; CHECK: %x:_(s32) = COPY $w0
+    ; CHECK: %y:_(s32) = COPY $w1
+    ; CHECK: %z:_(s32) = COPY $w2
+    ; CHECK: %and:_(s32) = G_AND %x, %z
+    ; CHECK: %xor:_(s32) = G_XOR %and, %y
+    ; CHECK: $w0 = COPY %xor(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %z:_(s32) = COPY $w2
+    %and:_(s32) = G_AND %x, %z
+    %xor:_(s32) = G_XOR %and, %y
+    $w0 = COPY %xor(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            dont_fold_more_than_one_use
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $w0, $w1, $w2
+    ; Don't fold when the G_AND is used outside the G_XOR.
+    ;
+    ; CHECK-LABEL: name: dont_fold_more_than_one_use
+    ; CHECK: liveins: $w0, $w1, $w2
+    ; CHECK: %x:_(s32) = COPY $w0
+    ; CHECK: %y:_(s32) = COPY $w1
+    ; CHECK: %z:_(s32) = COPY $w2
+    ; CHECK: %and:_(s32) = G_AND %x, %z
+    ; CHECK: %xor:_(s32) = G_XOR %and, %y
+    ; CHECK: %add:_(s32) = G_ADD %and, %xor
+    ; CHECK: $w0 = COPY %add(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %z:_(s32) = COPY $w2
+    %and:_(s32) = G_AND %x, %z
+    %xor:_(s32) = G_XOR %and, %y
+    %add:_(s32) = G_ADD %and, %xor
+    $w0 = COPY %add(s32)
+    RET_ReallyLR implicit $w0
