[llvm] r330646 - [DAGCombiner] Unfold scalar masked merge if profitable

Roman Lebedev via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 23 13:38:50 PDT 2018


Author: lebedevri
Date: Mon Apr 23 13:38:49 2018
New Revision: 330646

URL: http://llvm.org/viewvc/llvm-project?rev=330646&view=rev
Log:
[DAGCombiner] Unfold scalar masked merge if profitable

Summary:
This is [[ https://bugs.llvm.org/show_bug.cgi?id=37104 | PR37104 ]].

[[ https://bugs.llvm.org/show_bug.cgi?id=6773 | PR6773 ]] will introduce an IR canonicalization that is likely bad for the end assembly.
Previously, `andl`+`andn`/`andps`+`andnps` / `bic`/`bsl` would be generated. (see `@out`)
Now, they would no longer be generated (see `@in`).
So we need to make sure that they are still generated.

If the mask is constant, we do nothing. InstCombine should have unfolded it.
Otherwise, I use the `hasAndNot()` TLI hook.

For now, only handle scalars.

https://rise4fun.com/Alive/bO6

----

I *really* don't like the code I wrote in `DAGCombiner::unfoldMaskedMerge()`.
It is super fragile. Is there something like IR Pattern Matchers for this?

Reviewers: spatel, craig.topper, RKSimon, javed.absar

Reviewed By: spatel

Subscribers: andreadb, courbet, kristof.beyls, javed.absar, rengolin, nemanjai, llvm-commits

Differential Revision: https://reviews.llvm.org/D45733

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll
    llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=330646&r1=330645&r2=330646&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Mon Apr 23 13:38:49 2018
@@ -414,6 +414,7 @@ namespace {
                                    SDValue N2, SDValue N3, ISD::CondCode CC);
     SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                               const SDLoc &DL);
+    SDValue unfoldMaskedMerge(SDNode *N);
     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                           const SDLoc &DL, bool foldBooleans);
     SDValue rebuildSetCC(SDValue N);
@@ -5361,6 +5362,68 @@ SDValue DAGCombiner::MatchLoadCombine(SD
   return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
 }
 
+// If the target has andn, bsl, or a similar bit-select instruction,
+// we want to unfold masked merge, with canonical pattern of:
+//   |        A  |  |B|
+//   ((x ^ y) & m) ^ y
+//    |  D  |
+// Into:
+//   (x & m) | (y & ~m)
+SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
+  assert(N->getOpcode() == ISD::XOR);
+
+  EVT VT = N->getValueType(0);
+
+  // FIXME
+  if (VT.isVector())
+    return SDValue();
+
+  // There are 3 commutable operators in the pattern,
+  // so we have to deal with 8 possible variants of the basic pattern.
+  SDValue X, Y, M;
+  auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
+    if (And.getOpcode() != ISD::AND || !And.hasOneUse())
+      return false;
+    if (And.getOperand(XorIdx).getOpcode() != ISD::XOR ||
+        !And.getOperand(XorIdx).hasOneUse())
+      return false;
+    SDValue Xor0 = And.getOperand(XorIdx).getOperand(0);
+    SDValue Xor1 = And.getOperand(XorIdx).getOperand(1);
+    if (Other == Xor0)
+      std::swap(Xor0, Xor1);
+    if (Other != Xor1)
+      return false;
+    X = Xor0;
+    Y = Xor1;
+    M = And.getOperand(XorIdx ? 0 : 1);
+    return true;
+  };
+
+  SDValue A = N->getOperand(0);
+  SDValue B = N->getOperand(1);
+  if (!matchAndXor(A, 0, B) && !matchAndXor(A, 1, B) && !matchAndXor(B, 0, A) &&
+      !matchAndXor(B, 1, A))
+    return SDValue();
+
+  // Don't do anything if the mask is constant. This should not be reachable.
+  // InstCombine should have already unfolded this pattern, and DAGCombiner
+  // probably shouldn't produce it, too.
+  if (isa<ConstantSDNode>(M.getNode()))
+    return SDValue();
+
+  // We can transform if the target has AndNot
+  if (!TLI.hasAndNot(M))
+    return SDValue();
+
+  SDLoc DL(N);
+
+  SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
+  SDValue NotM = DAG.getNOT(DL, M, VT);
+  SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
+
+  return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
+}
+
 SDValue DAGCombiner::visitXOR(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -5516,6 +5579,10 @@ SDValue DAGCombiner::visitXOR(SDNode *N)
     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
       return Tmp;
 
+  // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
+  if (SDValue MM = unfoldMaskedMerge(N))
+    return MM;
+
   // Simplify the expression using non-local knowledge.
   if (SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);

Modified: llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll?rev=330646&r1=330645&r2=330646&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll Mon Apr 23 13:38:49 2018
@@ -65,9 +65,9 @@ define i64 @out64(i64 %x, i64 %y, i64 %m
 define i8 @in8(i8 %x, i8 %y, i8 %mask) {
 ; CHECK-LABEL: in8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    eor w8, w0, w1
-; CHECK-NEXT:    and w8, w8, w2
-; CHECK-NEXT:    eor w0, w8, w1
+; CHECK-NEXT:    and w8, w0, w2
+; CHECK-NEXT:    bic w9, w1, w2
+; CHECK-NEXT:    orr w0, w8, w9
 ; CHECK-NEXT:    ret
   %n0 = xor i8 %x, %y
   %n1 = and i8 %n0, %mask
@@ -78,9 +78,9 @@ define i8 @in8(i8 %x, i8 %y, i8 %mask) {
 define i16 @in16(i16 %x, i16 %y, i16 %mask) {
 ; CHECK-LABEL: in16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    eor w8, w0, w1
-; CHECK-NEXT:    and w8, w8, w2
-; CHECK-NEXT:    eor w0, w8, w1
+; CHECK-NEXT:    and w8, w0, w2
+; CHECK-NEXT:    bic w9, w1, w2
+; CHECK-NEXT:    orr w0, w8, w9
 ; CHECK-NEXT:    ret
   %n0 = xor i16 %x, %y
   %n1 = and i16 %n0, %mask
@@ -91,9 +91,9 @@ define i16 @in16(i16 %x, i16 %y, i16 %ma
 define i32 @in32(i32 %x, i32 %y, i32 %mask) {
 ; CHECK-LABEL: in32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    eor w8, w0, w1
-; CHECK-NEXT:    and w8, w8, w2
-; CHECK-NEXT:    eor w0, w8, w1
+; CHECK-NEXT:    bic w8, w1, w2
+; CHECK-NEXT:    and w9, w0, w2
+; CHECK-NEXT:    orr w0, w9, w8
 ; CHECK-NEXT:    ret
   %n0 = xor i32 %x, %y
   %n1 = and i32 %n0, %mask
@@ -104,9 +104,9 @@ define i32 @in32(i32 %x, i32 %y, i32 %ma
 define i64 @in64(i64 %x, i64 %y, i64 %mask) {
 ; CHECK-LABEL: in64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    eor x8, x0, x1
-; CHECK-NEXT:    and x8, x8, x2
-; CHECK-NEXT:    eor x0, x8, x1
+; CHECK-NEXT:    bic x8, x1, x2
+; CHECK-NEXT:    and x9, x0, x2
+; CHECK-NEXT:    orr x0, x9, x8
 ; CHECK-NEXT:    ret
   %n0 = xor i64 %x, %y
   %n1 = and i64 %n0, %mask
@@ -119,9 +119,9 @@ define i64 @in64(i64 %x, i64 %y, i64 %ma
 define i32 @in_commutativity_0_0_1(i32 %x, i32 %y, i32 %mask) {
 ; CHECK-LABEL: in_commutativity_0_0_1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    eor w8, w0, w1
-; CHECK-NEXT:    and w8, w2, w8
-; CHECK-NEXT:    eor w0, w8, w1
+; CHECK-NEXT:    bic w8, w1, w2
+; CHECK-NEXT:    and w9, w0, w2
+; CHECK-NEXT:    orr w0, w9, w8
 ; CHECK-NEXT:    ret
   %n0 = xor i32 %x, %y
   %n1 = and i32 %mask, %n0 ; swapped
@@ -131,9 +131,9 @@ define i32 @in_commutativity_0_0_1(i32 %
 define i32 @in_commutativity_0_1_0(i32 %x, i32 %y, i32 %mask) {
 ; CHECK-LABEL: in_commutativity_0_1_0:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    eor w8, w0, w1
-; CHECK-NEXT:    and w8, w8, w2
-; CHECK-NEXT:    eor w0, w1, w8
+; CHECK-NEXT:    bic w8, w1, w2
+; CHECK-NEXT:    and w9, w0, w2
+; CHECK-NEXT:    orr w0, w9, w8
 ; CHECK-NEXT:    ret
   %n0 = xor i32 %x, %y
   %n1 = and i32 %n0, %mask
@@ -143,9 +143,9 @@ define i32 @in_commutativity_0_1_0(i32 %
 define i32 @in_commutativity_0_1_1(i32 %x, i32 %y, i32 %mask) {
 ; CHECK-LABEL: in_commutativity_0_1_1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    eor w8, w0, w1
-; CHECK-NEXT:    and w8, w2, w8
-; CHECK-NEXT:    eor w0, w1, w8
+; CHECK-NEXT:    bic w8, w1, w2
+; CHECK-NEXT:    and w9, w0, w2
+; CHECK-NEXT:    orr w0, w9, w8
 ; CHECK-NEXT:    ret
   %n0 = xor i32 %x, %y
   %n1 = and i32 %mask, %n0 ; swapped
@@ -155,9 +155,9 @@ define i32 @in_commutativity_0_1_1(i32 %
 define i32 @in_commutativity_1_0_0(i32 %x, i32 %y, i32 %mask) {
 ; CHECK-LABEL: in_commutativity_1_0_0:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    eor w8, w0, w1
-; CHECK-NEXT:    and w8, w8, w2
-; CHECK-NEXT:    eor w0, w8, w0
+; CHECK-NEXT:    bic w8, w0, w2
+; CHECK-NEXT:    and w9, w1, w2
+; CHECK-NEXT:    orr w0, w9, w8
 ; CHECK-NEXT:    ret
   %n0 = xor i32 %x, %y
   %n1 = and i32 %n0, %mask
@@ -167,9 +167,9 @@ define i32 @in_commutativity_1_0_0(i32 %
 define i32 @in_commutativity_1_0_1(i32 %x, i32 %y, i32 %mask) {
 ; CHECK-LABEL: in_commutativity_1_0_1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    eor w8, w0, w1
-; CHECK-NEXT:    and w8, w2, w8
-; CHECK-NEXT:    eor w0, w8, w0
+; CHECK-NEXT:    bic w8, w0, w2
+; CHECK-NEXT:    and w9, w1, w2
+; CHECK-NEXT:    orr w0, w9, w8
 ; CHECK-NEXT:    ret
   %n0 = xor i32 %x, %y
   %n1 = and i32 %mask, %n0 ; swapped
@@ -179,9 +179,9 @@ define i32 @in_commutativity_1_0_1(i32 %
 define i32 @in_commutativity_1_1_0(i32 %x, i32 %y, i32 %mask) {
 ; CHECK-LABEL: in_commutativity_1_1_0:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    eor w8, w0, w1
-; CHECK-NEXT:    and w8, w8, w2
-; CHECK-NEXT:    eor w0, w0, w8
+; CHECK-NEXT:    bic w8, w0, w2
+; CHECK-NEXT:    and w9, w1, w2
+; CHECK-NEXT:    orr w0, w9, w8
 ; CHECK-NEXT:    ret
   %n0 = xor i32 %x, %y
   %n1 = and i32 %n0, %mask
@@ -191,9 +191,9 @@ define i32 @in_commutativity_1_1_0(i32 %
 define i32 @in_commutativity_1_1_1(i32 %x, i32 %y, i32 %mask) {
 ; CHECK-LABEL: in_commutativity_1_1_1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    eor w8, w0, w1
-; CHECK-NEXT:    and w8, w2, w8
-; CHECK-NEXT:    eor w0, w0, w8
+; CHECK-NEXT:    bic w8, w0, w2
+; CHECK-NEXT:    and w9, w1, w2
+; CHECK-NEXT:    orr w0, w9, w8
 ; CHECK-NEXT:    ret
   %n0 = xor i32 %x, %y
   %n1 = and i32 %mask, %n0 ; swapped
@@ -207,9 +207,9 @@ define i32 @in_complex_y0(i32 %x, i32 %y
 ; CHECK-LABEL: in_complex_y0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w1, w2
-; CHECK-NEXT:    eor w9, w0, w8
-; CHECK-NEXT:    and w9, w9, w3
-; CHECK-NEXT:    eor w0, w9, w8
+; CHECK-NEXT:    and w9, w0, w3
+; CHECK-NEXT:    bic w8, w8, w3
+; CHECK-NEXT:    orr w0, w9, w8
 ; CHECK-NEXT:    ret
   %y = and i32 %y_hi, %y_low
   %n0 = xor i32 %x, %y
@@ -221,9 +221,9 @@ define i32 @in_complex_y1(i32 %x, i32 %y
 ; CHECK-LABEL: in_complex_y1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w1, w2
-; CHECK-NEXT:    eor w9, w0, w8
-; CHECK-NEXT:    and w9, w9, w3
-; CHECK-NEXT:    eor w0, w8, w9
+; CHECK-NEXT:    and w9, w0, w3
+; CHECK-NEXT:    bic w8, w8, w3
+; CHECK-NEXT:    orr w0, w9, w8
 ; CHECK-NEXT:    ret
   %y = and i32 %y_hi, %y_low
   %n0 = xor i32 %x, %y
@@ -238,9 +238,9 @@ define i32 @in_complex_m0(i32 %x, i32 %y
 ; CHECK-LABEL: in_complex_m0:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    eor w8, w2, w3
-; CHECK-NEXT:    eor w9, w0, w1
-; CHECK-NEXT:    and w8, w9, w8
-; CHECK-NEXT:    eor w0, w8, w1
+; CHECK-NEXT:    bic w9, w1, w8
+; CHECK-NEXT:    and w8, w0, w8
+; CHECK-NEXT:    orr w0, w8, w9
 ; CHECK-NEXT:    ret
   %mask = xor i32 %m_a, %m_b
   %n0 = xor i32 %x, %y
@@ -252,9 +252,9 @@ define i32 @in_complex_m1(i32 %x, i32 %y
 ; CHECK-LABEL: in_complex_m1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    eor w8, w2, w3
-; CHECK-NEXT:    eor w9, w0, w1
-; CHECK-NEXT:    and w8, w8, w9
-; CHECK-NEXT:    eor w0, w8, w1
+; CHECK-NEXT:    bic w9, w1, w8
+; CHECK-NEXT:    and w8, w0, w8
+; CHECK-NEXT:    orr w0, w8, w9
 ; CHECK-NEXT:    ret
   %mask = xor i32 %m_a, %m_b
   %n0 = xor i32 %x, %y
@@ -270,9 +270,9 @@ define i32 @in_complex_y0_m0(i32 %x, i32
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w1, w2
 ; CHECK-NEXT:    eor w9, w3, w4
-; CHECK-NEXT:    eor w10, w0, w8
-; CHECK-NEXT:    and w9, w10, w9
-; CHECK-NEXT:    eor w0, w9, w8
+; CHECK-NEXT:    bic w8, w8, w9
+; CHECK-NEXT:    and w9, w0, w9
+; CHECK-NEXT:    orr w0, w9, w8
 ; CHECK-NEXT:    ret
   %y = and i32 %y_hi, %y_low
   %mask = xor i32 %m_a, %m_b
@@ -286,9 +286,9 @@ define i32 @in_complex_y1_m0(i32 %x, i32
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w1, w2
 ; CHECK-NEXT:    eor w9, w3, w4
-; CHECK-NEXT:    eor w10, w0, w8
-; CHECK-NEXT:    and w9, w10, w9
-; CHECK-NEXT:    eor w0, w8, w9
+; CHECK-NEXT:    bic w8, w8, w9
+; CHECK-NEXT:    and w9, w0, w9
+; CHECK-NEXT:    orr w0, w9, w8
 ; CHECK-NEXT:    ret
   %y = and i32 %y_hi, %y_low
   %mask = xor i32 %m_a, %m_b
@@ -302,9 +302,9 @@ define i32 @in_complex_y0_m1(i32 %x, i32
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w1, w2
 ; CHECK-NEXT:    eor w9, w3, w4
-; CHECK-NEXT:    eor w10, w0, w8
-; CHECK-NEXT:    and w9, w9, w10
-; CHECK-NEXT:    eor w0, w9, w8
+; CHECK-NEXT:    bic w8, w8, w9
+; CHECK-NEXT:    and w9, w0, w9
+; CHECK-NEXT:    orr w0, w9, w8
 ; CHECK-NEXT:    ret
   %y = and i32 %y_hi, %y_low
   %mask = xor i32 %m_a, %m_b
@@ -318,9 +318,9 @@ define i32 @in_complex_y1_m1(i32 %x, i32
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    and w8, w1, w2
 ; CHECK-NEXT:    eor w9, w3, w4
-; CHECK-NEXT:    eor w10, w0, w8
-; CHECK-NEXT:    and w9, w9, w10
-; CHECK-NEXT:    eor w0, w8, w9
+; CHECK-NEXT:    bic w8, w8, w9
+; CHECK-NEXT:    and w9, w0, w9
+; CHECK-NEXT:    orr w0, w9, w8
 ; CHECK-NEXT:    ret
   %y = and i32 %y_hi, %y_low
   %mask = xor i32 %m_a, %m_b

Modified: llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll?rev=330646&r1=330645&r2=330646&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll (original)
+++ llvm/trunk/test/CodeGen/X86/unfold-masked-merge-scalar-variablemask.ll Mon Apr 23 13:38:49 2018
@@ -112,10 +112,10 @@ define i8 @in8(i8 %x, i8 %y, i8 %mask) {
 ;
 ; CHECK-BMI-LABEL: in8:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    xorl %esi, %edi
+; CHECK-BMI-NEXT:    andnl %esi, %edx, %eax
 ; CHECK-BMI-NEXT:    andl %edx, %edi
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    orl %edi, %eax
+; CHECK-BMI-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-BMI-NEXT:    retq
   %n0 = xor i8 %x, %y
   %n1 = and i8 %n0, %mask
@@ -134,10 +134,10 @@ define i16 @in16(i16 %x, i16 %y, i16 %ma
 ;
 ; CHECK-BMI-LABEL: in16:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    xorl %esi, %edi
+; CHECK-BMI-NEXT:    andnl %esi, %edx, %eax
 ; CHECK-BMI-NEXT:    andl %edx, %edi
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    orl %edi, %eax
+; CHECK-BMI-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-BMI-NEXT:    retq
   %n0 = xor i16 %x, %y
   %n1 = and i16 %n0, %mask
@@ -156,10 +156,9 @@ define i32 @in32(i32 %x, i32 %y, i32 %ma
 ;
 ; CHECK-BMI-LABEL: in32:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    xorl %esi, %edi
+; CHECK-BMI-NEXT:    andnl %esi, %edx, %eax
 ; CHECK-BMI-NEXT:    andl %edx, %edi
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    orl %edi, %eax
 ; CHECK-BMI-NEXT:    retq
   %n0 = xor i32 %x, %y
   %n1 = and i32 %n0, %mask
@@ -178,10 +177,9 @@ define i64 @in64(i64 %x, i64 %y, i64 %ma
 ;
 ; CHECK-BMI-LABEL: in64:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    xorq %rsi, %rdi
+; CHECK-BMI-NEXT:    andnq %rsi, %rdx, %rax
 ; CHECK-BMI-NEXT:    andq %rdx, %rdi
-; CHECK-BMI-NEXT:    xorq %rsi, %rdi
-; CHECK-BMI-NEXT:    movq %rdi, %rax
+; CHECK-BMI-NEXT:    orq %rdi, %rax
 ; CHECK-BMI-NEXT:    retq
   %n0 = xor i64 %x, %y
   %n1 = and i64 %n0, %mask
@@ -202,10 +200,9 @@ define i32 @in_commutativity_0_0_1(i32 %
 ;
 ; CHECK-BMI-LABEL: in_commutativity_0_0_1:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    xorl %esi, %edi
+; CHECK-BMI-NEXT:    andnl %esi, %edx, %eax
 ; CHECK-BMI-NEXT:    andl %edx, %edi
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    orl %edi, %eax
 ; CHECK-BMI-NEXT:    retq
   %n0 = xor i32 %x, %y
   %n1 = and i32 %mask, %n0 ; swapped
@@ -223,10 +220,9 @@ define i32 @in_commutativity_0_1_0(i32 %
 ;
 ; CHECK-BMI-LABEL: in_commutativity_0_1_0:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    xorl %esi, %edi
+; CHECK-BMI-NEXT:    andnl %esi, %edx, %eax
 ; CHECK-BMI-NEXT:    andl %edx, %edi
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    orl %edi, %eax
 ; CHECK-BMI-NEXT:    retq
   %n0 = xor i32 %x, %y
   %n1 = and i32 %n0, %mask
@@ -244,10 +240,9 @@ define i32 @in_commutativity_0_1_1(i32 %
 ;
 ; CHECK-BMI-LABEL: in_commutativity_0_1_1:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    xorl %esi, %edi
+; CHECK-BMI-NEXT:    andnl %esi, %edx, %eax
 ; CHECK-BMI-NEXT:    andl %edx, %edi
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    orl %edi, %eax
 ; CHECK-BMI-NEXT:    retq
   %n0 = xor i32 %x, %y
   %n1 = and i32 %mask, %n0 ; swapped
@@ -265,10 +260,9 @@ define i32 @in_commutativity_1_0_0(i32 %
 ;
 ; CHECK-BMI-LABEL: in_commutativity_1_0_0:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    xorl %edi, %esi
+; CHECK-BMI-NEXT:    andnl %edi, %edx, %eax
 ; CHECK-BMI-NEXT:    andl %edx, %esi
-; CHECK-BMI-NEXT:    xorl %edi, %esi
-; CHECK-BMI-NEXT:    movl %esi, %eax
+; CHECK-BMI-NEXT:    orl %esi, %eax
 ; CHECK-BMI-NEXT:    retq
   %n0 = xor i32 %x, %y
   %n1 = and i32 %n0, %mask
@@ -286,10 +280,9 @@ define i32 @in_commutativity_1_0_1(i32 %
 ;
 ; CHECK-BMI-LABEL: in_commutativity_1_0_1:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    xorl %edi, %esi
+; CHECK-BMI-NEXT:    andnl %edi, %edx, %eax
 ; CHECK-BMI-NEXT:    andl %edx, %esi
-; CHECK-BMI-NEXT:    xorl %edi, %esi
-; CHECK-BMI-NEXT:    movl %esi, %eax
+; CHECK-BMI-NEXT:    orl %esi, %eax
 ; CHECK-BMI-NEXT:    retq
   %n0 = xor i32 %x, %y
   %n1 = and i32 %mask, %n0 ; swapped
@@ -307,10 +300,9 @@ define i32 @in_commutativity_1_1_0(i32 %
 ;
 ; CHECK-BMI-LABEL: in_commutativity_1_1_0:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    xorl %edi, %esi
+; CHECK-BMI-NEXT:    andnl %edi, %edx, %eax
 ; CHECK-BMI-NEXT:    andl %edx, %esi
-; CHECK-BMI-NEXT:    xorl %edi, %esi
-; CHECK-BMI-NEXT:    movl %esi, %eax
+; CHECK-BMI-NEXT:    orl %esi, %eax
 ; CHECK-BMI-NEXT:    retq
   %n0 = xor i32 %x, %y
   %n1 = and i32 %n0, %mask
@@ -328,10 +320,9 @@ define i32 @in_commutativity_1_1_1(i32 %
 ;
 ; CHECK-BMI-LABEL: in_commutativity_1_1_1:
 ; CHECK-BMI:       # %bb.0:
-; CHECK-BMI-NEXT:    xorl %edi, %esi
+; CHECK-BMI-NEXT:    andnl %edi, %edx, %eax
 ; CHECK-BMI-NEXT:    andl %edx, %esi
-; CHECK-BMI-NEXT:    xorl %edi, %esi
-; CHECK-BMI-NEXT:    movl %esi, %eax
+; CHECK-BMI-NEXT:    orl %esi, %eax
 ; CHECK-BMI-NEXT:    retq
   %n0 = xor i32 %x, %y
   %n1 = and i32 %mask, %n0 ; swapped
@@ -354,10 +345,9 @@ define i32 @in_complex_y0(i32 %x, i32 %y
 ; CHECK-BMI-LABEL: in_complex_y0:
 ; CHECK-BMI:       # %bb.0:
 ; CHECK-BMI-NEXT:    andl %edx, %esi
-; CHECK-BMI-NEXT:    xorl %esi, %edi
 ; CHECK-BMI-NEXT:    andl %ecx, %edi
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    andnl %esi, %ecx, %eax
+; CHECK-BMI-NEXT:    orl %edi, %eax
 ; CHECK-BMI-NEXT:    retq
   %y = and i32 %y_hi, %y_low
   %n0 = xor i32 %x, %y
@@ -378,10 +368,9 @@ define i32 @in_complex_y1(i32 %x, i32 %y
 ; CHECK-BMI-LABEL: in_complex_y1:
 ; CHECK-BMI:       # %bb.0:
 ; CHECK-BMI-NEXT:    andl %edx, %esi
-; CHECK-BMI-NEXT:    xorl %esi, %edi
 ; CHECK-BMI-NEXT:    andl %ecx, %edi
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    andnl %esi, %ecx, %eax
+; CHECK-BMI-NEXT:    orl %edi, %eax
 ; CHECK-BMI-NEXT:    retq
   %y = and i32 %y_hi, %y_low
   %n0 = xor i32 %x, %y
@@ -405,10 +394,9 @@ define i32 @in_complex_m0(i32 %x, i32 %y
 ; CHECK-BMI-LABEL: in_complex_m0:
 ; CHECK-BMI:       # %bb.0:
 ; CHECK-BMI-NEXT:    xorl %ecx, %edx
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    andl %edx, %edi
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    andnl %esi, %edx, %eax
+; CHECK-BMI-NEXT:    andl %edi, %edx
+; CHECK-BMI-NEXT:    orl %edx, %eax
 ; CHECK-BMI-NEXT:    retq
   %mask = xor i32 %m_a, %m_b
   %n0 = xor i32 %x, %y
@@ -429,10 +417,9 @@ define i32 @in_complex_m1(i32 %x, i32 %y
 ; CHECK-BMI-LABEL: in_complex_m1:
 ; CHECK-BMI:       # %bb.0:
 ; CHECK-BMI-NEXT:    xorl %ecx, %edx
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    andl %edx, %edi
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    andnl %esi, %edx, %eax
+; CHECK-BMI-NEXT:    andl %edi, %edx
+; CHECK-BMI-NEXT:    orl %edx, %eax
 ; CHECK-BMI-NEXT:    retq
   %mask = xor i32 %m_a, %m_b
   %n0 = xor i32 %x, %y
@@ -458,10 +445,9 @@ define i32 @in_complex_y0_m0(i32 %x, i32
 ; CHECK-BMI:       # %bb.0:
 ; CHECK-BMI-NEXT:    andl %edx, %esi
 ; CHECK-BMI-NEXT:    xorl %r8d, %ecx
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    andl %ecx, %edi
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    andnl %esi, %ecx, %eax
+; CHECK-BMI-NEXT:    andl %edi, %ecx
+; CHECK-BMI-NEXT:    orl %ecx, %eax
 ; CHECK-BMI-NEXT:    retq
   %y = and i32 %y_hi, %y_low
   %mask = xor i32 %m_a, %m_b
@@ -485,10 +471,9 @@ define i32 @in_complex_y1_m0(i32 %x, i32
 ; CHECK-BMI:       # %bb.0:
 ; CHECK-BMI-NEXT:    andl %edx, %esi
 ; CHECK-BMI-NEXT:    xorl %r8d, %ecx
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    andl %ecx, %edi
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    andnl %esi, %ecx, %eax
+; CHECK-BMI-NEXT:    andl %edi, %ecx
+; CHECK-BMI-NEXT:    orl %ecx, %eax
 ; CHECK-BMI-NEXT:    retq
   %y = and i32 %y_hi, %y_low
   %mask = xor i32 %m_a, %m_b
@@ -512,10 +497,9 @@ define i32 @in_complex_y0_m1(i32 %x, i32
 ; CHECK-BMI:       # %bb.0:
 ; CHECK-BMI-NEXT:    andl %edx, %esi
 ; CHECK-BMI-NEXT:    xorl %r8d, %ecx
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    andl %ecx, %edi
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    andnl %esi, %ecx, %eax
+; CHECK-BMI-NEXT:    andl %edi, %ecx
+; CHECK-BMI-NEXT:    orl %ecx, %eax
 ; CHECK-BMI-NEXT:    retq
   %y = and i32 %y_hi, %y_low
   %mask = xor i32 %m_a, %m_b
@@ -539,10 +523,9 @@ define i32 @in_complex_y1_m1(i32 %x, i32
 ; CHECK-BMI:       # %bb.0:
 ; CHECK-BMI-NEXT:    andl %edx, %esi
 ; CHECK-BMI-NEXT:    xorl %r8d, %ecx
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    andl %ecx, %edi
-; CHECK-BMI-NEXT:    xorl %esi, %edi
-; CHECK-BMI-NEXT:    movl %edi, %eax
+; CHECK-BMI-NEXT:    andnl %esi, %ecx, %eax
+; CHECK-BMI-NEXT:    andl %edi, %ecx
+; CHECK-BMI-NEXT:    orl %ecx, %eax
 ; CHECK-BMI-NEXT:    retq
   %y = and i32 %y_hi, %y_low
   %mask = xor i32 %m_a, %m_b




More information about the llvm-commits mailing list