[llvm] r335433 - [DAGCombiner] eliminate setcc bool math when input is low-bit of some value

Sun Jun 24 07:37:30 PDT 2018

Author: spatel
Date: Sun Jun 24 07:37:30 2018
New Revision: 335433

URL: http://llvm.org/viewvc/llvm-project?rev=335433&view=rev
Log:
[DAGCombiner] eliminate setcc bool math when input is low-bit of some value

This patch has the same motivating example as D48466:
define void @foo(i64 %x, i32 %c.0282.in, i32 %d.0280, i32* %ptr0, i32* %ptr1) {
    %c.0282 = and i32 %c.0282.in, 268435455
    %a16 = lshr i64 32508, %x
    %a17 = and i64 %a16, 1
    %tobool = icmp eq i64 %a17, 0
    %. = select i1 %tobool, i32 1, i32 2
    %.286 = select i1 %tobool, i32 27, i32 26
    %shr97 = lshr i32 %c.0282, %.
    %shl98 = shl i32 %c.0282.in, %.286
    %or99 = or i32 %shr97, %shl98
    %shr100 = lshr i32 %d.0280, %.
    %shl101 = shl i32 %d.0280, %.286
    %or102 = or i32 %shr100, %shl101
    store i32 %or99, i32* %ptr0
    store i32 %or102, i32* %ptr1
    ret void
}

...but I'm trying to kill the setcc bool math sooner rather than later.

By matching a larger pattern that includes both the low-bit mask and the trailing add/sub, 
we can create a universally good fold because we always eliminate the condition code 
intermediate value.

Here are Alive proofs for these (currently instcombine folds the 'add' variants, but 
misses the 'sub' patterns):
https://rise4fun.com/Alive/Gsyp

Name: sub of zext cmp mask
  %a = and i8 %x, 1
  %c = icmp eq i8 %a, 0
  %z = zext i1 %c to i32
  %r = sub i32 C1, %z
  =>
  %optional_cast = zext i8 %a to i32
  %r = add i32 %optional_cast, C1-1

Name: add of zext cmp mask
  %a = and i32 %x, 1
  %c = icmp eq i32 %a, 0
  %z = zext i1 %c to i8
  %r = add i8 %z, C1
  =>
  %optional_cast = trunc i32 %a to i8
  %r = sub i8 C1+1, %optional_cast

All of the tests look like improvements or neutral to me. But it is possible that x86 
test+set+bitop is better than what we now show here. I suspect we could do better by 
adding another fold for the 'sub' variants.

We start with select-of-constant in IR in the larger motivating test, so that's why I 
included tests with selects. Proofs for those variants:
https://rise4fun.com/Alive/Bx1

Name: true const is bigger
Pre: C2 == (C1 + 1)
  %a = and i8 %x, 1
  %c = icmp eq i8 %a, 0
  %r = select i1 %c, i64 C2, i64 C1
  =>
  %z = zext i8 %a to i64
  %r = sub i64 C2, %z

Name: false const is bigger
Pre: C2 == (C1 + 1)
  %a = and i8 %x, 1
  %c = icmp eq i8 %a, 0
  %r = select i1 %c, i64 C1, i64 C2
  =>
  %z = zext i8 %a to i64
  %r = add i64 C1, %z

Differential Revision: https://reviews.llvm.org/D48466

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/test/CodeGen/PowerPC/bool-math.ll
    llvm/trunk/test/CodeGen/X86/bool-math.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=335433&r1=335432&r2=335433&view=diff
==============================================================================

--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Sun Jun 24 07:37:30 2018
@@ -1949,6 +1949,45 @@ SDValue DAGCombiner::foldBinOpIntoSelect
   return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
 }
 
+static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
+  assert(N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB &&
+         "Expecting add or sub");
+
+  // Match a constant operand and a zext operand for the math instruction:
+  // add Z, C
+  // sub C, Z
+  bool IsAdd = N->getOpcode() == ISD::ADD;
+  SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
+  SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
+  auto *CN = dyn_cast<ConstantSDNode>(C);
+  if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
+    return SDValue();
+
+  // Match the zext operand as a setcc of a boolean.
+  if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
+      Z.getOperand(0).getValueType() != MVT::i1)
+    return SDValue();
+
+  // Match the compare as: setcc (X & 1), 0, eq.
+  SDValue SetCC = Z.getOperand(0);
+  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
+  if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
+      SetCC.getOperand(0).getOpcode() != ISD::AND ||
+      !isOneConstant(SetCC.getOperand(0).getOperand(1)))
+    return SDValue();
+
+  // We are adding/subtracting a constant and an inverted low bit. Turn that
+  // into a subtract/add of the low bit with incremented/decremented constant:
+  // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
+  // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
+  EVT VT = C.getValueType();
+  SDLoc DL(N);
+  SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
+  SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
+                       DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
+  return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
+}
+
 SDValue DAGCombiner::visitADD(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -2080,6 +2119,9 @@ SDValue DAGCombiner::visitADD(SDNode *N)
                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
   }
 
+  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
+    return V;
+
   if (SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);
 
@@ -2579,6 +2621,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N)
   if (N1.isUndef())
     return N1;
 
+  if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
+    return V;
+
   // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
     if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {

Modified: llvm/trunk/test/CodeGen/PowerPC/bool-math.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/bool-math.ll?rev=335433&r1=335432&r2=335433&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/bool-math.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/bool-math.ll Sun Jun 24 07:37:30 2018
@@ -4,9 +4,9 @@
 define i32 @sub_zext_cmp_mask_same_size_result(i32 %x) {
 ; CHECK-LABEL: sub_zext_cmp_mask_same_size_result:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    nor 3, 3, 3
-; CHECK-NEXT:    clrlwi 3, 3, 31
-; CHECK-NEXT:    subfic 3, 3, -27
+; CHECK-NEXT:    clrldi 3, 3, 63
+; CHECK-NEXT:    ori 3, 3, 65508
+; CHECK-NEXT:    oris 3, 3, 65535
 ; CHECK-NEXT:    blr
   %a = and i32 %x, 1
   %c = icmp eq i32 %a, 0
@@ -18,9 +18,8 @@ define i32 @sub_zext_cmp_mask_same_size_
 define i32 @sub_zext_cmp_mask_wider_result(i8 %x) {
 ; CHECK-LABEL: sub_zext_cmp_mask_wider_result:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    nor 3, 3, 3
-; CHECK-NEXT:    clrlwi 3, 3, 31
-; CHECK-NEXT:    subfic 3, 3, 27
+; CHECK-NEXT:    clrldi 3, 3, 63
+; CHECK-NEXT:    ori 3, 3, 26
 ; CHECK-NEXT:    blr
   %a = and i8 %x, 1
   %c = icmp eq i8 %a, 0
@@ -32,9 +31,8 @@ define i32 @sub_zext_cmp_mask_wider_resu
 define i8 @sub_zext_cmp_mask_narrower_result(i32 %x) {
 ; CHECK-LABEL: sub_zext_cmp_mask_narrower_result:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    nor 3, 3, 3
-; CHECK-NEXT:    clrlwi 3, 3, 31
-; CHECK-NEXT:    subfic 3, 3, 47
+; CHECK-NEXT:    clrldi 3, 3, 63
+; CHECK-NEXT:    ori 3, 3, 46
 ; CHECK-NEXT:    blr
   %a = and i32 %x, 1
   %c = icmp eq i32 %a, 0
@@ -46,10 +44,8 @@ define i8 @sub_zext_cmp_mask_narrower_re
 define i8 @add_zext_cmp_mask_same_size_result(i8 %x) {
 ; CHECK-LABEL: add_zext_cmp_mask_same_size_result:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xori 3, 3, 65535
-; CHECK-NEXT:    xoris 3, 3, 65535
-; CHECK-NEXT:    clrldi 3, 3, 63
-; CHECK-NEXT:    ori 3, 3, 26
+; CHECK-NEXT:    rlwinm 3, 3, 0, 31, 31
+; CHECK-NEXT:    subfic 3, 3, 27
 ; CHECK-NEXT:    blr
   %a = and i8 %x, 1
   %c = icmp eq i8 %a, 0
@@ -61,10 +57,8 @@ define i8 @add_zext_cmp_mask_same_size_r
 define i32 @add_zext_cmp_mask_wider_result(i8 %x) {
 ; CHECK-LABEL: add_zext_cmp_mask_wider_result:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xori 3, 3, 65535
-; CHECK-NEXT:    xoris 3, 3, 65535
-; CHECK-NEXT:    clrldi 3, 3, 63
-; CHECK-NEXT:    ori 3, 3, 26
+; CHECK-NEXT:    rlwinm 3, 3, 0, 31, 31
+; CHECK-NEXT:    subfic 3, 3, 27
 ; CHECK-NEXT:    blr
   %a = and i8 %x, 1
   %c = icmp eq i8 %a, 0
@@ -76,10 +70,8 @@ define i32 @add_zext_cmp_mask_wider_resu
 define i8 @add_zext_cmp_mask_narrower_result(i32 %x) {
 ; CHECK-LABEL: add_zext_cmp_mask_narrower_result:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xori 3, 3, 65535
-; CHECK-NEXT:    xoris 3, 3, 65535
-; CHECK-NEXT:    clrldi 3, 3, 63
-; CHECK-NEXT:    ori 3, 3, 42
+; CHECK-NEXT:    rlwinm 3, 3, 0, 31, 31
+; CHECK-NEXT:    subfic 3, 3, 43
 ; CHECK-NEXT:    blr
   %a = and i32 %x, 1
   %c = icmp eq i32 %a, 0
@@ -130,11 +122,8 @@ define i16 @low_bit_select_constants_big
 define i8 @low_bit_select_constants_bigger_true_same_size_result(i8 %x) {
 ; CHECK-LABEL: low_bit_select_constants_bigger_true_same_size_result:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xori 3, 3, 65535
-; CHECK-NEXT:    xoris 3, 3, 65535
-; CHECK-NEXT:    clrldi 3, 3, 63
-; CHECK-NEXT:    ori 3, 3, 65506
-; CHECK-NEXT:    oris 3, 3, 65535
+; CHECK-NEXT:    rlwinm 3, 3, 0, 31, 31
+; CHECK-NEXT:    subfic 3, 3, -29
 ; CHECK-NEXT:    blr
   %a = and i8 %x, 1
   %c = icmp eq i8 %a, 0
@@ -145,9 +134,8 @@ define i8 @low_bit_select_constants_bigg
 define i32 @low_bit_select_constants_bigger_true_wider_result(i8 %x) {
 ; CHECK-LABEL: low_bit_select_constants_bigger_true_wider_result:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    not 3, 3
 ; CHECK-NEXT:    clrldi 3, 3, 63
-; CHECK-NEXT:    ori 3, 3, 226
+; CHECK-NEXT:    subfic 3, 3, 227
 ; CHECK-NEXT:    blr
   %a = and i8 %x, 1
   %c = icmp eq i8 %a, 0
@@ -158,10 +146,8 @@ define i32 @low_bit_select_constants_big
 define i8 @low_bit_select_constants_bigger_true_narrower_result(i16 %x) {
 ; CHECK-LABEL: low_bit_select_constants_bigger_true_narrower_result:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xori 3, 3, 65535
-; CHECK-NEXT:    xoris 3, 3, 65535
-; CHECK-NEXT:    clrldi 3, 3, 63
-; CHECK-NEXT:    ori 3, 3, 40
+; CHECK-NEXT:    rlwinm 3, 3, 0, 31, 31
+; CHECK-NEXT:    subfic 3, 3, 41
 ; CHECK-NEXT:    blr
   %a = and i16 %x, 1
   %c = icmp eq i16 %a, 0

Modified: llvm/trunk/test/CodeGen/X86/bool-math.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bool-math.ll?rev=335433&r1=335432&r2=335433&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bool-math.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bool-math.ll Sun Jun 24 07:37:30 2018
@@ -4,10 +4,9 @@
 define i32 @sub_zext_cmp_mask_same_size_result(i32 %x) {
 ; CHECK-LABEL: sub_zext_cmp_mask_same_size_result:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
 ; CHECK-NEXT:    andl $1, %edi
-; CHECK-NEXT:    cmpl $1, %edi
-; CHECK-NEXT:    movl $-27, %eax
-; CHECK-NEXT:    sbbl $0, %eax
+; CHECK-NEXT:    leal -28(%rdi), %eax
 ; CHECK-NEXT:    retq
   %a = and i32 %x, 1
   %c = icmp eq i32 %a, 0
@@ -19,10 +18,9 @@ define i32 @sub_zext_cmp_mask_same_size_
 define i32 @sub_zext_cmp_mask_wider_result(i8 %x) {
 ; CHECK-LABEL: sub_zext_cmp_mask_wider_result:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    andb $1, %dil
-; CHECK-NEXT:    cmpb $1, %dil
-; CHECK-NEXT:    movl $27, %eax
-; CHECK-NEXT:    sbbl $0, %eax
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    andl $1, %edi
+; CHECK-NEXT:    leal 26(%rdi), %eax
 ; CHECK-NEXT:    retq
   %a = and i8 %x, 1
   %c = icmp eq i8 %a, 0
@@ -35,9 +33,8 @@ define i8 @sub_zext_cmp_mask_narrower_re
 ; CHECK-LABEL: sub_zext_cmp_mask_narrower_result:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    andl $1, %edi
-; CHECK-NEXT:    cmpl $1, %edi
-; CHECK-NEXT:    movb $47, %al
-; CHECK-NEXT:    sbbb $0, %al
+; CHECK-NEXT:    orb $46, %dil
+; CHECK-NEXT:    movl %edi, %eax
 ; CHECK-NEXT:    retq
   %a = and i32 %x, 1
   %c = icmp eq i32 %a, 0
@@ -49,9 +46,9 @@ define i8 @sub_zext_cmp_mask_narrower_re
 define i8 @add_zext_cmp_mask_same_size_result(i8 %x) {
 ; CHECK-LABEL: add_zext_cmp_mask_same_size_result:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    testb $1, %dil
-; CHECK-NEXT:    sete %al
-; CHECK-NEXT:    orb $26, %al
+; CHECK-NEXT:    andb $1, %dil
+; CHECK-NEXT:    movb $27, %al
+; CHECK-NEXT:    subb %dil, %al
 ; CHECK-NEXT:    retq
   %a = and i8 %x, 1
   %c = icmp eq i8 %a, 0
@@ -63,10 +60,9 @@ define i8 @add_zext_cmp_mask_same_size_r
 define i32 @add_zext_cmp_mask_wider_result(i8 %x) {
 ; CHECK-LABEL: add_zext_cmp_mask_wider_result:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    testb $1, %dil
-; CHECK-NEXT:    sete %al
-; CHECK-NEXT:    orl $26, %eax
+; CHECK-NEXT:    andl $1, %edi
+; CHECK-NEXT:    movl $27, %eax
+; CHECK-NEXT:    subl %edi, %eax
 ; CHECK-NEXT:    retq
   %a = and i8 %x, 1
   %c = icmp eq i8 %a, 0
@@ -78,9 +74,9 @@ define i32 @add_zext_cmp_mask_wider_resu
 define i8 @add_zext_cmp_mask_narrower_result(i32 %x) {
 ; CHECK-LABEL: add_zext_cmp_mask_narrower_result:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    testb $1, %dil
-; CHECK-NEXT:    sete %al
-; CHECK-NEXT:    orb $42, %al
+; CHECK-NEXT:    andl $1, %edi
+; CHECK-NEXT:    movb $43, %al
+; CHECK-NEXT:    subb %dil, %al
 ; CHECK-NEXT:    retq
   %a = and i32 %x, 1
   %c = icmp eq i32 %a, 0
@@ -92,10 +88,9 @@ define i8 @add_zext_cmp_mask_narrower_re
 define i32 @low_bit_select_constants_bigger_false_same_size_result(i32 %x) {
 ; CHECK-LABEL: low_bit_select_constants_bigger_false_same_size_result:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
 ; CHECK-NEXT:    andl $1, %edi
-; CHECK-NEXT:    cmpl $1, %edi
-; CHECK-NEXT:    movl $43, %eax
-; CHECK-NEXT:    sbbl $0, %eax
+; CHECK-NEXT:    leal 42(%rdi), %eax
 ; CHECK-NEXT:    retq
   %a = and i32 %x, 1
   %c = icmp eq i32 %a, 0
@@ -106,10 +101,9 @@ define i32 @low_bit_select_constants_big
 define i64 @low_bit_select_constants_bigger_false_wider_result(i32 %x) {
 ; CHECK-LABEL: low_bit_select_constants_bigger_false_wider_result:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
 ; CHECK-NEXT:    andl $1, %edi
-; CHECK-NEXT:    cmpl $1, %edi
-; CHECK-NEXT:    movl $27, %eax
-; CHECK-NEXT:    sbbq $0, %rax
+; CHECK-NEXT:    leaq 26(%rdi), %rax
 ; CHECK-NEXT:    retq
   %a = and i32 %x, 1
   %c = icmp eq i32 %a, 0
@@ -120,10 +114,10 @@ define i64 @low_bit_select_constants_big
 define i16 @low_bit_select_constants_bigger_false_narrower_result(i32 %x) {
 ; CHECK-LABEL: low_bit_select_constants_bigger_false_narrower_result:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
 ; CHECK-NEXT:    andl $1, %edi
-; CHECK-NEXT:    cmpl $1, %edi
-; CHECK-NEXT:    movw $37, %ax
-; CHECK-NEXT:    sbbw $0, %ax
+; CHECK-NEXT:    leal 36(%rdi), %eax
+; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
   %a = and i32 %x, 1
   %c = icmp eq i32 %a, 0
@@ -134,9 +128,9 @@ define i16 @low_bit_select_constants_big
 define i8 @low_bit_select_constants_bigger_true_same_size_result(i8 %x) {
 ; CHECK-LABEL: low_bit_select_constants_bigger_true_same_size_result:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    testb $1, %dil
-; CHECK-NEXT:    sete %al
-; CHECK-NEXT:    orb $-30, %al
+; CHECK-NEXT:    andb $1, %dil
+; CHECK-NEXT:    movb $-29, %al
+; CHECK-NEXT:    subb %dil, %al
 ; CHECK-NEXT:    retq
   %a = and i8 %x, 1
   %c = icmp eq i8 %a, 0
@@ -147,10 +141,9 @@ define i8 @low_bit_select_constants_bigg
 define i32 @low_bit_select_constants_bigger_true_wider_result(i8 %x) {
 ; CHECK-LABEL: low_bit_select_constants_bigger_true_wider_result:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    testb $1, %dil
-; CHECK-NEXT:    sete %al
-; CHECK-NEXT:    orl $226, %eax
+; CHECK-NEXT:    andl $1, %edi
+; CHECK-NEXT:    movl $227, %eax
+; CHECK-NEXT:    subl %edi, %eax
 ; CHECK-NEXT:    retq
   %a = and i8 %x, 1
   %c = icmp eq i8 %a, 0
@@ -161,9 +154,9 @@ define i32 @low_bit_select_constants_big
 define i8 @low_bit_select_constants_bigger_true_narrower_result(i16 %x) {
 ; CHECK-LABEL: low_bit_select_constants_bigger_true_narrower_result:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    testb $1, %dil
-; CHECK-NEXT:    sete %al
-; CHECK-NEXT:    orb $40, %al
+; CHECK-NEXT:    andl $1, %edi
+; CHECK-NEXT:    movb $41, %al
+; CHECK-NEXT:    subb %dil, %al
 ; CHECK-NEXT:    retq
   %a = and i16 %x, 1
   %c = icmp eq i16 %a, 0