[llvm] 56b3339 - [SelectionDAG] Move ISD::PARITY formation from DAGCombine to SimplifyDemandedBits.

Sun Sep 13 21:05:12 PDT 2020

Author: Craig Topper
Date: 2020-09-13T21:04:13-07:00
New Revision: 56b33391d3a42ef8e6fd1bcdcbcbb72bfb562092

URL: https://github.com/llvm/llvm-project/commit/56b33391d3a42ef8e6fd1bcdcbcbb72bfb562092
DIFF: https://github.com/llvm/llvm-project/commit/56b33391d3a42ef8e6fd1bcdcbcbb72bfb562092.diff

LOG: [SelectionDAG] Move ISD::PARITY formation from DAGCombine to SimplifyDemandedBits.

Previously, we formed ISD::PARITY by looking for (and (ctpop X), 1),
but the AND might be separated from the ctpop. For example, if the
parity result is multiplied by 2, we'll pull the AND through the
shift.

To cover such cases, move the formation to SimplifyDemandedBits,
where we can catch more patterns in which only the LSB of the CTPOP
is demanded.

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
    llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
    llvm/test/CodeGen/X86/parity.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ae976af6557e..e4a517601968 100644

--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5574,25 +5574,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
     if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
       return V;
 
-  // fold (and (ctpop X), 1) -> parity X
-  // Only do this before op legalization as it might be turned back into ctpop.
-  // TODO: Support vectors?
-  if (!LegalOperations && isOneConstant(N1) && N0.hasOneUse()) {
-    SDValue Tmp = N0;
-
-    // It's possible the ctpop has been truncated, but since we only care about
-    // the LSB we can look through it.
-    if (Tmp.getOpcode() == ISD::TRUNCATE && Tmp.getOperand(0).hasOneUse())
-      Tmp = Tmp.getOperand(0);
-
-    if (Tmp.getOpcode() == ISD::CTPOP) {
-      SDLoc dl(N);
-      SDValue Parity =
-          DAG.getNode(ISD::PARITY, dl, Tmp.getValueType(), Tmp.getOperand(0));
-      return DAG.getNode(ISD::TRUNCATE, dl, VT, Parity);
-    }
-  }
-
   return SDValue();
 }
 

diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 1cc2ec77ebce..93b40803089e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -3053,6 +3053,11 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
     Known.Zero.setBitsFrom(Log2_32(PossibleOnes) + 1);
     break;
   }
+  case ISD::PARITY: {
+    // Parity returns 0 everywhere but the LSB.
+    Known.Zero.setBitsFrom(1);
+    break;
+  }
   case ISD::LOAD: {
     LoadSDNode *LD = cast<LoadSDNode>(Op);
     const Constant *Cst = TLI->getTargetConstantFromLoad(LD);

diff  --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index ea2344e4f551..b7f5ab3d6b85 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1748,6 +1748,17 @@ bool TargetLowering::SimplifyDemandedBits(
     Known.Zero = Known2.Zero.byteSwap();
     break;
   }
+  case ISD::CTPOP: {
+    // If only 1 bit is demanded, replace with PARITY as long as we're before
+    // op legalization.
+    // FIXME: Limit to scalars for now.
+    if (DemandedBits.isOneValue() && !TLO.LegalOps && !VT.isVector())
+      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
+                                               Op.getOperand(0)));
+
+    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+    break;
+  }
   case ISD::SIGN_EXTEND_INREG: {
     SDValue Op0 = Op.getOperand(0);
     EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();

diff  --git a/llvm/test/CodeGen/X86/parity.ll b/llvm/test/CodeGen/X86/parity.ll
index d7344a4a2ed7..4bc225cba547 100644
--- a/llvm/test/CodeGen/X86/parity.ll
+++ b/llvm/test/CodeGen/X86/parity.ll
@@ -422,6 +422,100 @@ define i32 @parity_8_mask(i32 %x) {
   ret i32 %c
 }
 
+define i32 @parity_32_shift(i32 %0) {
+; X86-NOPOPCNT-LABEL: parity_32_shift:
+; X86-NOPOPCNT:       # %bb.0:
+; X86-NOPOPCNT-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOPOPCNT-NEXT:    movl %eax, %ecx
+; X86-NOPOPCNT-NEXT:    shrl $16, %ecx
+; X86-NOPOPCNT-NEXT:    xorl %eax, %ecx
+; X86-NOPOPCNT-NEXT:    xorl %eax, %eax
+; X86-NOPOPCNT-NEXT:    xorb %ch, %cl
+; X86-NOPOPCNT-NEXT:    setnp %al
+; X86-NOPOPCNT-NEXT:    addl %eax, %eax
+; X86-NOPOPCNT-NEXT:    retl
+;
+; X64-NOPOPCNT-LABEL: parity_32_shift:
+; X64-NOPOPCNT:       # %bb.0:
+; X64-NOPOPCNT-NEXT:    movl %edi, %ecx
+; X64-NOPOPCNT-NEXT:    shrl $16, %ecx
+; X64-NOPOPCNT-NEXT:    xorl %edi, %ecx
+; X64-NOPOPCNT-NEXT:    xorl %eax, %eax
+; X64-NOPOPCNT-NEXT:    xorb %ch, %cl
+; X64-NOPOPCNT-NEXT:    setnp %al
+; X64-NOPOPCNT-NEXT:    addl %eax, %eax
+; X64-NOPOPCNT-NEXT:    retq
+;
+; X86-POPCNT-LABEL: parity_32_shift:
+; X86-POPCNT:       # %bb.0:
+; X86-POPCNT-NEXT:    popcntl {{[0-9]+}}(%esp), %eax
+; X86-POPCNT-NEXT:    andl $1, %eax
+; X86-POPCNT-NEXT:    addl %eax, %eax
+; X86-POPCNT-NEXT:    retl
+;
+; X64-POPCNT-LABEL: parity_32_shift:
+; X64-POPCNT:       # %bb.0:
+; X64-POPCNT-NEXT:    popcntl %edi, %eax
+; X64-POPCNT-NEXT:    andl $1, %eax
+; X64-POPCNT-NEXT:    addl %eax, %eax
+; X64-POPCNT-NEXT:    retq
+  %2 = tail call i32 @llvm.ctpop.i32(i32 %0)
+  %3 = shl nuw nsw i32 %2, 1
+  %4 = and i32 %3, 2
+  ret i32 %4
+}
+
+define i64 @parity_64_shift(i64 %0) {
+; X86-NOPOPCNT-LABEL: parity_64_shift:
+; X86-NOPOPCNT:       # %bb.0:
+; X86-NOPOPCNT-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOPOPCNT-NEXT:    xorl {{[0-9]+}}(%esp), %eax
+; X86-NOPOPCNT-NEXT:    movl %eax, %ecx
+; X86-NOPOPCNT-NEXT:    shrl $16, %ecx
+; X86-NOPOPCNT-NEXT:    xorl %eax, %ecx
+; X86-NOPOPCNT-NEXT:    xorl %eax, %eax
+; X86-NOPOPCNT-NEXT:    xorb %ch, %cl
+; X86-NOPOPCNT-NEXT:    setnp %al
+; X86-NOPOPCNT-NEXT:    addl %eax, %eax
+; X86-NOPOPCNT-NEXT:    xorl %edx, %edx
+; X86-NOPOPCNT-NEXT:    retl
+;
+; X64-NOPOPCNT-LABEL: parity_64_shift:
+; X64-NOPOPCNT:       # %bb.0:
+; X64-NOPOPCNT-NEXT:    movq %rdi, %rax
+; X64-NOPOPCNT-NEXT:    shrq $32, %rax
+; X64-NOPOPCNT-NEXT:    xorl %edi, %eax
+; X64-NOPOPCNT-NEXT:    movl %eax, %ecx
+; X64-NOPOPCNT-NEXT:    shrl $16, %ecx
+; X64-NOPOPCNT-NEXT:    xorl %eax, %ecx
+; X64-NOPOPCNT-NEXT:    xorl %eax, %eax
+; X64-NOPOPCNT-NEXT:    xorb %ch, %cl
+; X64-NOPOPCNT-NEXT:    setnp %al
+; X64-NOPOPCNT-NEXT:    addq %rax, %rax
+; X64-NOPOPCNT-NEXT:    retq
+;
+; X86-POPCNT-LABEL: parity_64_shift:
+; X86-POPCNT:       # %bb.0:
+; X86-POPCNT-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-POPCNT-NEXT:    xorl {{[0-9]+}}(%esp), %eax
+; X86-POPCNT-NEXT:    popcntl %eax, %eax
+; X86-POPCNT-NEXT:    andl $1, %eax
+; X86-POPCNT-NEXT:    addl %eax, %eax
+; X86-POPCNT-NEXT:    xorl %edx, %edx
+; X86-POPCNT-NEXT:    retl
+;
+; X64-POPCNT-LABEL: parity_64_shift:
+; X64-POPCNT:       # %bb.0:
+; X64-POPCNT-NEXT:    popcntq %rdi, %rax
+; X64-POPCNT-NEXT:    andl $1, %eax
+; X64-POPCNT-NEXT:    addq %rax, %rax
+; X64-POPCNT-NEXT:    retq
+  %2 = tail call i64 @llvm.ctpop.i64(i64 %0)
+  %3 = shl nuw nsw i64 %2, 1
+  %4 = and i64 %3, 2
+  ret i64 %4
+}
+
 declare i4 @llvm.ctpop.i4(i4 %x)
 declare i8 @llvm.ctpop.i8(i8 %x)
 declare i16 @llvm.ctpop.i16(i16 %x)