[llvm] r344457 - [X86][SSE] Remove most of vector CTTZ custom lowering and use LegalizeDAG instead.

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sat Oct 13 09:11:15 PDT 2018


Author: rksimon
Date: Sat Oct 13 09:11:15 2018
New Revision: 344457

URL: http://llvm.org/viewvc/llvm-project?rev=344457&view=rev
Log:
[X86][SSE] Remove most of vector CTTZ custom lowering and use LegalizeDAG instead. 

There is one remnant - AVX1 custom splitting of 256-bit vectors - which remains because of a regression where the X86ISD::ANDNP would otherwise still be performed as a YMM operation.

I've also tightened the CTLZ-or-CTPOP choice in SelectionDAGLegalize::ExpandBitCount to require a strictly legal CTLZ (not merely custom) - it doesn't affect existing users and fixes an issue with AVX512 codegen.

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp?rev=344457&r1=344456&r2=344457&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp Sat Oct 13 09:11:15 2018
@@ -2814,8 +2814,8 @@ SDValue SelectionDAGLegalize::ExpandBitC
                                DAG.getNode(ISD::SUB, dl, VT, Op,
                                            DAG.getConstant(1, dl, VT)));
     // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
-    if (!TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) &&
-        TLI.isOperationLegalOrCustom(ISD::CTLZ, VT))
+    if (!TLI.isOperationLegal(ISD::CTPOP, VT) &&
+        TLI.isOperationLegal(ISD::CTLZ, VT))
       return DAG.getNode(ISD::SUB, dl, VT,
                          DAG.getConstant(Len, dl, VT),
                          DAG.getNode(ISD::CTLZ, dl, VT, Tmp3));

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=344457&r1=344456&r2=344457&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Oct 13 09:11:15 2018
@@ -826,7 +826,6 @@ X86TargetLowering::X86TargetLowering(con
     for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
       setOperationAction(ISD::SETCC,              VT, Custom);
       setOperationAction(ISD::CTPOP,              VT, Custom);
-      setOperationAction(ISD::CTTZ,               VT, Custom);
 
       // The condition codes aren't legal in SSE/AVX and under AVX512 we use
       // setcc all the way to isel and prefer SETGT in some isel patterns.
@@ -1083,9 +1082,11 @@ X86TargetLowering::X86TargetLowering(con
     for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
       setOperationAction(ISD::SETCC,           VT, Custom);
       setOperationAction(ISD::CTPOP,           VT, Custom);
-      setOperationAction(ISD::CTTZ,            VT, Custom);
       setOperationAction(ISD::CTLZ,            VT, Custom);
 
+      // TODO - remove this once 256-bit X86ISD::ANDNP correctly split.
+      setOperationAction(ISD::CTTZ,  VT, HasInt256 ? Expand : Custom);
+
       // The condition codes aren't legal in SSE/AVX and under AVX512 we use
       // setcc all the way to isel and prefer SETGT in some isel patterns.
       setCondCodeAction(ISD::SETLT, VT, Custom);
@@ -1371,7 +1372,6 @@ X86TargetLowering::X86TargetLowering(con
       setOperationAction(ISD::SHL,              VT, Custom);
       setOperationAction(ISD::SRA,              VT, Custom);
       setOperationAction(ISD::CTPOP,            VT, Custom);
-      setOperationAction(ISD::CTTZ,             VT, Custom);
       setOperationAction(ISD::ROTL,             VT, Custom);
       setOperationAction(ISD::ROTR,             VT, Custom);
       setOperationAction(ISD::SETCC,            VT, Custom);
@@ -1402,7 +1402,6 @@ X86TargetLowering::X86TargetLowering(con
       // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
       for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
         setOperationAction(ISD::CTLZ,            VT, Legal);
-        setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
       }
     } // Subtarget.hasCDI()
 
@@ -1491,7 +1490,6 @@ X86TargetLowering::X86TargetLowering(con
     if (Subtarget.hasCDI()) {
       for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
         setOperationAction(ISD::CTLZ,            VT, Legal);
-        setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
       }
     } // Subtarget.hasCDI()
 
@@ -1586,7 +1584,6 @@ X86TargetLowering::X86TargetLowering(con
       setOperationAction(ISD::MLOAD,        VT, Legal);
       setOperationAction(ISD::MSTORE,       VT, Legal);
       setOperationAction(ISD::CTPOP,        VT, Custom);
-      setOperationAction(ISD::CTTZ,         VT, Custom);
       setOperationAction(ISD::CTLZ,         VT, Custom);
       setOperationAction(ISD::SMAX,         VT, Legal);
       setOperationAction(ISD::UMAX,         VT, Legal);
@@ -22999,29 +22996,11 @@ static SDValue LowerCTTZ(SDValue Op, con
   SDValue N0 = Op.getOperand(0);
   SDLoc dl(Op);
 
-  if (VT.isVector()) {
-    // Decompose 256-bit ops into smaller 128-bit ops.
-    if (VT.is256BitVector() && !Subtarget.hasInt256())
-      return Lower256IntUnary(Op, DAG);
-
-    // cttz(x) = width - ctlz(~x & (x - 1))
-    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-    if (TLI.isOperationLegal(ISD::CTLZ, VT) &&
-        !TLI.isOperationLegal(ISD::CTPOP, VT)) {
-      SDValue One = DAG.getConstant(1, dl, VT);
-      SDValue Width = DAG.getConstant(NumBits, dl, VT);
-      return DAG.getNode(
-          ISD::SUB, dl, VT, Width,
-          DAG.getNode(ISD::CTLZ, dl, VT,
-                      DAG.getNode(ISD::AND, dl, VT, DAG.getNOT(dl, N0, VT),
-                                  DAG.getNode(ISD::SUB, dl, VT, N0, One))));
-    }
-
-    // Else leave it to the legalizer.
-    return SDValue();
-  }
+  // Decompose 256-bit ops into smaller 128-bit ops.
+  if (VT.is256BitVector() && !Subtarget.hasInt256())
+    return Lower256IntUnary(Op, DAG);
 
-  assert(Op.getOpcode() == ISD::CTTZ &&
+  assert(!VT.isVector() && Op.getOpcode() == ISD::CTTZ &&
          "Only scalar CTTZ requires custom lowering");
 
   // Issue a bsf (scan bits forward) which also sets EFLAGS.




More information about the llvm-commits mailing list