[llvm] r344349 - [SelectionDAG] Move VectorLegalizer::ExpandCTLZ codegen into SelectionDAGLegalize

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 12 07:45:57 PDT 2018


Author: rksimon
Date: Fri Oct 12 07:45:57 2018
New Revision: 344349

URL: http://llvm.org/viewvc/llvm-project?rev=344349&view=rev
Log:
[SelectionDAG] Move VectorLegalizer::ExpandCTLZ codegen into SelectionDAGLegalize

Generalize SelectionDAGLegalize's CTLZ expansion to handle vectors - this lets VectorLegalizer::ExpandCTLZ just pass the expansion on instead of repeating the same codegen.

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
    llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
    llvm/trunk/test/CodeGen/X86/vec_ctbits.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp?rev=344349&r1=344348&r2=344349&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp Fri Oct 12 07:45:57 2018
@@ -2761,7 +2761,7 @@ SDValue SelectionDAGLegalize::ExpandBitC
     return DAG.getNode(ISD::CTLZ, dl, Op.getValueType(), Op);
   case ISD::CTLZ: {
     EVT VT = Op.getValueType();
-    unsigned Len = VT.getSizeInBits();
+    unsigned Len = VT.getScalarSizeInBits();
 
     if (TLI.isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
       EVT SetCCVT = getSetCCResultType(VT);

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp?rev=344349&r1=344348&r2=344349&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp Fri Oct 12 07:45:57 2018
@@ -1082,32 +1082,13 @@ SDValue VectorLegalizer::ExpandCTLZ(SDVa
     return DAG.getNode(ISD::CTLZ, DL, Op.getValueType(), Op.getOperand(0));
   }
 
-  // If CTPOP is available we can lower with a CTPOP based method:
-  // u16 ctlz(u16 x) {
-  //   x |= (x >> 1);
-  //   x |= (x >> 2);
-  //   x |= (x >> 4);
-  //   x |= (x >> 8);
-  //   return ctpop(~x);
-  // }
-  // Ref: "Hacker's Delight" by Henry Warren
+  // If we have the appropriate vector bit operations, it is better to use them
+  // than unrolling and expanding each component.
   if (isPowerOf2_32(NumBitsPerElt) &&
       TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) &&
       TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
-      TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT) &&
-      TLI.isOperationLegalOrCustomOrPromote(ISD::XOR, VT)) {
-    SDLoc DL(Op);
-    SDValue Res = Op.getOperand(0);
-    EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
-
-    for (unsigned i = 1; i != NumBitsPerElt; i *= 2)
-      Res = DAG.getNode(
-          ISD::OR, DL, VT, Res,
-          DAG.getNode(ISD::SRL, DL, VT, Res, DAG.getConstant(i, DL, ShiftTy)));
-
-    Res = DAG.getNOT(DL, Res, VT);
-    return DAG.getNode(ISD::CTPOP, DL, VT, Res);
-  }
+      TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
+    return Op;
 
   // Otherwise go ahead and unroll.
   return DAG.UnrollVectorOp(Op.getNode());

Modified: llvm/trunk/test/CodeGen/X86/vec_ctbits.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_ctbits.ll?rev=344349&r1=344348&r2=344349&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_ctbits.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_ctbits.ll Fri Oct 12 07:45:57 2018
@@ -142,42 +142,42 @@ define <2 x i32> @promlz(<2 x i32> %a) n
 ; CHECK-LABEL: promlz:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    pand {{.*}}(%rip), %xmm0
-; CHECK-NEXT:    pxor %xmm1, %xmm1
-; CHECK-NEXT:    movdqa %xmm0, %xmm2
-; CHECK-NEXT:    psrlq $1, %xmm2
-; CHECK-NEXT:    por %xmm0, %xmm2
-; CHECK-NEXT:    movdqa %xmm2, %xmm0
+; CHECK-NEXT:    movdqa %xmm0, %xmm1
+; CHECK-NEXT:    psrlq $1, %xmm1
+; CHECK-NEXT:    por %xmm0, %xmm1
+; CHECK-NEXT:    movdqa %xmm1, %xmm0
 ; CHECK-NEXT:    psrlq $2, %xmm0
-; CHECK-NEXT:    por %xmm2, %xmm0
-; CHECK-NEXT:    movdqa %xmm0, %xmm2
-; CHECK-NEXT:    psrlq $4, %xmm2
-; CHECK-NEXT:    por %xmm0, %xmm2
-; CHECK-NEXT:    movdqa %xmm2, %xmm0
+; CHECK-NEXT:    por %xmm1, %xmm0
+; CHECK-NEXT:    movdqa %xmm0, %xmm1
+; CHECK-NEXT:    psrlq $4, %xmm1
+; CHECK-NEXT:    por %xmm0, %xmm1
+; CHECK-NEXT:    movdqa %xmm1, %xmm0
 ; CHECK-NEXT:    psrlq $8, %xmm0
-; CHECK-NEXT:    por %xmm2, %xmm0
-; CHECK-NEXT:    movdqa %xmm0, %xmm2
-; CHECK-NEXT:    psrlq $16, %xmm2
-; CHECK-NEXT:    por %xmm0, %xmm2
-; CHECK-NEXT:    movdqa %xmm2, %xmm0
+; CHECK-NEXT:    por %xmm1, %xmm0
+; CHECK-NEXT:    movdqa %xmm0, %xmm1
+; CHECK-NEXT:    psrlq $16, %xmm1
+; CHECK-NEXT:    por %xmm0, %xmm1
+; CHECK-NEXT:    movdqa %xmm1, %xmm0
 ; CHECK-NEXT:    psrlq $32, %xmm0
-; CHECK-NEXT:    por %xmm2, %xmm0
-; CHECK-NEXT:    pcmpeqd %xmm2, %xmm2
-; CHECK-NEXT:    pxor %xmm0, %xmm2
-; CHECK-NEXT:    movdqa %xmm2, %xmm0
+; CHECK-NEXT:    por %xmm1, %xmm0
+; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
+; CHECK-NEXT:    pxor %xmm0, %xmm1
+; CHECK-NEXT:    movdqa %xmm1, %xmm0
 ; CHECK-NEXT:    psrlw $1, %xmm0
 ; CHECK-NEXT:    pand {{.*}}(%rip), %xmm0
-; CHECK-NEXT:    psubb %xmm0, %xmm2
+; CHECK-NEXT:    psubb %xmm0, %xmm1
 ; CHECK-NEXT:    movdqa {{.*#+}} xmm0 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
-; CHECK-NEXT:    movdqa %xmm2, %xmm3
-; CHECK-NEXT:    pand %xmm0, %xmm3
-; CHECK-NEXT:    psrlw $2, %xmm2
+; CHECK-NEXT:    movdqa %xmm1, %xmm2
 ; CHECK-NEXT:    pand %xmm0, %xmm2
-; CHECK-NEXT:    paddb %xmm3, %xmm2
-; CHECK-NEXT:    movdqa %xmm2, %xmm0
-; CHECK-NEXT:    psrlw $4, %xmm0
-; CHECK-NEXT:    paddb %xmm2, %xmm0
-; CHECK-NEXT:    pand {{.*}}(%rip), %xmm0
-; CHECK-NEXT:    psadbw %xmm1, %xmm0
+; CHECK-NEXT:    psrlw $2, %xmm1
+; CHECK-NEXT:    pand %xmm0, %xmm1
+; CHECK-NEXT:    paddb %xmm2, %xmm1
+; CHECK-NEXT:    movdqa %xmm1, %xmm2
+; CHECK-NEXT:    psrlw $4, %xmm2
+; CHECK-NEXT:    paddb %xmm1, %xmm2
+; CHECK-NEXT:    pand {{.*}}(%rip), %xmm2
+; CHECK-NEXT:    pxor %xmm0, %xmm0
+; CHECK-NEXT:    psadbw %xmm2, %xmm0
 ; CHECK-NEXT:    psubq {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
   %c = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)




More information about the llvm-commits mailing list