[llvm] r266858 - [AVX512] Add popcount support for v32i16 and v64i8.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 19 22:18:56 PDT 2016


Author: ctopper
Date: Wed Apr 20 00:18:55 2016
New Revision: 266858

URL: http://llvm.org/viewvc/llvm-project?rev=266858&view=rev
Log:
[AVX512] Add popcount support for v32i16 and v64i8.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/vector-popcnt-512.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=266858&r1=266857&r2=266858&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Apr 20 00:18:55 2016
@@ -1528,6 +1528,7 @@ X86TargetLowering::X86TargetLowering(con
       setOperationAction(ISD::SRA,          VT, Custom);
       setOperationAction(ISD::MLOAD,        VT, Legal);
       setOperationAction(ISD::MSTORE,       VT, Legal);
+      setOperationAction(ISD::CTPOP,        VT, Custom);
 
       setOperationPromotedToType(ISD::AND,  VT, MVT::v8i64);
       setOperationPromotedToType(ISD::OR,   VT, MVT::v8i64);
@@ -20580,7 +20581,7 @@ static SDValue LowerVectorCTPOPInRegLUT(
   int NumByteElts = VecSize / 8;
   MVT ByteVecVT = MVT::getVectorVT(MVT::i8, NumByteElts);
   SDValue In = DAG.getBitcast(ByteVecVT, Op);
-  SmallVector<SDValue, 16> LUTVec;
+  SmallVector<SDValue, 64> LUTVec;
   for (int i = 0; i < NumByteElts; ++i)
     LUTVec.push_back(DAG.getConstant(LUT[i % 16], DL, MVT::i8));
   SDValue InRegLUT = DAG.getNode(ISD::BUILD_VECTOR, DL, ByteVecVT, LUTVec);
@@ -20676,8 +20677,7 @@ static SDValue LowerVectorCTPOPBitmath(S
 static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget &Subtarget,
                                 SelectionDAG &DAG) {
   MVT VT = Op.getSimpleValueType();
-  // FIXME: Need to add AVX-512 support here!
-  assert((VT.is256BitVector() || VT.is128BitVector()) &&
+  assert((VT.is512BitVector() || VT.is256BitVector() || VT.is128BitVector()) &&
          "Unknown CTPOP type to handle");
   SDLoc DL(Op.getNode());
   SDValue Op0 = Op.getOperand(0);

Modified: llvm/trunk/test/CodeGen/X86/vector-popcnt-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-popcnt-512.ll?rev=266858&r1=266857&r2=266858&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-popcnt-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-popcnt-512.ll Wed Apr 20 00:18:55 2016
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512CD
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
 
 define <8 x i64> @testv8i64(<8 x i64> %in) nounwind {
 ; ALL-LABEL: testv8i64:
@@ -106,51 +107,78 @@ define <16 x i32> @testv16i32(<16 x i32>
 }
 
 define <32 x i16> @testv32i16(<32 x i16> %in) nounwind {
-; ALL-LABEL: testv32i16:
-; ALL:       ## BB#0:
-; ALL-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; ALL-NEXT:    vpand %ymm2, %ymm0, %ymm3
-; ALL-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; ALL-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
-; ALL-NEXT:    vpsrlw $4, %ymm0, %ymm0
-; ALL-NEXT:    vpand %ymm2, %ymm0, %ymm0
-; ALL-NEXT:    vpshufb %ymm0, %ymm4, %ymm0
-; ALL-NEXT:    vpaddb %ymm3, %ymm0, %ymm0
-; ALL-NEXT:    vpsllw $8, %ymm0, %ymm3
-; ALL-NEXT:    vpaddb %ymm0, %ymm3, %ymm0
-; ALL-NEXT:    vpsrlw $8, %ymm0, %ymm0
-; ALL-NEXT:    vpand %ymm2, %ymm1, %ymm3
-; ALL-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
-; ALL-NEXT:    vpsrlw $4, %ymm1, %ymm1
-; ALL-NEXT:    vpand %ymm2, %ymm1, %ymm1
-; ALL-NEXT:    vpshufb %ymm1, %ymm4, %ymm1
-; ALL-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
-; ALL-NEXT:    vpsllw $8, %ymm1, %ymm2
-; ALL-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
-; ALL-NEXT:    vpsrlw $8, %ymm1, %ymm1
-; ALL-NEXT:    retq
+; AVX512F-LABEL: testv32i16:
+; AVX512F:       ## BB#0:
+; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512F-NEXT:    vpand %ymm2, %ymm0, %ymm3
+; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512F-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
+; AVX512F-NEXT:    vpsrlw $4, %ymm0, %ymm0
+; AVX512F-NEXT:    vpand %ymm2, %ymm0, %ymm0
+; AVX512F-NEXT:    vpshufb %ymm0, %ymm4, %ymm0
+; AVX512F-NEXT:    vpaddb %ymm3, %ymm0, %ymm0
+; AVX512F-NEXT:    vpsllw $8, %ymm0, %ymm3
+; AVX512F-NEXT:    vpaddb %ymm0, %ymm3, %ymm0
+; AVX512F-NEXT:    vpsrlw $8, %ymm0, %ymm0
+; AVX512F-NEXT:    vpand %ymm2, %ymm1, %ymm3
+; AVX512F-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
+; AVX512F-NEXT:    vpsrlw $4, %ymm1, %ymm1
+; AVX512F-NEXT:    vpand %ymm2, %ymm1, %ymm1
+; AVX512F-NEXT:    vpshufb %ymm1, %ymm4, %ymm1
+; AVX512F-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
+; AVX512F-NEXT:    vpsllw $8, %ymm1, %ymm2
+; AVX512F-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
+; AVX512F-NEXT:    vpsrlw $8, %ymm1, %ymm1
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: testv32i16:
+; AVX512BW:       ## BB#0:
+; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm2
+; AVX512BW-NEXT:    vmovdqu8 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
+; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
+; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpsllw $8, %zmm0, %zmm1
+; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
+; AVX512BW-NEXT:    retq
   %out = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %in)
   ret <32 x i16> %out
 }
 
 define <64 x i8> @testv64i8(<64 x i8> %in) nounwind {
-; ALL-LABEL: testv64i8:
-; ALL:       ## BB#0:
-; ALL-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; ALL-NEXT:    vpand %ymm2, %ymm0, %ymm3
-; ALL-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; ALL-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
-; ALL-NEXT:    vpsrlw $4, %ymm0, %ymm0
-; ALL-NEXT:    vpand %ymm2, %ymm0, %ymm0
-; ALL-NEXT:    vpshufb %ymm0, %ymm4, %ymm0
-; ALL-NEXT:    vpaddb %ymm3, %ymm0, %ymm0
-; ALL-NEXT:    vpand %ymm2, %ymm1, %ymm3
-; ALL-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
-; ALL-NEXT:    vpsrlw $4, %ymm1, %ymm1
-; ALL-NEXT:    vpand %ymm2, %ymm1, %ymm1
-; ALL-NEXT:    vpshufb %ymm1, %ymm4, %ymm1
-; ALL-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
-; ALL-NEXT:    retq
+; AVX512F-LABEL: testv64i8:
+; AVX512F:       ## BB#0:
+; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512F-NEXT:    vpand %ymm2, %ymm0, %ymm3
+; AVX512F-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512F-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
+; AVX512F-NEXT:    vpsrlw $4, %ymm0, %ymm0
+; AVX512F-NEXT:    vpand %ymm2, %ymm0, %ymm0
+; AVX512F-NEXT:    vpshufb %ymm0, %ymm4, %ymm0
+; AVX512F-NEXT:    vpaddb %ymm3, %ymm0, %ymm0
+; AVX512F-NEXT:    vpand %ymm2, %ymm1, %ymm3
+; AVX512F-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
+; AVX512F-NEXT:    vpsrlw $4, %ymm1, %ymm1
+; AVX512F-NEXT:    vpand %ymm2, %ymm1, %ymm1
+; AVX512F-NEXT:    vpshufb %ymm1, %ymm4, %ymm1
+; AVX512F-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
+; AVX512F-NEXT:    retq
+;
+; AVX512BW-LABEL: testv64i8:
+; AVX512BW:       ## BB#0:
+; AVX512BW-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm2
+; AVX512BW-NEXT:    vmovdqu8 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512BW-NEXT:    vpshufb %zmm2, %zmm3, %zmm2
+; AVX512BW-NEXT:    vpsrlw $4, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
+; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT:    retq
   %out = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %in)
   ret <64 x i8> %out
 }




More information about the llvm-commits mailing list