[llvm] r266858 - [AVX512] Add popcount support for v32i16 and v64i8.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 19 22:18:56 PDT 2016
Author: ctopper
Date: Wed Apr 20 00:18:55 2016
New Revision: 266858
URL: http://llvm.org/viewvc/llvm-project?rev=266858&view=rev
Log:
[AVX512] Add popcount support for v32i16 and v64i8.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vector-popcnt-512.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=266858&r1=266857&r2=266858&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Apr 20 00:18:55 2016
@@ -1528,6 +1528,7 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::MLOAD, VT, Legal);
setOperationAction(ISD::MSTORE, VT, Legal);
+ setOperationAction(ISD::CTPOP, VT, Custom);
setOperationPromotedToType(ISD::AND, VT, MVT::v8i64);
setOperationPromotedToType(ISD::OR, VT, MVT::v8i64);
@@ -20580,7 +20581,7 @@ static SDValue LowerVectorCTPOPInRegLUT(
int NumByteElts = VecSize / 8;
MVT ByteVecVT = MVT::getVectorVT(MVT::i8, NumByteElts);
SDValue In = DAG.getBitcast(ByteVecVT, Op);
- SmallVector<SDValue, 16> LUTVec;
+ SmallVector<SDValue, 64> LUTVec;
for (int i = 0; i < NumByteElts; ++i)
LUTVec.push_back(DAG.getConstant(LUT[i % 16], DL, MVT::i8));
SDValue InRegLUT = DAG.getNode(ISD::BUILD_VECTOR, DL, ByteVecVT, LUTVec);
@@ -20676,8 +20677,7 @@ static SDValue LowerVectorCTPOPBitmath(S
static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
- // FIXME: Need to add AVX-512 support here!
- assert((VT.is256BitVector() || VT.is128BitVector()) &&
+ assert((VT.is512BitVector() || VT.is256BitVector() || VT.is128BitVector()) &&
"Unknown CTPOP type to handle");
SDLoc DL(Op.getNode());
SDValue Op0 = Op.getOperand(0);
Modified: llvm/trunk/test/CodeGen/X86/vector-popcnt-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-popcnt-512.ll?rev=266858&r1=266857&r2=266858&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-popcnt-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-popcnt-512.ll Wed Apr 20 00:18:55 2016
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512CD
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
define <8 x i64> @testv8i64(<8 x i64> %in) nounwind {
; ALL-LABEL: testv8i64:
@@ -106,51 +107,78 @@ define <16 x i32> @testv16i32(<16 x i32>
}
define <32 x i16> @testv32i16(<32 x i16> %in) nounwind {
-; ALL-LABEL: testv32i16:
-; ALL: ## BB#0:
-; ALL-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; ALL-NEXT: vpand %ymm2, %ymm0, %ymm3
-; ALL-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; ALL-NEXT: vpshufb %ymm3, %ymm4, %ymm3
-; ALL-NEXT: vpsrlw $4, %ymm0, %ymm0
-; ALL-NEXT: vpand %ymm2, %ymm0, %ymm0
-; ALL-NEXT: vpshufb %ymm0, %ymm4, %ymm0
-; ALL-NEXT: vpaddb %ymm3, %ymm0, %ymm0
-; ALL-NEXT: vpsllw $8, %ymm0, %ymm3
-; ALL-NEXT: vpaddb %ymm0, %ymm3, %ymm0
-; ALL-NEXT: vpsrlw $8, %ymm0, %ymm0
-; ALL-NEXT: vpand %ymm2, %ymm1, %ymm3
-; ALL-NEXT: vpshufb %ymm3, %ymm4, %ymm3
-; ALL-NEXT: vpsrlw $4, %ymm1, %ymm1
-; ALL-NEXT: vpand %ymm2, %ymm1, %ymm1
-; ALL-NEXT: vpshufb %ymm1, %ymm4, %ymm1
-; ALL-NEXT: vpaddb %ymm3, %ymm1, %ymm1
-; ALL-NEXT: vpsllw $8, %ymm1, %ymm2
-; ALL-NEXT: vpaddb %ymm1, %ymm2, %ymm1
-; ALL-NEXT: vpsrlw $8, %ymm1, %ymm1
-; ALL-NEXT: retq
+; AVX512F-LABEL: testv32i16:
+; AVX512F: ## BB#0:
+; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm3
+; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512F-NEXT: vpshufb %ymm3, %ymm4, %ymm3
+; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX512F-NEXT: vpshufb %ymm0, %ymm4, %ymm0
+; AVX512F-NEXT: vpaddb %ymm3, %ymm0, %ymm0
+; AVX512F-NEXT: vpsllw $8, %ymm0, %ymm3
+; AVX512F-NEXT: vpaddb %ymm0, %ymm3, %ymm0
+; AVX512F-NEXT: vpsrlw $8, %ymm0, %ymm0
+; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm3
+; AVX512F-NEXT: vpshufb %ymm3, %ymm4, %ymm3
+; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
+; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1
+; AVX512F-NEXT: vpaddb %ymm3, %ymm1, %ymm1
+; AVX512F-NEXT: vpsllw $8, %ymm1, %ymm2
+; AVX512F-NEXT: vpaddb %ymm1, %ymm2, %ymm1
+; AVX512F-NEXT: vpsrlw $8, %ymm1, %ymm1
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: testv32i16:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2
+; AVX512BW-NEXT: vmovdqu8 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2
+; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
+; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0
+; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsllw $8, %zmm0, %zmm1
+; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: vpsrlw $8, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
%out = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %in)
ret <32 x i16> %out
}
define <64 x i8> @testv64i8(<64 x i8> %in) nounwind {
-; ALL-LABEL: testv64i8:
-; ALL: ## BB#0:
-; ALL-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; ALL-NEXT: vpand %ymm2, %ymm0, %ymm3
-; ALL-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; ALL-NEXT: vpshufb %ymm3, %ymm4, %ymm3
-; ALL-NEXT: vpsrlw $4, %ymm0, %ymm0
-; ALL-NEXT: vpand %ymm2, %ymm0, %ymm0
-; ALL-NEXT: vpshufb %ymm0, %ymm4, %ymm0
-; ALL-NEXT: vpaddb %ymm3, %ymm0, %ymm0
-; ALL-NEXT: vpand %ymm2, %ymm1, %ymm3
-; ALL-NEXT: vpshufb %ymm3, %ymm4, %ymm3
-; ALL-NEXT: vpsrlw $4, %ymm1, %ymm1
-; ALL-NEXT: vpand %ymm2, %ymm1, %ymm1
-; ALL-NEXT: vpshufb %ymm1, %ymm4, %ymm1
-; ALL-NEXT: vpaddb %ymm3, %ymm1, %ymm1
-; ALL-NEXT: retq
+; AVX512F-LABEL: testv64i8:
+; AVX512F: ## BB#0:
+; AVX512F-NEXT: vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm3
+; AVX512F-NEXT: vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512F-NEXT: vpshufb %ymm3, %ymm4, %ymm3
+; AVX512F-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX512F-NEXT: vpshufb %ymm0, %ymm4, %ymm0
+; AVX512F-NEXT: vpaddb %ymm3, %ymm0, %ymm0
+; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm3
+; AVX512F-NEXT: vpshufb %ymm3, %ymm4, %ymm3
+; AVX512F-NEXT: vpsrlw $4, %ymm1, %ymm1
+; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX512F-NEXT: vpshufb %ymm1, %ymm4, %ymm1
+; AVX512F-NEXT: vpaddb %ymm3, %ymm1, %ymm1
+; AVX512F-NEXT: retq
+;
+; AVX512BW-LABEL: testv64i8:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm2
+; AVX512BW-NEXT: vmovdqu8 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2
+; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
+; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0
+; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: retq
%out = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %in)
ret <64 x i8> %out
}
More information about the llvm-commits mailing list