[llvm] r303858 - [X86] Adding vpopcntd and vpopcntq instructions

Oren Ben Simhon via llvm-commits llvm-commits at lists.llvm.org
Thu May 25 06:45:23 PDT 2017


Author: orenb
Date: Thu May 25 08:45:23 2017
New Revision: 303858

URL: http://llvm.org/viewvc/llvm-project?rev=303858&view=rev
Log:
[X86] Adding vpopcntd and vpopcntq instructions

AVX512_VPOPCNTDQ is a new feature set that was published by Intel.
The patch represents the LLVM side of the addition of two new intrinsic based instructions (vpopcntd and vpopcntq).

Differential Revision: https://reviews.llvm.org/D33169


Added:
    llvm/trunk/test/CodeGen/X86/avx512vpopcntdq-intrinsics.ll
    llvm/trunk/test/MC/X86/x86-64-avx512vpopcntdq.s
Modified:
    llvm/trunk/lib/Support/Host.cpp
    llvm/trunk/lib/Target/X86/X86.td
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
    llvm/trunk/lib/Target/X86/X86InstrInfo.td
    llvm/trunk/lib/Target/X86/X86Subtarget.cpp
    llvm/trunk/lib/Target/X86/X86Subtarget.h
    llvm/trunk/test/CodeGen/X86/vector-popcnt-128.ll
    llvm/trunk/test/CodeGen/X86/vector-popcnt-256.ll
    llvm/trunk/test/CodeGen/X86/vector-popcnt-512.ll
    llvm/trunk/test/CodeGen/X86/vector-tzcnt-128.ll
    llvm/trunk/test/CodeGen/X86/vector-tzcnt-256.ll
    llvm/trunk/test/CodeGen/X86/vector-tzcnt-512.ll
    llvm/trunk/test/MC/Disassembler/X86/avx-512.txt

Modified: llvm/trunk/lib/Support/Host.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Host.cpp?rev=303858&r1=303857&r2=303858&view=diff
==============================================================================
--- llvm/trunk/lib/Support/Host.cpp (original)
+++ llvm/trunk/lib/Support/Host.cpp Thu May 25 08:45:23 2017
@@ -1390,6 +1390,7 @@ bool sys::getHostCPUFeatures(StringMap<b
   Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1);
 
   // AVX512 is only supported if the OS supports the context save for it.
+  Features["avx512vpopcntdq"] = HasLeaf7 && ((EBX >> 14) & 1) && HasAVX512Save;
   Features["avx512f"] = HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save;
   Features["avx512dq"] = HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save;
   Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save;

Modified: llvm/trunk/lib/Target/X86/X86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.td?rev=303858&r1=303857&r2=303858&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86.td (original)
+++ llvm/trunk/lib/Target/X86/X86.td Thu May 25 08:45:23 2017
@@ -127,6 +127,9 @@ def FeatureERI      : SubtargetFeature<"
 def FeatureCDI      : SubtargetFeature<"avx512cd", "HasCDI", "true",
                       "Enable AVX-512 Conflict Detection Instructions",
                                       [FeatureAVX512]>;
+def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ",
+                       "true", "Enable AVX-512 Population Count Instructions",
+                                      [FeatureAVX512]>;
 def FeaturePFI      : SubtargetFeature<"avx512pf", "HasPFI", "true",
                       "Enable AVX-512 PreFetch Instructions",
                                       [FeatureAVX512]>;

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=303858&r1=303857&r2=303858&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu May 25 08:45:23 2017
@@ -1364,6 +1364,14 @@ X86TargetLowering::X86TargetLowering(con
       setOperationAction(ISD::MUL,             MVT::v8i64, Legal);
     }
 
+    if (Subtarget.hasVPOPCNTDQ()) {
+      // VPOPCNTDQ sub-targets extend 128/256 vectors to use the avx512
+      // version of popcntd/q.
+      for (auto VT : {MVT::v16i32, MVT::v8i64, MVT::v8i32, MVT::v4i64,
+                      MVT::v4i32, MVT::v2i64})
+        setOperationAction(ISD::CTPOP, VT, Legal);
+    }
+
     // Custom lower several nodes.
     for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
                      MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=303858&r1=303857&r2=303858&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Thu May 25 08:45:23 2017
@@ -8649,6 +8649,41 @@ let Predicates = [HasCDI, NoVLX] in {
 }
 
 //===---------------------------------------------------------------------===//
+// Counts number of ones - VPOPCNTD and VPOPCNTQ
+//===---------------------------------------------------------------------===//
+
+multiclass avx512_unary_rmb_popcnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo VTInfo> {
+  let Predicates = [HasVPOPCNTDQ] in
+    defm Z : avx512_unary_rmb<opc, OpcodeStr, ctpop, VTInfo>, EVEX_V512;
+}
+
+// Use 512bit version to implement 128/256 bit.
+multiclass avx512_unary_lowering<SDNode OpNode, AVX512VLVectorVTInfo _, Predicate prd> {
+  let Predicates = [prd] in {
+    def Z256_Alt : Pat<(_.info256.VT(OpNode _.info256.RC:$src1)),
+                       (EXTRACT_SUBREG
+                         (!cast<Instruction>(NAME # "Zrr")
+                           (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
+                                          _.info256.RC:$src1,
+                                          _.info256.SubRegIdx)),
+                       _.info256.SubRegIdx)>;
+
+    def Z128_Alt : Pat<(_.info128.VT(OpNode _.info128.RC:$src1)),
+                       (EXTRACT_SUBREG
+                         (!cast<Instruction>(NAME # "Zrr")
+                           (INSERT_SUBREG(_.info512.VT(IMPLICIT_DEF)),
+                                          _.info128.RC:$src1,
+                                          _.info128.SubRegIdx)),
+                       _.info128.SubRegIdx)>;
+  }
+}
+
+defm VPOPCNTD : avx512_unary_rmb_popcnt<0x55, "vpopcntd", v16i32_info>,
+                avx512_unary_lowering<ctpop, avx512vl_i32_info, HasVPOPCNTDQ>;
+defm VPOPCNTQ : avx512_unary_rmb_popcnt<0x55, "vpopcntq", v8i64_info>,
+                avx512_unary_lowering<ctpop, avx512vl_i64_info, HasVPOPCNTDQ>, VEX_W;
+
+//===---------------------------------------------------------------------===//
 // Replicate Single FP - MOVSHDUP and MOVSLDUP
 //===---------------------------------------------------------------------===//
 multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode>{

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=303858&r1=303857&r2=303858&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Thu May 25 08:45:23 2017
@@ -914,6 +914,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::VPMOVZXDQZrr,     X86::VPMOVZXDQZrm,       0 },
     { X86::VPMOVZXWDZrr,     X86::VPMOVZXWDZrm,       0 },
     { X86::VPMOVZXWQZrr,     X86::VPMOVZXWQZrm,       0 },
+    { X86::VPOPCNTDZrr,      X86::VPOPCNTDZrm,        0 },
+    { X86::VPOPCNTQZrr,      X86::VPOPCNTQZrm,        0 },
     { X86::VPSHUFDZri,       X86::VPSHUFDZmi,         0 },
     { X86::VPSHUFHWZri,      X86::VPSHUFHWZmi,        0 },
     { X86::VPSHUFLWZri,      X86::VPSHUFLWZmi,        0 },
@@ -2326,6 +2328,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::VPMOVZXDQZrrkz,    X86::VPMOVZXDQZrmkz,      0 },
     { X86::VPMOVZXWDZrrkz,    X86::VPMOVZXWDZrmkz,      0 },
     { X86::VPMOVZXWQZrrkz,    X86::VPMOVZXWQZrmkz,      0 },
+    { X86::VPOPCNTDZrrkz,     X86::VPOPCNTDZrmkz,       0 },
+    { X86::VPOPCNTQZrrkz,     X86::VPOPCNTQZrmkz,       0 },
     { X86::VPSHUFDZrikz,      X86::VPSHUFDZmikz,        0 },
     { X86::VPSHUFHWZrikz,     X86::VPSHUFHWZmikz,       0 },
     { X86::VPSHUFLWZrikz,     X86::VPSHUFLWZmikz,       0 },
@@ -2947,6 +2951,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget
     { X86::VPMOVZXDQZrrk,         X86::VPMOVZXDQZrmk,         0 },
     { X86::VPMOVZXWDZrrk,         X86::VPMOVZXWDZrmk,         0 },
     { X86::VPMOVZXWQZrrk,         X86::VPMOVZXWQZrmk,         0 },
+    { X86::VPOPCNTDZrrk,          X86::VPOPCNTDZrmk,          0 },
+    { X86::VPOPCNTQZrrk,          X86::VPOPCNTQZrmk,          0 },
     { X86::VPSHUFDZrik,           X86::VPSHUFDZmik,           0 },
     { X86::VPSHUFHWZrik,          X86::VPSHUFHWZmik,          0 },
     { X86::VPSHUFLWZrik,          X86::VPSHUFLWZmik,          0 },

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=303858&r1=303857&r2=303858&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Thu May 25 08:45:23 2017
@@ -813,6 +813,8 @@ def UseAVX2      : Predicate<"Subtarget-
 def NoAVX512     : Predicate<"!Subtarget->hasAVX512()">;
 def HasCDI       : Predicate<"Subtarget->hasCDI()">,
                      AssemblerPredicate<"FeatureCDI", "AVX-512 CD ISA">;
+def HasVPOPCNTDQ : Predicate<"Subtarget->hasVPOPCNTDQ()">,
+                   AssemblerPredicate<"FeatureVPOPCNTDQ", "AVX-512 VPOPCNTDQ ISA">;
 def HasPFI       : Predicate<"Subtarget->hasPFI()">,
                      AssemblerPredicate<"FeaturePFI", "AVX-512 PF ISA">;
 def HasERI       : Predicate<"Subtarget->hasERI()">,

Modified: llvm/trunk/lib/Target/X86/X86Subtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.cpp?rev=303858&r1=303857&r2=303858&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86Subtarget.cpp Thu May 25 08:45:23 2017
@@ -286,6 +286,7 @@ void X86Subtarget::initializeEnvironment
   HasCDI = false;
   HasPFI = false;
   HasDQI = false;
+  HasVPOPCNTDQ = false;
   HasBWI = false;
   HasVLX = false;
   HasADX = false;

Modified: llvm/trunk/lib/Target/X86/X86Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.h?rev=303858&r1=303857&r2=303858&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.h (original)
+++ llvm/trunk/lib/Target/X86/X86Subtarget.h Thu May 25 08:45:23 2017
@@ -270,6 +270,9 @@ protected:
   /// Processor has AVX-512 Conflict Detection Instructions
   bool HasCDI;
 
+  /// Processor has AVX-512 population count Instructions
+  bool HasVPOPCNTDQ;
+
   /// Processor has AVX-512 Doubleword and Quadword instructions
   bool HasDQI;
 
@@ -494,6 +497,7 @@ public:
   bool slow3OpsLEA() const { return Slow3OpsLEA; }
   bool slowIncDec() const { return SlowIncDec; }
   bool hasCDI() const { return HasCDI; }
+  bool hasVPOPCNTDQ() const { return HasVPOPCNTDQ; }
   bool hasPFI() const { return HasPFI; }
   bool hasERI() const { return HasERI; }
   bool hasDQI() const { return HasDQI; }

Added: llvm/trunk/test/CodeGen/X86/avx512vpopcntdq-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vpopcntdq-intrinsics.ll?rev=303858&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vpopcntdq-intrinsics.ll (added)
+++ llvm/trunk/test/CodeGen/X86/avx512vpopcntdq-intrinsics.ll Thu May 25 08:45:23 2017
@@ -0,0 +1,88 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq --show-mc-encoding | FileCheck %s --check-prefix=X86_64
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vpopcntdq --show-mc-encoding | FileCheck %s --check-prefix=X86
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; The following tests check that patterns that includes      ;;
+;; ctpop intrinsic + select are translated to the vpopcntd/q  ;;
+;; instruction in a correct way.                              ;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+define <16 x i32> @test_mask_vpopcnt_d(<16 x i32> %a, i16 %mask, <16 x i32> %b) {
+; X86_64-LABEL: test_mask_vpopcnt_d:
+; X86_64:       # BB#0:
+; X86_64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X86_64-NEXT:    vpopcntd %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x55,0xc1]
+; X86_64-NEXT:    retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mask_vpopcnt_d:
+; X86:       # BB#0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vpopcntd %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x55,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+  %1 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %b)
+  %2 = bitcast i16 %mask to <16 x i1>
+  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %a
+  ret <16 x i32> %3
+}
+
+define <16 x i32> @test_maskz_vpopcnt_d(i16 %mask, <16 x i32> %a) {
+; X86_64-LABEL: test_maskz_vpopcnt_d:
+; X86_64:       # BB#0:
+; X86_64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X86_64-NEXT:    vpopcntd %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x55,0xc0]
+; X86_64-NEXT:    retq # encoding: [0xc3]
+;
+; X86-LABEL: test_maskz_vpopcnt_d:
+; X86:       # BB#0:
+; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT:    vpopcntd %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x55,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+  %1 = tail call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %a)
+  %2 = bitcast i16 %mask to <16 x i1>
+  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+  ret <16 x i32> %3
+}
+
+define <8 x i64> @test_mask_vpopcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
+; X86_64-LABEL: test_mask_vpopcnt_q:
+; X86_64:       # BB#0:
+; X86_64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X86_64-NEXT:    vpopcntq %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x55,0xc8]
+; X86_64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
+; X86_64-NEXT:    retq # encoding: [0xc3]
+;
+; X86-LABEL: test_mask_vpopcnt_q:
+; X86:       # BB#0:
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT:    vpopcntq %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x55,0xc8]
+; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
+; X86-NEXT:    retl # encoding: [0xc3]
+  %1 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
+  %2 = bitcast i8 %mask to <8 x i1>
+  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %b
+  ret <8 x i64> %3
+}
+
+define <8 x i64> @test_maskz_vpopcnt_q(<8 x i64> %a, i8 %mask) {
+; X86_64-LABEL: test_maskz_vpopcnt_q:
+; X86_64:       # BB#0:
+; X86_64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
+; X86_64-NEXT:    vpopcntq %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x55,0xc0]
+; X86_64-NEXT:    retq # encoding: [0xc3]
+;
+; X86-LABEL: test_maskz_vpopcnt_q:
+; X86:       # BB#0:
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
+; X86-NEXT:    vpopcntq %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x55,0xc0]
+; X86-NEXT:    retl # encoding: [0xc3]
+  %1 = tail call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %a)
+  %2 = bitcast i8 %mask to <8 x i1>
+  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
+  ret <8 x i64> %3
+}
+
+declare <16 x i32> @llvm.ctpop.v16i32(<16 x i32>)
+declare <8 x i64> @llvm.ctpop.v8i64(<8 x i64>)

Modified: llvm/trunk/test/CodeGen/X86/vector-popcnt-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-popcnt-128.ll?rev=303858&r1=303857&r2=303858&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-popcnt-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-popcnt-128.ll Thu May 25 08:45:23 2017
@@ -5,6 +5,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VPOPCNTDQ
 
 define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
 ; SSE2-LABEL: testv2i64:
@@ -81,19 +82,41 @@ define <2 x i64> @testv2i64(<2 x i64> %i
 ; SSE41-NEXT:    psadbw %xmm3, %xmm0
 ; SSE41-NEXT:    retq
 ;
-; AVX-LABEL: testv2i64:
-; AVX:       # BB#0:
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm2
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
-; AVX-NEXT:    vpsrlw $4, %xmm0, %xmm0
-; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
-; AVX-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    retq
+; AVX1-LABEL: testv2i64:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: testv2i64:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
+; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
+; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
+; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
+; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    retq
+;
+; AVX512VPOPCNTDQ-LABEL: testv2i64:
+; AVX512VPOPCNTDQ:       # BB#0:
+; AVX512VPOPCNTDQ-NEXT:    # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
+; AVX512VPOPCNTDQ-NEXT:    # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; AVX512VPOPCNTDQ-NEXT:    vzeroupper
+; AVX512VPOPCNTDQ-NEXT:    retq
   %out = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %in)
   ret <2 x i64> %out
 }
@@ -193,23 +216,49 @@ define <4 x i32> @testv4i32(<4 x i32> %i
 ; SSE41-NEXT:    packuswb %xmm3, %xmm0
 ; SSE41-NEXT:    retq
 ;
-; AVX-LABEL: testv4i32:
-; AVX:       # BB#0:
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
-; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm2
-; AVX-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
-; AVX-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
-; AVX-NEXT:    vpsrlw $4, %xmm0, %xmm0
-; AVX-NEXT:    vpand %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
-; AVX-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
-; AVX-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
-; AVX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
-; AVX-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
-; AVX-NEXT:    retq
+; AVX1-LABEL: testv4i32:
+; AVX1:       # BB#0:
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm2
+; AVX1-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX1-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT:    vpsrlw $4, %xmm0, %xmm0
+; AVX1-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
+; AVX1-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX1-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
+; AVX1-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX1-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT:    retq
+;
+; AVX2-LABEL: testv4i32:
+; AVX2:       # BB#0:
+; AVX2-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm2
+; AVX2-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX2-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
+; AVX2-NEXT:    vpsrlw $4, %xmm0, %xmm0
+; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
+; AVX2-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX2-NEXT:    vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX2-NEXT:    vpsadbw %xmm1, %xmm2, %xmm2
+; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
+; AVX2-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
+; AVX2-NEXT:    retq
+;
+; AVX512VPOPCNTDQ-LABEL: testv4i32:
+; AVX512VPOPCNTDQ:       # BB#0:
+; AVX512VPOPCNTDQ-NEXT:    # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
+; AVX512VPOPCNTDQ-NEXT:    # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; AVX512VPOPCNTDQ-NEXT:    vzeroupper
+; AVX512VPOPCNTDQ-NEXT:    retq
   %out = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %in)
   ret <4 x i32> %out
 }

Modified: llvm/trunk/test/CodeGen/X86/vector-popcnt-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-popcnt-256.ll?rev=303858&r1=303857&r2=303858&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-popcnt-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-popcnt-256.ll Thu May 25 08:45:23 2017
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VPOPCNTDQ
 
 define <4 x i64> @testv4i64(<4 x i64> %in) nounwind {
 ; AVX1-LABEL: testv4i64:
@@ -39,6 +40,13 @@ define <4 x i64> @testv4i64(<4 x i64> %i
 ; AVX2-NEXT:    vpxor %ymm1, %ymm1, %ymm1
 ; AVX2-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
+;
+; AVX512VPOPCNTDQ-LABEL: testv4i64:
+; AVX512VPOPCNTDQ:       # BB#0:
+; AVX512VPOPCNTDQ-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
+; AVX512VPOPCNTDQ-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
+; AVX512VPOPCNTDQ-NEXT:    retq
   %out = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %in)
   ret <4 x i64> %out
 }
@@ -92,6 +100,13 @@ define <8 x i32> @testv8i32(<8 x i32> %i
 ; AVX2-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; AVX2-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
+;
+; AVX512VPOPCNTDQ-LABEL: testv8i32:
+; AVX512VPOPCNTDQ:       # BB#0:
+; AVX512VPOPCNTDQ-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
+; AVX512VPOPCNTDQ-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
+; AVX512VPOPCNTDQ-NEXT:    retq
   %out = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> %in)
   ret <8 x i32> %out
 }
@@ -137,6 +152,21 @@ define <16 x i16> @testv16i16(<16 x i16>
 ; AVX2-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT:    vpsrlw $8, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
+;
+; AVX512VPOPCNTDQ-LABEL: testv16i16:
+; AVX512VPOPCNTDQ:       # BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm1, %ymm0, %ymm2
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
+; AVX512VPOPCNTDQ-NEXT:    vpsrlw $4, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpsllw $8, %ymm0, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpsrlw $8, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    retq
   %out = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %in)
   ret <16 x i16> %out
 }
@@ -173,6 +203,18 @@ define <32 x i8> @testv32i8(<32 x i8> %i
 ; AVX2-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
 ; AVX2-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    retq
+;
+; AVX512VPOPCNTDQ-LABEL: testv32i8:
+; AVX512VPOPCNTDQ:       # BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm1, %ymm0, %ymm2
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
+; AVX512VPOPCNTDQ-NEXT:    vpsrlw $4, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    retq
   %out = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %in)
   ret <32 x i8> %out
 }

Modified: llvm/trunk/test/CodeGen/X86/vector-popcnt-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-popcnt-512.ll?rev=303858&r1=303857&r2=303858&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-popcnt-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-popcnt-512.ll Thu May 25 08:45:23 2017
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512F
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vpopcntdq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512VPOPCNTDQ
 
 define <8 x i64> @testv8i64(<8 x i64> %in) nounwind {
 ; AVX512F-LABEL: testv8i64:
@@ -39,6 +40,11 @@ define <8 x i64> @testv8i64(<8 x i64> %i
 ; AVX512BW-NEXT:    vpxord %zmm1, %zmm1, %zmm1
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
+;
+; AVX512VPOPCNTDQ-LABEL: testv8i64:
+; AVX512VPOPCNTDQ:       ## BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
+; AVX512VPOPCNTDQ-NEXT:    retq
   %out = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> %in)
   ret <8 x i64> %out
 }
@@ -92,6 +98,11 @@ define <16 x i32> @testv16i32(<16 x i32>
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
+;
+; AVX512VPOPCNTDQ-LABEL: testv16i32:
+; AVX512VPOPCNTDQ:       ## BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
+; AVX512VPOPCNTDQ-NEXT:    retq
   %out = call <16 x i32> @llvm.ctpop.v16i32(<16 x i32> %in)
   ret <16 x i32> %out
 }
@@ -135,6 +146,30 @@ define <32 x i16> @testv32i16(<32 x i16>
 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
+;
+; AVX512VPOPCNTDQ-LABEL: testv32i16:
+; AVX512VPOPCNTDQ:       ## BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm2, %ymm0, %ymm3
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
+; AVX512VPOPCNTDQ-NEXT:    vpsrlw $4, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm2, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm0, %ymm4, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm3, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpsllw $8, %ymm0, %ymm3
+; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm0, %ymm3, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpsrlw $8, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm2, %ymm1, %ymm3
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
+; AVX512VPOPCNTDQ-NEXT:    vpsrlw $4, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm2, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm1, %ymm4, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpsllw $8, %ymm1, %ymm2
+; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpsrlw $8, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    retq
   %out = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %in)
   ret <32 x i16> %out
 }
@@ -169,6 +204,24 @@ define <64 x i8> @testv64i8(<64 x i8> %i
 ; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
+;
+; AVX512VPOPCNTDQ-LABEL: testv64i8:
+; AVX512VPOPCNTDQ:       ## BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm2, %ymm0, %ymm3
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
+; AVX512VPOPCNTDQ-NEXT:    vpsrlw $4, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm2, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm0, %ymm4, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm3, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm2, %ymm1, %ymm3
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm3, %ymm4, %ymm3
+; AVX512VPOPCNTDQ-NEXT:    vpsrlw $4, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm2, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm1, %ymm4, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm3, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    retq
   %out = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %in)
   ret <64 x i8> %out
 }

Modified: llvm/trunk/test/CodeGen/X86/vector-tzcnt-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-tzcnt-128.ll?rev=303858&r1=303857&r2=303858&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-tzcnt-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-tzcnt-128.ll Thu May 25 08:45:23 2017
@@ -7,6 +7,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512cd,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512CDVL
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512cd,-avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512CD
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VPOPCNTDQ
 ;
 ; Just one 32-bit run to make sure we do reasonable things for i64 tzcnt.
 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=X32-SSE --check-prefix=X32-SSE41
@@ -117,6 +118,17 @@ define <2 x i64> @testv2i64(<2 x i64> %i
 ; AVX-NEXT:    vpsadbw %xmm1, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
+; AVX512VPOPCNTDQ-LABEL: testv2i64:
+; AVX512VPOPCNTDQ:       # BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX512VPOPCNTDQ-NEXT:    vpsubq %xmm0, %xmm1, %xmm1
+; AVX512VPOPCNTDQ-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    vpsubq {{.*}}(%rip), %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
+; AVX512VPOPCNTDQ-NEXT:    # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; AVX512VPOPCNTDQ-NEXT:    vzeroupper
+; AVX512VPOPCNTDQ-NEXT:    retq
+;
 ; X32-SSE-LABEL: testv2i64:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    pxor %xmm1, %xmm1
@@ -284,6 +296,17 @@ define <2 x i64> @testv2i64u(<2 x i64> %
 ; AVX512CD-NEXT:    vzeroupper
 ; AVX512CD-NEXT:    retq
 ;
+; AVX512VPOPCNTDQ-LABEL: testv2i64u:
+; AVX512VPOPCNTDQ:       # BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX512VPOPCNTDQ-NEXT:    vpsubq %xmm0, %xmm1, %xmm1
+; AVX512VPOPCNTDQ-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    vpsubq {{.*}}(%rip), %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
+; AVX512VPOPCNTDQ-NEXT:    # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; AVX512VPOPCNTDQ-NEXT:    vzeroupper
+; AVX512VPOPCNTDQ-NEXT:    retq
+;
 ; X32-SSE-LABEL: testv2i64u:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    pxor %xmm1, %xmm1
@@ -501,6 +524,18 @@ define <4 x i32> @testv4i32(<4 x i32> %i
 ; AVX512CD-NEXT:    vpackuswb %xmm2, %xmm0, %xmm0
 ; AVX512CD-NEXT:    retq
 ;
+; AVX512VPOPCNTDQ-LABEL: testv4i32:
+; AVX512VPOPCNTDQ:       # BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX512VPOPCNTDQ-NEXT:    vpsubd %xmm0, %xmm1, %xmm1
+; AVX512VPOPCNTDQ-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm1
+; AVX512VPOPCNTDQ-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
+; AVX512VPOPCNTDQ-NEXT:    # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; AVX512VPOPCNTDQ-NEXT:    vzeroupper
+; AVX512VPOPCNTDQ-NEXT:    retq
+;
 ; X32-SSE-LABEL: testv4i32:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    pxor %xmm1, %xmm1
@@ -700,6 +735,18 @@ define <4 x i32> @testv4i32u(<4 x i32> %
 ; AVX512CD-NEXT:    vzeroupper
 ; AVX512CD-NEXT:    retq
 ;
+; AVX512VPOPCNTDQ-LABEL: testv4i32u:
+; AVX512VPOPCNTDQ:       # BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX512VPOPCNTDQ-NEXT:    vpsubd %xmm0, %xmm1, %xmm1
+; AVX512VPOPCNTDQ-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*}}(%rip), %xmm1
+; AVX512VPOPCNTDQ-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
+; AVX512VPOPCNTDQ-NEXT:    # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; AVX512VPOPCNTDQ-NEXT:    vzeroupper
+; AVX512VPOPCNTDQ-NEXT:    retq
+;
 ; X32-SSE-LABEL: testv4i32u:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    pxor %xmm1, %xmm1
@@ -843,6 +890,25 @@ define <8 x i16> @testv8i16(<8 x i16> %i
 ; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
+; AVX512VPOPCNTDQ-LABEL: testv8i16:
+; AVX512VPOPCNTDQ:       # BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX512VPOPCNTDQ-NEXT:    vpsubw %xmm0, %xmm1, %xmm1
+; AVX512VPOPCNTDQ-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    vpsubw {{.*}}(%rip), %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VPOPCNTDQ-NEXT:    vpand %xmm1, %xmm0, %xmm2
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
+; AVX512VPOPCNTDQ-NEXT:    vpsrlw $4, %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    vpsllw $8, %xmm0, %xmm1
+; AVX512VPOPCNTDQ-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    vpsrlw $8, %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    retq
+;
 ; X32-SSE-LABEL: testv8i16:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    pxor %xmm1, %xmm1
@@ -984,6 +1050,25 @@ define <8 x i16> @testv8i16u(<8 x i16> %
 ; AVX-NEXT:    vpsrlw $8, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
+; AVX512VPOPCNTDQ-LABEL: testv8i16u:
+; AVX512VPOPCNTDQ:       # BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX512VPOPCNTDQ-NEXT:    vpsubw %xmm0, %xmm1, %xmm1
+; AVX512VPOPCNTDQ-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    vpsubw {{.*}}(%rip), %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VPOPCNTDQ-NEXT:    vpand %xmm1, %xmm0, %xmm2
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
+; AVX512VPOPCNTDQ-NEXT:    vpsrlw $4, %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    vpsllw $8, %xmm0, %xmm1
+; AVX512VPOPCNTDQ-NEXT:    vpaddb %xmm0, %xmm1, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    vpsrlw $8, %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    retq
+;
 ; X32-SSE-LABEL: testv8i16u:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    pxor %xmm1, %xmm1
@@ -1106,6 +1191,22 @@ define <16 x i8> @testv16i8(<16 x i8> %i
 ; AVX-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
+; AVX512VPOPCNTDQ-LABEL: testv16i8:
+; AVX512VPOPCNTDQ:       # BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX512VPOPCNTDQ-NEXT:    vpsubb %xmm0, %xmm1, %xmm1
+; AVX512VPOPCNTDQ-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    vpsubb {{.*}}(%rip), %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VPOPCNTDQ-NEXT:    vpand %xmm1, %xmm0, %xmm2
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
+; AVX512VPOPCNTDQ-NEXT:    vpsrlw $4, %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    retq
+;
 ; X32-SSE-LABEL: testv16i8:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    pxor %xmm1, %xmm1
@@ -1224,6 +1325,22 @@ define <16 x i8> @testv16i8u(<16 x i8> %
 ; AVX-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
 ; AVX-NEXT:    retq
 ;
+; AVX512VPOPCNTDQ-LABEL: testv16i8u:
+; AVX512VPOPCNTDQ:       # BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vpxor %xmm1, %xmm1, %xmm1
+; AVX512VPOPCNTDQ-NEXT:    vpsubb %xmm0, %xmm1, %xmm1
+; AVX512VPOPCNTDQ-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    vpsubb {{.*}}(%rip), %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VPOPCNTDQ-NEXT:    vpand %xmm1, %xmm0, %xmm2
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
+; AVX512VPOPCNTDQ-NEXT:    vpsrlw $4, %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    vpand %xmm1, %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %xmm0, %xmm3, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    retq
+;
 ; X32-SSE-LABEL: testv16i8u:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    pxor %xmm1, %xmm1
@@ -1258,6 +1375,12 @@ define <2 x i64> @foldv2i64() nounwind {
 ; AVX-NEXT:    vmovq %rax, %xmm0
 ; AVX-NEXT:    retq
 ;
+; AVX512VPOPCNTDQ-LABEL: foldv2i64:
+; AVX512VPOPCNTDQ:       # BB#0:
+; AVX512VPOPCNTDQ-NEXT:    movl $8, %eax
+; AVX512VPOPCNTDQ-NEXT:    vmovq %rax, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    retq
+;
 ; X32-SSE-LABEL: foldv2i64:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    movl $8, %eax
@@ -1280,6 +1403,12 @@ define <2 x i64> @foldv2i64u() nounwind
 ; AVX-NEXT:    vmovq %rax, %xmm0
 ; AVX-NEXT:    retq
 ;
+; AVX512VPOPCNTDQ-LABEL: foldv2i64u:
+; AVX512VPOPCNTDQ:       # BB#0:
+; AVX512VPOPCNTDQ-NEXT:    movl $8, %eax
+; AVX512VPOPCNTDQ-NEXT:    vmovq %rax, %xmm0
+; AVX512VPOPCNTDQ-NEXT:    retq
+;
 ; X32-SSE-LABEL: foldv2i64u:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    movl $8, %eax
@@ -1300,6 +1429,11 @@ define <4 x i32> @foldv4i32() nounwind {
 ; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [8,0,32,0]
 ; AVX-NEXT:    retq
 ;
+; AVX512VPOPCNTDQ-LABEL: foldv4i32:
+; AVX512VPOPCNTDQ:       # BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vmovaps {{.*#+}} xmm0 = [8,0,32,0]
+; AVX512VPOPCNTDQ-NEXT:    retq
+;
 ; X32-SSE-LABEL: foldv4i32:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    movaps {{.*#+}} xmm0 = [8,0,32,0]
@@ -1319,6 +1453,11 @@ define <4 x i32> @foldv4i32u() nounwind
 ; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [8,0,32,0]
 ; AVX-NEXT:    retq
 ;
+; AVX512VPOPCNTDQ-LABEL: foldv4i32u:
+; AVX512VPOPCNTDQ:       # BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vmovaps {{.*#+}} xmm0 = [8,0,32,0]
+; AVX512VPOPCNTDQ-NEXT:    retq
+;
 ; X32-SSE-LABEL: foldv4i32u:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    movaps {{.*#+}} xmm0 = [8,0,32,0]
@@ -1338,6 +1477,11 @@ define <8 x i16> @foldv8i16() nounwind {
 ; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
 ; AVX-NEXT:    retq
 ;
+; AVX512VPOPCNTDQ-LABEL: foldv8i16:
+; AVX512VPOPCNTDQ:       # BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
+; AVX512VPOPCNTDQ-NEXT:    retq
+;
 ; X32-SSE-LABEL: foldv8i16:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    movaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
@@ -1357,6 +1501,11 @@ define <8 x i16> @foldv8i16u() nounwind
 ; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
 ; AVX-NEXT:    retq
 ;
+; AVX512VPOPCNTDQ-LABEL: foldv8i16u:
+; AVX512VPOPCNTDQ:       # BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
+; AVX512VPOPCNTDQ-NEXT:    retq
+;
 ; X32-SSE-LABEL: foldv8i16u:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    movaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
@@ -1376,6 +1525,11 @@ define <16 x i8> @foldv16i8() nounwind {
 ; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
 ; AVX-NEXT:    retq
 ;
+; AVX512VPOPCNTDQ-LABEL: foldv16i8:
+; AVX512VPOPCNTDQ:       # BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
+; AVX512VPOPCNTDQ-NEXT:    retq
+;
 ; X32-SSE-LABEL: foldv16i8:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    movaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
@@ -1395,6 +1549,11 @@ define <16 x i8> @foldv16i8u() nounwind
 ; AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
 ; AVX-NEXT:    retq
 ;
+; AVX512VPOPCNTDQ-LABEL: foldv16i8u:
+; AVX512VPOPCNTDQ:       # BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
+; AVX512VPOPCNTDQ-NEXT:    retq
+;
 ; X32-SSE-LABEL: foldv16i8u:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    movaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]

Modified: llvm/trunk/test/CodeGen/X86/vector-tzcnt-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-tzcnt-256.ll?rev=303858&r1=303857&r2=303858&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-tzcnt-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-tzcnt-256.ll Thu May 25 08:45:23 2017
@@ -3,6 +3,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512cd,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512CDVL
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512cd,-avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512CD
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VPOPCNTDQ
 ;
 ; Just one 32-bit run to make sure we do reasonable things for i64 tzcnt.
 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=X32-AVX --check-prefix=X32-AVX2
@@ -92,6 +93,17 @@ define <4 x i64> @testv4i64(<4 x i64> %i
 ; AVX512CD-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0
 ; AVX512CD-NEXT:    retq
 ;
+; AVX512VPOPCNTDQ-LABEL: testv4i64:
+; AVX512VPOPCNTDQ:       # BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vpxor %ymm1, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpsubq %ymm0, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpbroadcastq {{.*}}(%rip), %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
+; AVX512VPOPCNTDQ-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
+; AVX512VPOPCNTDQ-NEXT:    retq
+;
 ; X32-AVX-LABEL: testv4i64:
 ; X32-AVX:       # BB#0:
 ; X32-AVX-NEXT:    vpxor %ymm1, %ymm1, %ymm1
@@ -182,6 +194,17 @@ define <4 x i64> @testv4i64u(<4 x i64> %
 ; AVX512CD-NEXT:    vpsubq %ymm0, %ymm1, %ymm0
 ; AVX512CD-NEXT:    retq
 ;
+; AVX512VPOPCNTDQ-LABEL: testv4i64u:
+; AVX512VPOPCNTDQ:       # BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vpxor %ymm1, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpsubq %ymm0, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpbroadcastq {{.*}}(%rip), %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpsubq %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
+; AVX512VPOPCNTDQ-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
+; AVX512VPOPCNTDQ-NEXT:    retq
+;
 ; X32-AVX-LABEL: testv4i64u:
 ; X32-AVX:       # BB#0:
 ; X32-AVX-NEXT:    vpxor %ymm1, %ymm1, %ymm1
@@ -307,6 +330,17 @@ define <8 x i32> @testv8i32(<8 x i32> %i
 ; AVX512CD-NEXT:    vpackuswb %ymm2, %ymm0, %ymm0
 ; AVX512CD-NEXT:    retq
 ;
+; AVX512VPOPCNTDQ-LABEL: testv8i32:
+; AVX512VPOPCNTDQ:       # BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vpxor %ymm1, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpsubd %ymm0, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
+; AVX512VPOPCNTDQ-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
+; AVX512VPOPCNTDQ-NEXT:    retq
+;
 ; X32-AVX-LABEL: testv8i32:
 ; X32-AVX:       # BB#0:
 ; X32-AVX-NEXT:    vpxor %ymm1, %ymm1, %ymm1
@@ -414,6 +448,17 @@ define <8 x i32> @testv8i32u(<8 x i32> %
 ; AVX512CD-NEXT:    vpsubd %ymm0, %ymm1, %ymm0
 ; AVX512CD-NEXT:    retq
 ;
+; AVX512VPOPCNTDQ-LABEL: testv8i32u:
+; AVX512VPOPCNTDQ:       # BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vpxor %ymm1, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpsubd %ymm0, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
+; AVX512VPOPCNTDQ-NEXT:    # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
+; AVX512VPOPCNTDQ-NEXT:    retq
+;
 ; X32-AVX-LABEL: testv8i32u:
 ; X32-AVX:       # BB#0:
 ; X32-AVX-NEXT:    vpxor %ymm1, %ymm1, %ymm1
@@ -532,6 +577,25 @@ define <16 x i16> @testv16i16(<16 x i16>
 ; AVX512CD-NEXT:    vpsrlw $8, %ymm0, %ymm0
 ; AVX512CD-NEXT:    retq
 ;
+; AVX512VPOPCNTDQ-LABEL: testv16i16:
+; AVX512VPOPCNTDQ:       # BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vpxor %ymm1, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpsubw %ymm0, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpsubw {{.*}}(%rip), %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm1, %ymm0, %ymm2
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
+; AVX512VPOPCNTDQ-NEXT:    vpsrlw $4, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpsllw $8, %ymm0, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpsrlw $8, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    retq
+;
 ; X32-AVX-LABEL: testv16i16:
 ; X32-AVX:       # BB#0:
 ; X32-AVX-NEXT:    vpxor %ymm1, %ymm1, %ymm1
@@ -647,6 +711,25 @@ define <16 x i16> @testv16i16u(<16 x i16
 ; AVX512CD-NEXT:    vpsrlw $8, %ymm0, %ymm0
 ; AVX512CD-NEXT:    retq
 ;
+; AVX512VPOPCNTDQ-LABEL: testv16i16u:
+; AVX512VPOPCNTDQ:       # BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vpxor %ymm1, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpsubw %ymm0, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpsubw {{.*}}(%rip), %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm1, %ymm0, %ymm2
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
+; AVX512VPOPCNTDQ-NEXT:    vpsrlw $4, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpsllw $8, %ymm0, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm0, %ymm1, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpsrlw $8, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    retq
+;
 ; X32-AVX-LABEL: testv16i16u:
 ; X32-AVX:       # BB#0:
 ; X32-AVX-NEXT:    vpxor %ymm1, %ymm1, %ymm1
@@ -747,6 +830,22 @@ define <32 x i8> @testv32i8(<32 x i8> %i
 ; AVX512CD-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
 ; AVX512CD-NEXT:    retq
 ;
+; AVX512VPOPCNTDQ-LABEL: testv32i8:
+; AVX512VPOPCNTDQ:       # BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vpxor %ymm1, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpsubb %ymm0, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpsubb {{.*}}(%rip), %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm1, %ymm0, %ymm2
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
+; AVX512VPOPCNTDQ-NEXT:    vpsrlw $4, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    retq
+;
 ; X32-AVX-LABEL: testv32i8:
 ; X32-AVX:       # BB#0:
 ; X32-AVX-NEXT:    vpxor %ymm1, %ymm1, %ymm1
@@ -844,6 +943,22 @@ define <32 x i8> @testv32i8u(<32 x i8> %
 ; AVX512CD-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
 ; AVX512CD-NEXT:    retq
 ;
+; AVX512VPOPCNTDQ-LABEL: testv32i8u:
+; AVX512VPOPCNTDQ:       # BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vpxor %ymm1, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpsubb %ymm0, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpsubb {{.*}}(%rip), %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm1, %ymm0, %ymm2
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm2, %ymm3, %ymm2
+; AVX512VPOPCNTDQ-NEXT:    vpsrlw $4, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm1, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm0, %ymm3, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm2, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    retq
+;
 ; X32-AVX-LABEL: testv32i8u:
 ; X32-AVX:       # BB#0:
 ; X32-AVX-NEXT:    vpxor %ymm1, %ymm1, %ymm1

Modified: llvm/trunk/test/CodeGen/X86/vector-tzcnt-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-tzcnt-512.ll?rev=303858&r1=303857&r2=303858&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-tzcnt-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-tzcnt-512.ll Thu May 25 08:45:23 2017
@@ -2,6 +2,7 @@
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd,-avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512CD
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512CDBW
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=-avx512cd,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vpopcntdq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VPOPCNTDQ
 
 define <8 x i64> @testv8i64(<8 x i64> %in) nounwind {
 ; AVX512CD-LABEL: testv8i64:
@@ -64,6 +65,15 @@ define <8 x i64> @testv8i64(<8 x i64> %i
 ; AVX512BW-NEXT:    vpaddb %zmm3, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
+;
+; AVX512VPOPCNTDQ-LABEL: testv8i64:
+; AVX512VPOPCNTDQ:       ## BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vpxord %zmm1, %zmm1, %zmm1
+; AVX512VPOPCNTDQ-NEXT:    vpsubq %zmm0, %zmm1, %zmm1
+; AVX512VPOPCNTDQ-NEXT:    vpandq %zmm1, %zmm0, %zmm0
+; AVX512VPOPCNTDQ-NEXT:    vpsubq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
+; AVX512VPOPCNTDQ-NEXT:    retq
   %out = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %in, i1 0)
   ret <8 x i64> %out
 }
@@ -105,6 +115,15 @@ define <8 x i64> @testv8i64u(<8 x i64> %
 ; AVX512BW-NEXT:    vpaddb %zmm3, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
+;
+; AVX512VPOPCNTDQ-LABEL: testv8i64u:
+; AVX512VPOPCNTDQ:       ## BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vpxord %zmm1, %zmm1, %zmm1
+; AVX512VPOPCNTDQ-NEXT:    vpsubq %zmm0, %zmm1, %zmm1
+; AVX512VPOPCNTDQ-NEXT:    vpandq %zmm1, %zmm0, %zmm0
+; AVX512VPOPCNTDQ-NEXT:    vpsubq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; AVX512VPOPCNTDQ-NEXT:    vpopcntq %zmm0, %zmm0
+; AVX512VPOPCNTDQ-NEXT:    retq
   %out = call <8 x i64> @llvm.cttz.v8i64(<8 x i64> %in, i1 -1)
   ret <8 x i64> %out
 }
@@ -186,6 +205,15 @@ define <16 x i32> @testv16i32(<16 x i32>
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
+;
+; AVX512VPOPCNTDQ-LABEL: testv16i32:
+; AVX512VPOPCNTDQ:       ## BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vpxord %zmm1, %zmm1, %zmm1
+; AVX512VPOPCNTDQ-NEXT:    vpsubd %zmm0, %zmm1, %zmm1
+; AVX512VPOPCNTDQ-NEXT:    vpandd %zmm1, %zmm0, %zmm0
+; AVX512VPOPCNTDQ-NEXT:    vpsubd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
+; AVX512VPOPCNTDQ-NEXT:    retq
   %out = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %in, i1 0)
   ret <16 x i32> %out
 }
@@ -231,6 +259,15 @@ define <16 x i32> @testv16i32u(<16 x i32
 ; AVX512BW-NEXT:    vpsadbw %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT:    vpackuswb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
+;
+; AVX512VPOPCNTDQ-LABEL: testv16i32u:
+; AVX512VPOPCNTDQ:       ## BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vpxord %zmm1, %zmm1, %zmm1
+; AVX512VPOPCNTDQ-NEXT:    vpsubd %zmm0, %zmm1, %zmm1
+; AVX512VPOPCNTDQ-NEXT:    vpandd %zmm1, %zmm0, %zmm0
+; AVX512VPOPCNTDQ-NEXT:    vpsubd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; AVX512VPOPCNTDQ-NEXT:    vpopcntd %zmm0, %zmm0
+; AVX512VPOPCNTDQ-NEXT:    retq
   %out = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %in, i1 -1)
   ret <16 x i32> %out
 }
@@ -305,6 +342,38 @@ define <32 x i16> @testv32i16(<32 x i16>
 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
+;
+; AVX512VPOPCNTDQ-LABEL: testv32i16:
+; AVX512VPOPCNTDQ:       ## BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vpxor %ymm2, %ymm2, %ymm2
+; AVX512VPOPCNTDQ-NEXT:    vpsubw %ymm0, %ymm2, %ymm3
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm3, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX512VPOPCNTDQ-NEXT:    vpsubw %ymm3, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm4, %ymm0, %ymm5
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm5, %ymm6, %ymm5
+; AVX512VPOPCNTDQ-NEXT:    vpsrlw $4, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm4, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm0, %ymm6, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpsllw $8, %ymm0, %ymm5
+; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm0, %ymm5, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpsrlw $8, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpsubw %ymm1, %ymm2, %ymm2
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm2, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpsubw %ymm3, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm4, %ymm1, %ymm2
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm2, %ymm6, %ymm2
+; AVX512VPOPCNTDQ-NEXT:    vpsrlw $4, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm4, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm1, %ymm6, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm2, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpsllw $8, %ymm1, %ymm2
+; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpsrlw $8, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    retq
   %out = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %in, i1 0)
   ret <32 x i16> %out
 }
@@ -379,6 +448,38 @@ define <32 x i16> @testv32i16u(<32 x i16
 ; AVX512BW-NEXT:    vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT:    vpsrlw $8, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
+;
+; AVX512VPOPCNTDQ-LABEL: testv32i16u:
+; AVX512VPOPCNTDQ:       ## BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vpxor %ymm2, %ymm2, %ymm2
+; AVX512VPOPCNTDQ-NEXT:    vpsubw %ymm0, %ymm2, %ymm3
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm3, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX512VPOPCNTDQ-NEXT:    vpsubw %ymm3, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm4, %ymm0, %ymm5
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm5, %ymm6, %ymm5
+; AVX512VPOPCNTDQ-NEXT:    vpsrlw $4, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm4, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm0, %ymm6, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpsllw $8, %ymm0, %ymm5
+; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm0, %ymm5, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpsrlw $8, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpsubw %ymm1, %ymm2, %ymm2
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm2, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpsubw %ymm3, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm4, %ymm1, %ymm2
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm2, %ymm6, %ymm2
+; AVX512VPOPCNTDQ-NEXT:    vpsrlw $4, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm4, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm1, %ymm6, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm2, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpsllw $8, %ymm1, %ymm2
+; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm1, %ymm2, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpsrlw $8, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    retq
   %out = call <32 x i16> @llvm.cttz.v32i16(<32 x i16> %in, i1 -1)
   ret <32 x i16> %out
 }
@@ -441,6 +542,32 @@ define <64 x i8> @testv64i8(<64 x i8> %i
 ; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
+;
+; AVX512VPOPCNTDQ-LABEL: testv64i8:
+; AVX512VPOPCNTDQ:       ## BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vpxor %ymm2, %ymm2, %ymm2
+; AVX512VPOPCNTDQ-NEXT:    vpsubb %ymm0, %ymm2, %ymm3
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm3, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX512VPOPCNTDQ-NEXT:    vpsubb %ymm3, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm4, %ymm0, %ymm5
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm5, %ymm6, %ymm5
+; AVX512VPOPCNTDQ-NEXT:    vpsrlw $4, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm4, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm0, %ymm6, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpsubb %ymm1, %ymm2, %ymm2
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm2, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpsubb %ymm3, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm4, %ymm1, %ymm2
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm2, %ymm6, %ymm2
+; AVX512VPOPCNTDQ-NEXT:    vpsrlw $4, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm4, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm1, %ymm6, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm2, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    retq
   %out = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %in, i1 0)
   ret <64 x i8> %out
 }
@@ -503,6 +630,32 @@ define <64 x i8> @testv64i8u(<64 x i8> %
 ; AVX512BW-NEXT:    vpshufb %zmm0, %zmm3, %zmm0
 ; AVX512BW-NEXT:    vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
+;
+; AVX512VPOPCNTDQ-LABEL: testv64i8u:
+; AVX512VPOPCNTDQ:       ## BB#0:
+; AVX512VPOPCNTDQ-NEXT:    vpxor %ymm2, %ymm2, %ymm2
+; AVX512VPOPCNTDQ-NEXT:    vpsubb %ymm0, %ymm2, %ymm3
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm3, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX512VPOPCNTDQ-NEXT:    vpsubb %ymm3, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm4, %ymm0, %ymm5
+; AVX512VPOPCNTDQ-NEXT:    vmovdqa {{.*#+}} ymm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm5, %ymm6, %ymm5
+; AVX512VPOPCNTDQ-NEXT:    vpsrlw $4, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm4, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm0, %ymm6, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm5, %ymm0, %ymm0
+; AVX512VPOPCNTDQ-NEXT:    vpsubb %ymm1, %ymm2, %ymm2
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm2, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpsubb %ymm3, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm4, %ymm1, %ymm2
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm2, %ymm6, %ymm2
+; AVX512VPOPCNTDQ-NEXT:    vpsrlw $4, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpand %ymm4, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpshufb %ymm1, %ymm6, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    vpaddb %ymm2, %ymm1, %ymm1
+; AVX512VPOPCNTDQ-NEXT:    retq
   %out = call <64 x i8> @llvm.cttz.v64i8(<64 x i8> %in, i1 -1)
   ret <64 x i8> %out
 }

Modified: llvm/trunk/test/MC/Disassembler/X86/avx-512.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/Disassembler/X86/avx-512.txt?rev=303858&r1=303857&r2=303858&view=diff
==============================================================================
--- llvm/trunk/test/MC/Disassembler/X86/avx-512.txt (original)
+++ llvm/trunk/test/MC/Disassembler/X86/avx-512.txt Thu May 25 08:45:23 2017
@@ -1,5 +1,6 @@
 # RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 -mcpu=knl | FileCheck %s
 # RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 -mcpu=skx | FileCheck --check-prefix=CHECK-SKX %s
+# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 -mattr=+avx512vpopcntdq | FileCheck %s --check-prefix=AVX512VPOPCNTDQ
 
 # CHECK: vpbroadcastd    %xmm18, %zmm28 {%k7} {z}
 0x62 0x22 0x7d 0xcf 0x58 0xe2
@@ -265,3 +266,25 @@
 
 # CHECK: vscatterqpd %ymm19, 256(%r9,%ymm31) {%k1}
 0x62 0x82 0xfd 0x21 0xa3 0x5c 0x39 0x20
+
+#####################################################
+#             POPULATION COUNT                      #
+#####################################################
+
+# AVX512VPOPCNTDQ: vpopcntd   %zmm21, %zmm26 {%k4}
+0x62 0x22 0x7d 0x4c 0x55 0xd5
+
+# AVX512VPOPCNTDQ: vpopcntd   %zmm21, %zmm26 {%k4} {z} 
+0x62 0x22 0x7d 0xcc 0x55 0xd5
+
+# AVX512VPOPCNTDQ: vpopcntd   (%rcx), %zmm26  
+0x62 0x62 0x7d 0x48 0x55 0x11
+
+# AVX512VPOPCNTDQ: vpopcntq   %zmm21, %zmm17 {%k6} 
+0x62 0xa2 0xfd 0x4e 0x55 0xcd
+
+# AVX512VPOPCNTDQ: vpopcntq   %zmm21, %zmm17 {%k6} {z} 
+0x62 0xa2 0xfd 0xce 0x55 0xcd
+
+# AVX512VPOPCNTDQ: vpopcntq   (%rcx), %zmm17  
+0x62 0xe2 0xfd 0x48 0x55 0x09

Added: llvm/trunk/test/MC/X86/x86-64-avx512vpopcntdq.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/x86-64-avx512vpopcntdq.s?rev=303858&view=auto
==============================================================================
--- llvm/trunk/test/MC/X86/x86-64-avx512vpopcntdq.s (added)
+++ llvm/trunk/test/MC/X86/x86-64-avx512vpopcntdq.s Thu May 25 08:45:23 2017
@@ -0,0 +1,225 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -mattr=+avx512vpopcntdq --show-encoding %s | FileCheck %s
+
+// CHECK: vpopcntq   %zmm25, %zmm20  
+// CHECK: encoding: [0x62,0x82,0xfd,0x48,0x55,0xe1]
+          vpopcntq   %zmm25, %zmm20  
+
+// CHECK: vpopcntq   %zmm25, %zmm20 {%k6} 
+// CHECK: encoding: [0x62,0x82,0xfd,0x4e,0x55,0xe1]
+          vpopcntq   %zmm25, %zmm20 {%k6} 
+
+// CHECK: vpopcntq   %zmm25, %zmm20 {%k6} {z} 
+// CHECK: encoding: [0x62,0x82,0xfd,0xce,0x55,0xe1]
+          vpopcntq   %zmm25, %zmm20 {%k6} {z} 
+
+// CHECK: vpopcntq   (%rcx), %zmm20  
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x55,0x21]
+          vpopcntq   (%rcx), %zmm20  
+
+// CHECK: vpopcntq   291(%rax,%r14,8), %zmm20 
+// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0x55,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vpopcntq   291(%rax,%r14,8), %zmm20 
+
+// CHECK: vpopcntq   (%rcx){1to8}, %zmm20 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x55,0x21]
+          vpopcntq   (%rcx){1to8}, %zmm20 
+
+// CHECK: vpopcntq   4064(%rdx), %zmm20 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x55,0xa2,0xe0,0x0f,0x00,0x00]
+          vpopcntq   4064(%rdx), %zmm20 
+
+// CHECK: vpopcntq   4096(%rdx), %zmm20 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x55,0x62,0x40]
+          vpopcntq   4096(%rdx), %zmm20 
+
+// CHECK: vpopcntq   -4096(%rdx), %zmm20 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x55,0x62,0xc0]
+          vpopcntq   -4096(%rdx), %zmm20 
+
+// CHECK: vpopcntq   -4128(%rdx), %zmm20 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x55,0xa2,0xe0,0xef,0xff,0xff]
+          vpopcntq   -4128(%rdx), %zmm20 
+
+// CHECK: vpopcntq   1016(%rdx){1to8}, %zmm20 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x55,0x62,0x7f]
+          vpopcntq   1016(%rdx){1to8}, %zmm20 
+
+// CHECK: vpopcntq   1024(%rdx){1to8}, %zmm20 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x55,0xa2,0x00,0x04,0x00,0x00]
+          vpopcntq   1024(%rdx){1to8}, %zmm20 
+
+// CHECK: vpopcntq   -1024(%rdx){1to8}, %zmm20 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x55,0x62,0x80]
+          vpopcntq   -1024(%rdx){1to8}, %zmm20 
+
+// CHECK: vpopcntq   -1032(%rdx){1to8}, %zmm20 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x55,0xa2,0xf8,0xfb,0xff,0xff]
+          vpopcntq   -1032(%rdx){1to8}, %zmm20 
+
+// CHECK: vpopcntq   %zmm21, %zmm17  
+// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0x55,0xcd]
+          vpopcntq   %zmm21, %zmm17  
+
+// CHECK: vpopcntq   %zmm21, %zmm17 {%k6} 
+// CHECK: encoding: [0x62,0xa2,0xfd,0x4e,0x55,0xcd]
+          vpopcntq   %zmm21, %zmm17 {%k6} 
+
+// CHECK: vpopcntq   %zmm21, %zmm17 {%k6} {z} 
+// CHECK: encoding: [0x62,0xa2,0xfd,0xce,0x55,0xcd]
+          vpopcntq   %zmm21, %zmm17 {%k6} {z} 
+
+// CHECK: vpopcntq   (%rcx), %zmm17  
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x55,0x09]
+          vpopcntq   (%rcx), %zmm17  
+
+// CHECK: vpopcntq   4660(%rax,%r14,8), %zmm17 
+// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0x55,0x8c,0xf0,0x34,0x12,0x00,0x00]
+          vpopcntq   4660(%rax,%r14,8), %zmm17 
+
+// CHECK: vpopcntq   (%rcx){1to8}, %zmm17 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x55,0x09]
+          vpopcntq   (%rcx){1to8}, %zmm17 
+
+// CHECK: vpopcntq   4064(%rdx), %zmm17 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x55,0x8a,0xe0,0x0f,0x00,0x00]
+          vpopcntq   4064(%rdx), %zmm17 
+
+// CHECK: vpopcntq   4096(%rdx), %zmm17 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x55,0x4a,0x40]
+          vpopcntq   4096(%rdx), %zmm17 
+
+// CHECK: vpopcntq   -4096(%rdx), %zmm17 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x55,0x4a,0xc0]
+          vpopcntq   -4096(%rdx), %zmm17 
+
+// CHECK: vpopcntq   -4128(%rdx), %zmm17 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x55,0x8a,0xe0,0xef,0xff,0xff]
+          vpopcntq   -4128(%rdx), %zmm17 
+
+// CHECK: vpopcntq   1016(%rdx){1to8}, %zmm17 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x55,0x4a,0x7f]
+          vpopcntq   1016(%rdx){1to8}, %zmm17 
+
+// CHECK: vpopcntq   1024(%rdx){1to8}, %zmm17 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x55,0x8a,0x00,0x04,0x00,0x00]
+          vpopcntq   1024(%rdx){1to8}, %zmm17 
+
+// CHECK: vpopcntq   -1024(%rdx){1to8}, %zmm17 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x55,0x4a,0x80]
+          vpopcntq   -1024(%rdx){1to8}, %zmm17 
+
+// CHECK: vpopcntq   -1032(%rdx){1to8}, %zmm17 
+// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x55,0x8a,0xf8,0xfb,0xff,0xff]
+          vpopcntq   -1032(%rdx){1to8}, %zmm17 
+
+// CHECK: vpopcntd   %zmm19, %zmm25  
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x55,0xcb]
+          vpopcntd   %zmm19, %zmm25  
+
+// CHECK: vpopcntd   %zmm19, %zmm25 {%k4} 
+// CHECK: encoding: [0x62,0x22,0x7d,0x4c,0x55,0xcb]
+          vpopcntd   %zmm19, %zmm25 {%k4} 
+
+// CHECK: vpopcntd   %zmm19, %zmm25 {%k4} {z} 
+// CHECK: encoding: [0x62,0x22,0x7d,0xcc,0x55,0xcb]
+          vpopcntd   %zmm19, %zmm25 {%k4} {z} 
+
+// CHECK: vpopcntd   (%rcx), %zmm25  
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x55,0x09]
+          vpopcntd   (%rcx), %zmm25  
+
+// CHECK: vpopcntd   291(%rax,%r14,8), %zmm25 
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x55,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vpopcntd   291(%rax,%r14,8), %zmm25 
+
+// CHECK: vpopcntd   (%rcx){1to16}, %zmm25 
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x55,0x09]
+          vpopcntd   (%rcx){1to16}, %zmm25
+
+// CHECK: vpopcntd   4064(%rdx), %zmm25 
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x55,0x8a,0xe0,0x0f,0x00,0x00]
+          vpopcntd   4064(%rdx), %zmm25 
+
+// CHECK: vpopcntd   4096(%rdx), %zmm25 
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x55,0x4a,0x40]
+          vpopcntd   4096(%rdx), %zmm25 
+
+// CHECK: vpopcntd   -4096(%rdx), %zmm25 
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x55,0x4a,0xc0]
+          vpopcntd   -4096(%rdx), %zmm25 
+
+// CHECK: vpopcntd   -4128(%rdx), %zmm25 
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x55,0x8a,0xe0,0xef,0xff,0xff]
+          vpopcntd   -4128(%rdx), %zmm25 
+
+// CHECK: vpopcntd   508(%rdx){1to16}, %zmm25 
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x55,0x4a,0x7f]
+          vpopcntd   508(%rdx){1to16}, %zmm25 
+
+// CHECK: vpopcntd   512(%rdx){1to16}, %zmm25 
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x55,0x8a,0x00,0x02,0x00,0x00]
+          vpopcntd   512(%rdx){1to16}, %zmm25 
+
+// CHECK: vpopcntd   -512(%rdx){1to16}, %zmm25 
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x55,0x4a,0x80]
+          vpopcntd   -512(%rdx){1to16}, %zmm25 
+
+// CHECK: vpopcntd   -516(%rdx){1to16}, %zmm25 
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x55,0x8a,0xfc,0xfd,0xff,0xff]
+          vpopcntd   -516(%rdx){1to16}, %zmm25 
+
+// CHECK: vpopcntd   %zmm21, %zmm26  
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x55,0xd5]
+          vpopcntd   %zmm21, %zmm26  
+
+// CHECK: vpopcntd   %zmm21, %zmm26 {%k4} 
+// CHECK: encoding: [0x62,0x22,0x7d,0x4c,0x55,0xd5]
+          vpopcntd   %zmm21, %zmm26 {%k4} 
+
+// CHECK: vpopcntd   %zmm21, %zmm26 {%k4} {z} 
+// CHECK: encoding: [0x62,0x22,0x7d,0xcc,0x55,0xd5]
+          vpopcntd   %zmm21, %zmm26 {%k4} {z} 
+
+// CHECK: vpopcntd   (%rcx), %zmm26  
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x55,0x11]
+          vpopcntd   (%rcx), %zmm26  
+
+// CHECK: vpopcntd   4660(%rax,%r14,8), %zmm26 
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x55,0x94,0xf0,0x34,0x12,0x00,0x00]
+          vpopcntd   4660(%rax,%r14,8), %zmm26 
+
+// CHECK: vpopcntd   (%rcx){1to16}, %zmm26 
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x55,0x11]
+          vpopcntd   (%rcx){1to16}, %zmm26 
+
+// CHECK: vpopcntd   4064(%rdx), %zmm26 
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x55,0x92,0xe0,0x0f,0x00,0x00]
+          vpopcntd   4064(%rdx), %zmm26 
+
+// CHECK: vpopcntd   4096(%rdx), %zmm26 
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x55,0x52,0x40]
+          vpopcntd   4096(%rdx), %zmm26 
+
+// CHECK: vpopcntd   -4096(%rdx), %zmm26 
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x55,0x52,0xc0]
+          vpopcntd   -4096(%rdx), %zmm26 
+
+// CHECK: vpopcntd   -4128(%rdx), %zmm26 
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x55,0x92,0xe0,0xef,0xff,0xff]
+          vpopcntd   -4128(%rdx), %zmm26 
+
+// CHECK: vpopcntd   508(%rdx){1to16}, %zmm26 
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x55,0x52,0x7f]
+          vpopcntd   508(%rdx){1to16}, %zmm26 
+
+// CHECK: vpopcntd   512(%rdx){1to16}, %zmm26 
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x55,0x92,0x00,0x02,0x00,0x00]
+          vpopcntd   512(%rdx){1to16}, %zmm26 
+
+// CHECK: vpopcntd   -512(%rdx){1to16}, %zmm26 
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x55,0x52,0x80]
+          vpopcntd   -512(%rdx){1to16}, %zmm26 
+
+// CHECK: vpopcntd   -516(%rdx){1to16}, %zmm26 
+// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x55,0x92,0xfc,0xfd,0xff,0xff]
+          vpopcntd   -516(%rdx){1to16}, %zmm26 




More information about the llvm-commits mailing list