[llvm] r318746 - [x86][icelake]VNNI

Coby Tayree via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 21 02:04:29 PST 2017


Author: coby
Date: Tue Nov 21 02:04:28 2017
New Revision: 318746

URL: http://llvm.org/viewvc/llvm-project?rev=318746&view=rev
Log:
[x86][icelake]VNNI
Introducing Vector Neural Network Instructions, consisting of:
vpdpbusd{s}
vpdpwssd{s}
Differential Revision: https://reviews.llvm.org/D40208

Added:
    llvm/trunk/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll
    llvm/trunk/test/CodeGen/X86/avx512vnni-intrinsics.ll
    llvm/trunk/test/MC/X86/avx512vl_vnni-encoding.s
    llvm/trunk/test/MC/X86/avx512vnni-encoding.s
Modified:
    llvm/trunk/include/llvm/IR/IntrinsicsX86.td
    llvm/trunk/lib/Support/Host.cpp
    llvm/trunk/lib/Target/X86/X86.td
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.h
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
    llvm/trunk/lib/Target/X86/X86InstrInfo.td
    llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
    llvm/trunk/lib/Target/X86/X86Subtarget.cpp
    llvm/trunk/lib/Target/X86/X86Subtarget.h

Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=318746&r1=318745&r2=318746&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Tue Nov 21 02:04:28 2017
@@ -2728,6 +2728,109 @@ let TargetPrefix = "x86" in {  // All in
                          llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
 }
 
+// VNNI
+let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
+  def int_x86_avx512_mask_vpdpbusd_128 :
+              GCCBuiltin<"__builtin_ia32_vpdpbusd128_mask">,
+              Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
+                         llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_maskz_vpdpbusd_128 :
+              GCCBuiltin<"__builtin_ia32_vpdpbusd128_maskz">,
+              Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
+                         llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vpdpbusd_256 :
+              GCCBuiltin<"__builtin_ia32_vpdpbusd256_mask">,
+              Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
+                         llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_maskz_vpdpbusd_256 :
+              GCCBuiltin<"__builtin_ia32_vpdpbusd256_maskz">,
+              Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
+                         llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vpdpbusd_512 :
+              GCCBuiltin<"__builtin_ia32_vpdpbusd512_mask">,
+              Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
+                         llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+  def int_x86_avx512_maskz_vpdpbusd_512 :
+              GCCBuiltin<"__builtin_ia32_vpdpbusd512_maskz">,
+              Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
+                         llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpdpbusds_128 :
+              GCCBuiltin<"__builtin_ia32_vpdpbusds128_mask">,
+              Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
+                         llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_maskz_vpdpbusds_128 :
+              GCCBuiltin<"__builtin_ia32_vpdpbusds128_maskz">,
+              Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
+                         llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vpdpbusds_256 :
+              GCCBuiltin<"__builtin_ia32_vpdpbusds256_mask">,
+              Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
+                         llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_maskz_vpdpbusds_256 :
+              GCCBuiltin<"__builtin_ia32_vpdpbusds256_maskz">,
+              Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
+                         llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vpdpbusds_512 :
+              GCCBuiltin<"__builtin_ia32_vpdpbusds512_mask">,
+              Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
+                         llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+  def int_x86_avx512_maskz_vpdpbusds_512 :
+              GCCBuiltin<"__builtin_ia32_vpdpbusds512_maskz">,
+              Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
+                         llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpdpwssd_128 :
+              GCCBuiltin<"__builtin_ia32_vpdpwssd128_mask">,
+              Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
+                         llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_maskz_vpdpwssd_128 :
+              GCCBuiltin<"__builtin_ia32_vpdpwssd128_maskz">,
+              Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
+                         llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vpdpwssd_256 :
+              GCCBuiltin<"__builtin_ia32_vpdpwssd256_mask">,
+              Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
+                         llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_maskz_vpdpwssd_256 :
+              GCCBuiltin<"__builtin_ia32_vpdpwssd256_maskz">,
+              Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
+                         llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vpdpwssd_512 :
+              GCCBuiltin<"__builtin_ia32_vpdpwssd512_mask">,
+              Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
+                         llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+  def int_x86_avx512_maskz_vpdpwssd_512 :
+              GCCBuiltin<"__builtin_ia32_vpdpwssd512_maskz">,
+              Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
+                         llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpdpwssds_128 :
+              GCCBuiltin<"__builtin_ia32_vpdpwssds128_mask">,
+              Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
+                         llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_maskz_vpdpwssds_128 :
+              GCCBuiltin<"__builtin_ia32_vpdpwssds128_maskz">,
+              Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
+                         llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vpdpwssds_256 :
+              GCCBuiltin<"__builtin_ia32_vpdpwssds256_mask">,
+              Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
+                         llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_maskz_vpdpwssds_256 :
+              GCCBuiltin<"__builtin_ia32_vpdpwssds256_maskz">,
+              Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
+                         llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vpdpwssds_512 :
+              GCCBuiltin<"__builtin_ia32_vpdpwssds512_mask">,
+              Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
+                         llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+  def int_x86_avx512_maskz_vpdpwssds_512 :
+              GCCBuiltin<"__builtin_ia32_vpdpwssds512_maskz">,
+              Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
+                         llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+}
+
 //===----------------------------------------------------------------------===//
 // XOP
 

Modified: llvm/trunk/lib/Support/Host.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Host.cpp?rev=318746&r1=318745&r2=318746&view=diff
==============================================================================
--- llvm/trunk/lib/Support/Host.cpp (original)
+++ llvm/trunk/lib/Support/Host.cpp Tue Nov 21 02:04:28 2017
@@ -1266,6 +1266,9 @@ bool sys::getHostCPUFeatures(StringMap<b
   // VPCLMULQDQ (carry-less multiplication quadword)
   Features["vpclmulqdq"] = HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave;
 
+  // Enable Vector Neural Network Instructions
+  Features["avx512vnni"] = HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save;
+
   bool HasLeafD = MaxLevel >= 0xd &&
                   !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
 

Modified: llvm/trunk/lib/Target/X86/X86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.td?rev=318746&r1=318745&r2=318746&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86.td (original)
+++ llvm/trunk/lib/Target/X86/X86.td Tue Nov 21 02:04:28 2017
@@ -160,6 +160,9 @@ def FeatureIFMA     : SubtargetFeature<"
                                       [FeatureAVX512]>;
 def FeaturePKU   : SubtargetFeature<"pku", "HasPKU", "true",
                       "Enable protection keys">;
+def FeatureVNNI    : SubtargetFeature<"avx512vnni", "HasVNNI", "true",
+                          "Enable AVX-512 Vector Neural Network Instructions",
+                                      [FeatureAVX512]>;
 def FeaturePCLMUL  : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
                          "Enable packed carry-less multiplication instructions",
                                [FeatureSSE2]>;

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=318746&r1=318745&r2=318746&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Nov 21 02:04:28 2017
@@ -25250,6 +25250,10 @@ const char *X86TargetLowering::getTarget
   case X86ISD::CVTS2UI_RND:        return "X86ISD::CVTS2UI_RND";
   case X86ISD::LWPINS:             return "X86ISD::LWPINS";
   case X86ISD::MGATHER:            return "X86ISD::MGATHER";
+  case X86ISD::VPDPBUSD:           return "X86ISD::VPDPBUSD";
+  case X86ISD::VPDPBUSDS:          return "X86ISD::VPDPBUSDS";
+  case X86ISD::VPDPWSSD:           return "X86ISD::VPDPWSSD";
+  case X86ISD::VPDPWSSDS:          return "X86ISD::VPDPWSSDS";
   }
   return nullptr;
 }

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=318746&r1=318745&r2=318746&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Tue Nov 21 02:04:28 2017
@@ -481,6 +481,12 @@ namespace llvm {
       // op0 x op1 + op2.
       VPMADD52L, VPMADD52H,
 
+      // VNNI
+      VPDPBUSD,
+      VPDPBUSDS,
+      VPDPWSSD,
+      VPDPWSSDS,
+
       // FMA nodes.
       // We use the target independent ISD::FMA for the non-inverted case.
       FNMADD,

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=318746&r1=318745&r2=318746&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Tue Nov 21 02:04:28 2017
@@ -10160,3 +10160,47 @@ defm VPEXPANDB : expand_by_elt_width <0x
 defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", avx512vl_i16_info,
                                       HasVBMI2>, EVEX, VEX_W;
 
+//===----------------------------------------------------------------------===//
+// VNNI
+//===----------------------------------------------------------------------===//
+
+let Constraints = "$src1 = $dst" in
+multiclass VNNI_rmb<bits<8> Op, string OpStr, SDNode OpNode,
+                    X86VectorVTInfo VTI> {
+  defm r  :   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
+                                   (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
+                                   "$src3, $src2", "$src2, $src3",
+                                   (VTI.VT (OpNode VTI.RC:$src1,
+                                            VTI.RC:$src2, VTI.RC:$src3))>,
+              EVEX_4V, T8PD;
+  defm m  :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
+                                   (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
+                                   "$src3, $src2", "$src2, $src3",
+                                   (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
+                                            (VTI.VT (bitconvert
+                                                     (VTI.LdFrag addr:$src3)))))>,
+              EVEX_4V, EVEX_CD8<32, CD8VF>, T8PD;
+  defm mb :   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
+                                   (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3),
+                                   OpStr, "${src3}"##VTI.BroadcastStr##", $src2",
+                                   "$src2, ${src3}"##VTI.BroadcastStr,
+                                   (OpNode VTI.RC:$src1, VTI.RC:$src2,
+                                    (VTI.VT (X86VBroadcast
+                                             (VTI.ScalarLdFrag addr:$src3))))>,
+              EVEX_4V, EVEX_CD8<32, CD8VF>, EVEX_B, T8PD;
+}
+
+multiclass VNNI_common<bits<8> Op, string OpStr, SDNode OpNode> {
+  let Predicates = [HasVNNI] in
+  defm Z      :   VNNI_rmb<Op, OpStr, OpNode, v16i32_info>, EVEX_V512;
+  let Predicates = [HasVNNI, HasVLX] in {
+    defm Z256 :   VNNI_rmb<Op, OpStr, OpNode, v8i32x_info>, EVEX_V256;
+    defm Z128 :   VNNI_rmb<Op, OpStr, OpNode, v4i32x_info>, EVEX_V128;
+  }
+}
+
+defm VPDPBUSD   : VNNI_common<0x50, "vpdpbusd", X86Vpdpbusd>;
+defm VPDPBUSDS  : VNNI_common<0x51, "vpdpbusds", X86Vpdpbusds>;
+defm VPDPWSSD   : VNNI_common<0x52, "vpdpwssd", X86Vpdpwssd>;
+defm VPDPWSSDS  : VNNI_common<0x53, "vpdpwssds", X86Vpdpwssds>;
+

Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=318746&r1=318745&r2=318746&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Tue Nov 21 02:04:28 2017
@@ -533,6 +533,15 @@ def x86vpmadd52h     : SDNode<"X86ISD::V
 
 def X86rsqrt14   : SDNode<"X86ISD::RSQRT14",  SDTFPUnaryOp>;
 def X86rcp14     : SDNode<"X86ISD::RCP14",    SDTFPUnaryOp>;
+
+// VNNI
+def SDTVnni : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+                                   SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>;
+def X86Vpdpbusd  : SDNode<"X86ISD::VPDPBUSD", SDTVnni>;
+def X86Vpdpbusds : SDNode<"X86ISD::VPDPBUSDS", SDTVnni>;
+def X86Vpdpwssd  : SDNode<"X86ISD::VPDPWSSD", SDTVnni>;
+def X86Vpdpwssds : SDNode<"X86ISD::VPDPWSSDS", SDTVnni>;
+
 def X86rsqrt28   : SDNode<"X86ISD::RSQRT28",  SDTFPUnaryOpRound>;
 def X86rcp28     : SDNode<"X86ISD::RCP28",    SDTFPUnaryOpRound>;
 def X86exp2      : SDNode<"X86ISD::EXP2",     SDTFPUnaryOpRound>;

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=318746&r1=318745&r2=318746&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Tue Nov 21 02:04:28 2017
@@ -832,6 +832,7 @@ def NoVLX        : Predicate<"!Subtarget
 def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">;
 def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">;
 def PKU        : Predicate<"Subtarget->hasPKU()">;
+def HasVNNI    : Predicate<"Subtarget->hasVNNI()">;
 
 def HasPOPCNT    : Predicate<"Subtarget->hasPOPCNT()">;
 def HasAES       : Predicate<"Subtarget->hasAES()">;

Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=318746&r1=318745&r2=318746&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Tue Nov 21 02:04:28 2017
@@ -1157,6 +1157,19 @@ static const IntrinsicData  IntrinsicsWi
   X86_INTRINSIC_DATA(avx512_mask_vfnmsub_ps_512, FMA_OP_MASK, X86ISD::FNMSUB,
                      X86ISD::FNMSUB_RND),
 
+  X86_INTRINSIC_DATA(avx512_mask_vpdpbusd_128,  FMA_OP_MASK, X86ISD::VPDPBUSD, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpdpbusd_256,  FMA_OP_MASK, X86ISD::VPDPBUSD, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpdpbusd_512,  FMA_OP_MASK, X86ISD::VPDPBUSD, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpdpbusds_128, FMA_OP_MASK, X86ISD::VPDPBUSDS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpdpbusds_256, FMA_OP_MASK, X86ISD::VPDPBUSDS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpdpbusds_512, FMA_OP_MASK, X86ISD::VPDPBUSDS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpdpwssd_128,  FMA_OP_MASK, X86ISD::VPDPWSSD, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpdpwssd_256,  FMA_OP_MASK, X86ISD::VPDPWSSD, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpdpwssd_512,  FMA_OP_MASK, X86ISD::VPDPWSSD, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_128, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_256, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpdpwssds_512, FMA_OP_MASK, X86ISD::VPDPWSSDS, 0),
+
   X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERM_3OP_MASK,
                     X86ISD::VPERMIV3, 0),
   X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_256, VPERM_3OP_MASK,
@@ -1377,6 +1390,19 @@ static const IntrinsicData  IntrinsicsWi
   X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_ps_512, FMA_OP_MASKZ, X86ISD::FMADDSUB,
                      X86ISD::FMADDSUB_RND),
 
+  X86_INTRINSIC_DATA(avx512_maskz_vpdpbusd_128,  FMA_OP_MASKZ, X86ISD::VPDPBUSD, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpdpbusd_256,  FMA_OP_MASKZ, X86ISD::VPDPBUSD, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpdpbusd_512,  FMA_OP_MASKZ, X86ISD::VPDPBUSD, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpdpbusds_128, FMA_OP_MASKZ, X86ISD::VPDPBUSDS, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpdpbusds_256, FMA_OP_MASKZ, X86ISD::VPDPBUSDS, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpdpbusds_512, FMA_OP_MASKZ, X86ISD::VPDPBUSDS, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpdpwssd_128,  FMA_OP_MASKZ, X86ISD::VPDPWSSD, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpdpwssd_256,  FMA_OP_MASKZ, X86ISD::VPDPWSSD, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpdpwssd_512,  FMA_OP_MASKZ, X86ISD::VPDPWSSD, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpdpwssds_128, FMA_OP_MASKZ, X86ISD::VPDPWSSDS, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpdpwssds_256, FMA_OP_MASKZ, X86ISD::VPDPWSSDS, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpdpwssds_512, FMA_OP_MASKZ, X86ISD::VPDPWSSDS, 0),
+
   X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_128, VPERM_3OP_MASKZ,
                      X86ISD::VPERMV3, 0),
   X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_d_256, VPERM_3OP_MASKZ,

Modified: llvm/trunk/lib/Target/X86/X86Subtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.cpp?rev=318746&r1=318745&r2=318746&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86Subtarget.cpp Tue Nov 21 02:04:28 2017
@@ -325,6 +325,7 @@ void X86Subtarget::initializeEnvironment
   HasVLX = false;
   HasADX = false;
   HasPKU = false;
+  HasVNNI = false;
   HasSHA = false;
   HasPRFCHW = false;
   HasRDSEED = false;

Modified: llvm/trunk/lib/Target/X86/X86Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.h?rev=318746&r1=318745&r2=318746&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.h (original)
+++ llvm/trunk/lib/Target/X86/X86Subtarget.h Tue Nov 21 02:04:28 2017
@@ -304,6 +304,9 @@ protected:
   /// Processor has PKU extenstions
   bool HasPKU;
 
+  /// Processor has AVX-512 Vector Neural Network Instructions
+  bool HasVNNI;
+
   /// Processor supports MPX - Memory Protection Extensions
   bool HasMPX;
 
@@ -530,6 +533,7 @@ public:
   bool hasBWI() const { return HasBWI; }
   bool hasVLX() const { return HasVLX; }
   bool hasPKU() const { return HasPKU; }
+  bool hasVNNI() const { return HasVNNI; }
   bool hasMPX() const { return HasMPX; }
   bool hasCLFLUSHOPT() const { return HasCLFLUSHOPT; }
   bool hasCLWB() const { return HasCLWB; }

Added: llvm/trunk/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll?rev=318746&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll (added)
+++ llvm/trunk/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll Tue Nov 21 02:04:28 2017
@@ -0,0 +1,195 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vnni,+avx512vl| FileCheck %s
+
+declare <8 x i32> @llvm.x86.avx512.mask.vpdpbusd.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+declare <8 x i32> @llvm.x86.avx512.maskz.vpdpbusd.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_vpdpbusd_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpdpbusd_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vmovaps %ymm0, %ymm3
+; CHECK-NEXT:    vpdpbusd (%rdi), %ymm1, %ymm3 {%k1}
+; CHECK-NEXT:    vmovaps %ymm0, %ymm4
+; CHECK-NEXT:    vpdpbusd %ymm2, %ymm1, %ymm4
+; CHECK-NEXT:    vpdpbusd %ymm2, %ymm1, %ymm0 {%k1} {z}
+; CHECK-NEXT:    vpaddd %ymm0, %ymm4, %ymm0
+; CHECK-NEXT:    vpaddd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT:    retq
+  %x2 = load <8 x i32>, <8 x i32>* %x2p
+  %res = call <8 x i32> @llvm.x86.avx512.mask.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
+  %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 -1)
+  %res2 = call <8 x i32> @llvm.x86.avx512.maskz.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8  %x3)
+  %res3 = add <8 x i32> %res, %res1
+  %res4 = add <8 x i32> %res2, %res3
+  ret <8 x i32> %res4
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.vpdpbusd.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+declare <4 x i32> @llvm.x86.avx512.maskz.vpdpbusd.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_vpdpbusd_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpdpbusd_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vmovaps %xmm0, %xmm3
+; CHECK-NEXT:    vpdpbusd (%rdi), %xmm1, %xmm3 {%k1}
+; CHECK-NEXT:    vmovaps %xmm0, %xmm4
+; CHECK-NEXT:    vpdpbusd %xmm2, %xmm1, %xmm4
+; CHECK-NEXT:    vpdpbusd %xmm2, %xmm1, %xmm0 {%k1} {z}
+; CHECK-NEXT:    vpaddd %xmm0, %xmm4, %xmm0
+; CHECK-NEXT:    vpaddd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    retq
+  %x2 = load <4 x i32>, <4 x i32>* %x2p
+  %res = call <4 x i32> @llvm.x86.avx512.mask.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
+  %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 -1)
+  %res2 = call <4 x i32> @llvm.x86.avx512.maskz.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8  %x3)
+  %res3 = add <4 x i32> %res, %res1
+  %res4 = add <4 x i32> %res2, %res3
+  ret <4 x i32> %res4
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.vpdpbusds.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+declare <8 x i32> @llvm.x86.avx512.maskz.vpdpbusds.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_vpdpbusds_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpdpbusds_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vmovaps %ymm0, %ymm3
+; CHECK-NEXT:    vpdpbusds (%rdi), %ymm1, %ymm3 {%k1}
+; CHECK-NEXT:    vmovaps %ymm0, %ymm4
+; CHECK-NEXT:    vpdpbusds %ymm2, %ymm1, %ymm4
+; CHECK-NEXT:    vpdpbusds %ymm2, %ymm1, %ymm0 {%k1} {z}
+; CHECK-NEXT:    vpaddd %ymm0, %ymm4, %ymm0
+; CHECK-NEXT:    vpaddd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT:    retq
+  %x2 = load <8 x i32>, <8 x i32>* %x2p
+  %res = call <8 x i32> @llvm.x86.avx512.mask.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
+  %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 -1)
+  %res2 = call <8 x i32> @llvm.x86.avx512.maskz.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8  %x3)
+  %res3 = add <8 x i32> %res, %res1
+  %res4 = add <8 x i32> %res2, %res3
+  ret <8 x i32> %res4
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.vpdpbusds.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+declare <4 x i32> @llvm.x86.avx512.maskz.vpdpbusds.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_vpdpbusds_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpdpbusds_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vmovaps %xmm0, %xmm3
+; CHECK-NEXT:    vpdpbusds (%rdi), %xmm1, %xmm3 {%k1}
+; CHECK-NEXT:    vmovaps %xmm0, %xmm4
+; CHECK-NEXT:    vpdpbusds %xmm2, %xmm1, %xmm4
+; CHECK-NEXT:    vpdpbusds %xmm2, %xmm1, %xmm0 {%k1} {z}
+; CHECK-NEXT:    vpaddd %xmm0, %xmm4, %xmm0
+; CHECK-NEXT:    vpaddd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    retq
+  %x2 = load <4 x i32>, <4 x i32>* %x2p
+  %res = call <4 x i32> @llvm.x86.avx512.mask.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
+  %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 -1)
+  %res2 = call <4 x i32> @llvm.x86.avx512.maskz.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8  %x3)
+  %res3 = add <4 x i32> %res, %res1
+  %res4 = add <4 x i32> %res2, %res3
+  ret <4 x i32> %res4
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.vpdpwssd.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+declare <8 x i32> @llvm.x86.avx512.maskz.vpdpwssd.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_vpdpwssd_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpdpwssd_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vmovaps %ymm0, %ymm3
+; CHECK-NEXT:    vpdpwssd (%rdi), %ymm1, %ymm3 {%k1}
+; CHECK-NEXT:    vmovaps %ymm0, %ymm4
+; CHECK-NEXT:    vpdpwssd %ymm2, %ymm1, %ymm4
+; CHECK-NEXT:    vpdpwssd %ymm2, %ymm1, %ymm0 {%k1} {z}
+; CHECK-NEXT:    vpaddd %ymm0, %ymm4, %ymm0
+; CHECK-NEXT:    vpaddd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT:    retq
+  %x2 = load <8 x i32>, <8 x i32>* %x2p
+  %res = call <8 x i32> @llvm.x86.avx512.mask.vpdpwssd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
+  %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpdpwssd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 -1)
+  %res2 = call <8 x i32> @llvm.x86.avx512.maskz.vpdpwssd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8  %x3)
+  %res3 = add <8 x i32> %res, %res1
+  %res4 = add <8 x i32> %res2, %res3
+  ret <8 x i32> %res4
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.vpdpwssd.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+declare <4 x i32> @llvm.x86.avx512.maskz.vpdpwssd.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_vpdpwssd_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpdpwssd_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vmovaps %xmm0, %xmm3
+; CHECK-NEXT:    vpdpwssd (%rdi), %xmm1, %xmm3 {%k1}
+; CHECK-NEXT:    vmovaps %xmm0, %xmm4
+; CHECK-NEXT:    vpdpwssd %xmm2, %xmm1, %xmm4
+; CHECK-NEXT:    vpdpwssd %xmm2, %xmm1, %xmm0 {%k1} {z}
+; CHECK-NEXT:    vpaddd %xmm0, %xmm4, %xmm0
+; CHECK-NEXT:    vpaddd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    retq
+  %x2 = load <4 x i32>, <4 x i32>* %x2p
+  %res = call <4 x i32> @llvm.x86.avx512.mask.vpdpwssd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
+  %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpdpwssd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 -1)
+  %res2 = call <4 x i32> @llvm.x86.avx512.maskz.vpdpwssd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8  %x3)
+  %res3 = add <4 x i32> %res, %res1
+  %res4 = add <4 x i32> %res2, %res3
+  ret <4 x i32> %res4
+}
+
+
+declare <8 x i32> @llvm.x86.avx512.mask.vpdpwssds.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+declare <8 x i32> @llvm.x86.avx512.maskz.vpdpwssds.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_vpdpwssds_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpdpwssds_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vmovaps %ymm0, %ymm3
+; CHECK-NEXT:    vpdpwssds (%rdi), %ymm1, %ymm3 {%k1}
+; CHECK-NEXT:    vmovaps %ymm0, %ymm4
+; CHECK-NEXT:    vpdpwssds %ymm2, %ymm1, %ymm4
+; CHECK-NEXT:    vpdpwssds %ymm2, %ymm1, %ymm0 {%k1} {z}
+; CHECK-NEXT:    vpaddd %ymm0, %ymm4, %ymm0
+; CHECK-NEXT:    vpaddd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT:    retq
+  %x2 = load <8 x i32>, <8 x i32>* %x2p
+  %res = call <8 x i32> @llvm.x86.avx512.mask.vpdpwssds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
+  %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpdpwssds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 -1)
+  %res2 = call <8 x i32> @llvm.x86.avx512.maskz.vpdpwssds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8  %x3)
+  %res3 = add <8 x i32> %res, %res1
+  %res4 = add <8 x i32> %res2, %res3
+  ret <8 x i32> %res4
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.vpdpwssds.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+declare <4 x i32> @llvm.x86.avx512.maskz.vpdpwssds.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_vpdpwssds_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpdpwssds_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vmovaps %xmm0, %xmm3
+; CHECK-NEXT:    vpdpwssds (%rdi), %xmm1, %xmm3 {%k1}
+; CHECK-NEXT:    vmovaps %xmm0, %xmm4
+; CHECK-NEXT:    vpdpwssds %xmm2, %xmm1, %xmm4
+; CHECK-NEXT:    vpdpwssds %xmm2, %xmm1, %xmm0 {%k1} {z}
+; CHECK-NEXT:    vpaddd %xmm0, %xmm4, %xmm0
+; CHECK-NEXT:    vpaddd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    retq
+  %x2 = load <4 x i32>, <4 x i32>* %x2p
+  %res = call <4 x i32> @llvm.x86.avx512.mask.vpdpwssds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
+  %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpdpwssds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 -1)
+  %res2 = call <4 x i32> @llvm.x86.avx512.maskz.vpdpwssds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8  %x3)
+  %res3 = add <4 x i32> %res, %res1
+  %res4 = add <4 x i32> %res2, %res3
+  ret <4 x i32> %res4
+}
+

Added: llvm/trunk/test/CodeGen/X86/avx512vnni-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vnni-intrinsics.ll?rev=318746&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vnni-intrinsics.ll (added)
+++ llvm/trunk/test/CodeGen/X86/avx512vnni-intrinsics.ll Tue Nov 21 02:04:28 2017
@@ -0,0 +1,98 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vnni | FileCheck %s
+
+declare <16 x i32> @llvm.x86.avx512.mask.vpdpbusd.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) ; `mask` variant: three vector operands plus an i16 lane mask
+declare <16 x i32> @llvm.x86.avx512.maskz.vpdpbusd.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) ; `maskz` variant: same operands, selects the {z} form below
+
+define <16 x i32>@test_int_x86_avx512_mask_vpdpbusd_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) { ; covers vpdpbusd on <16 x i32>: masked with a memory operand, unmasked, and zero-masked
+; CHECK-LABEL: test_int_x86_avx512_mask_vpdpbusd_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vpdpbusd (%rdi), %zmm1, %zmm3 {%k1}
+; CHECK-NEXT:    vmovaps %zmm0, %zmm4
+; CHECK-NEXT:    vpdpbusd %zmm2, %zmm1, %zmm4
+; CHECK-NEXT:    vpdpbusd %zmm2, %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT:    vpaddd %zmm0, %zmm4, %zmm0
+; CHECK-NEXT:    vpaddd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    retq
+  %x2 = load <16 x i32>, <16 x i32>* %x2p ; expected to be folded into the "(%rdi)" memory operand above
+  %res = call <16 x i32> @llvm.x86.avx512.mask.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) ; loaded operand, mask %x3 -> "(%rdi), ... {%k1}"
+  %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1) ; all-ones mask -> instruction without a {%k} annotation
+  %res2 = call <16 x i32> @llvm.x86.avx512.maskz.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16  %x3) ; maskz variant, mask %x3 -> "{%k1} {z}"
+  %res3 = add <16 x i32> %res, %res1 ; combine results so the return value depends on every call
+  %res4 = add <16 x i32> %res2, %res3
+  ret <16 x i32> %res4
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.vpdpbusds.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) ; `mask` variant: three vector operands plus an i16 lane mask
+declare <16 x i32> @llvm.x86.avx512.maskz.vpdpbusds.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) ; `maskz` variant: same operands, selects the {z} form below
+
+define <16 x i32>@test_int_x86_avx512_mask_vpdpbusds_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) { ; covers vpdpbusds on <16 x i32>: masked with a memory operand, unmasked, and zero-masked
+; CHECK-LABEL: test_int_x86_avx512_mask_vpdpbusds_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vpdpbusds (%rdi), %zmm1, %zmm3 {%k1}
+; CHECK-NEXT:    vmovaps %zmm0, %zmm4
+; CHECK-NEXT:    vpdpbusds %zmm2, %zmm1, %zmm4
+; CHECK-NEXT:    vpdpbusds %zmm2, %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT:    vpaddd %zmm0, %zmm4, %zmm0
+; CHECK-NEXT:    vpaddd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    retq
+  %x2 = load <16 x i32>, <16 x i32>* %x2p ; expected to be folded into the "(%rdi)" memory operand above
+  %res = call <16 x i32> @llvm.x86.avx512.mask.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) ; loaded operand, mask %x3 -> "(%rdi), ... {%k1}"
+  %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1) ; all-ones mask -> instruction without a {%k} annotation
+  %res2 = call <16 x i32> @llvm.x86.avx512.maskz.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16  %x3) ; maskz variant, mask %x3 -> "{%k1} {z}"
+  %res3 = add <16 x i32> %res, %res1 ; combine results so the return value depends on every call
+  %res4 = add <16 x i32> %res2, %res3
+  ret <16 x i32> %res4
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.vpdpwssd.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) ; `mask` variant: three vector operands plus an i16 lane mask
+declare <16 x i32> @llvm.x86.avx512.maskz.vpdpwssd.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) ; `maskz` variant: same operands, selects the {z} form below
+
+define <16 x i32>@test_int_x86_avx512_mask_vpdpwssd_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) { ; covers vpdpwssd on <16 x i32>: masked with a memory operand, unmasked, and zero-masked
+; CHECK-LABEL: test_int_x86_avx512_mask_vpdpwssd_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vpdpwssd (%rdi), %zmm1, %zmm3 {%k1}
+; CHECK-NEXT:    vmovaps %zmm0, %zmm4
+; CHECK-NEXT:    vpdpwssd %zmm2, %zmm1, %zmm4
+; CHECK-NEXT:    vpdpwssd %zmm2, %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT:    vpaddd %zmm0, %zmm4, %zmm0
+; CHECK-NEXT:    vpaddd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    retq
+  %x2 = load <16 x i32>, <16 x i32>* %x2p ; expected to be folded into the "(%rdi)" memory operand above
+  %res = call <16 x i32> @llvm.x86.avx512.mask.vpdpwssd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) ; loaded operand, mask %x3 -> "(%rdi), ... {%k1}"
+  %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpdpwssd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1) ; all-ones mask -> instruction without a {%k} annotation
+  %res2 = call <16 x i32> @llvm.x86.avx512.maskz.vpdpwssd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16  %x3) ; maskz variant, mask %x3 -> "{%k1} {z}"
+  %res3 = add <16 x i32> %res, %res1 ; combine results so the return value depends on every call
+  %res4 = add <16 x i32> %res2, %res3
+  ret <16 x i32> %res4
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.vpdpwssds.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) ; `mask` variant: three vector operands plus an i16 lane mask
+declare <16 x i32> @llvm.x86.avx512.maskz.vpdpwssds.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) ; `maskz` variant: same operands, selects the {z} form below
+
+define <16 x i32>@test_int_x86_avx512_mask_vpdpwssds_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) { ; covers vpdpwssds on <16 x i32>: masked with a memory operand, unmasked, and zero-masked
+; CHECK-LABEL: test_int_x86_avx512_mask_vpdpwssds_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    vmovaps %zmm0, %zmm3
+; CHECK-NEXT:    vpdpwssds (%rdi), %zmm1, %zmm3 {%k1}
+; CHECK-NEXT:    vmovaps %zmm0, %zmm4
+; CHECK-NEXT:    vpdpwssds %zmm2, %zmm1, %zmm4
+; CHECK-NEXT:    vpdpwssds %zmm2, %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT:    vpaddd %zmm0, %zmm4, %zmm0
+; CHECK-NEXT:    vpaddd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    retq
+  %x2 = load <16 x i32>, <16 x i32>* %x2p ; expected to be folded into the "(%rdi)" memory operand above
+  %res = call <16 x i32> @llvm.x86.avx512.mask.vpdpwssds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) ; loaded operand, mask %x3 -> "(%rdi), ... {%k1}"
+  %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpdpwssds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1) ; all-ones mask -> instruction without a {%k} annotation
+  %res2 = call <16 x i32> @llvm.x86.avx512.maskz.vpdpwssds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16  %x3) ; maskz variant, mask %x3 -> "{%k1} {z}"
+  %res3 = add <16 x i32> %res, %res1 ; combine results so the return value depends on every call
+  %res4 = add <16 x i32> %res2, %res3
+  ret <16 x i32> %res4
+}
+

Added: llvm/trunk/test/MC/X86/avx512vl_vnni-encoding.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/avx512vl_vnni-encoding.s?rev=318746&view=auto
==============================================================================
--- llvm/trunk/test/MC/X86/avx512vl_vnni-encoding.s (added)
+++ llvm/trunk/test/MC/X86/avx512vl_vnni-encoding.s Tue Nov 21 02:04:28 2017
@@ -0,0 +1,898 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vnni,+avx512vl --show-encoding < %s | FileCheck %s
+
+// CHECK: vpdpbusd %xmm3, %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x08,0x50,0xcb]
+          vpdpbusd %xmm3, %xmm2, %xmm1
+
+// CHECK: vpdpbusds %xmm3, %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x08,0x51,0xcb]
+          vpdpbusds %xmm3, %xmm2, %xmm1
+
+// CHECK: vpdpwssd %xmm3, %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x08,0x52,0xcb]
+          vpdpwssd %xmm3, %xmm2, %xmm1
+
+// CHECK: vpdpwssds %xmm3, %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x08,0x53,0xcb]
+          vpdpwssds %xmm3, %xmm2, %xmm1
+
+// CHECK: vpdpbusd %xmm23, %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x00,0x50,0xef]
+          vpdpbusd %xmm23, %xmm22, %xmm21
+
+// CHECK: vpdpbusds %xmm23, %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x00,0x51,0xef]
+          vpdpbusds %xmm23, %xmm22, %xmm21
+
+// CHECK: vpdpwssd %xmm23, %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x00,0x52,0xef]
+          vpdpwssd %xmm23, %xmm22, %xmm21
+
+// CHECK: vpdpwssds %xmm23, %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x00,0x53,0xef]
+          vpdpwssds %xmm23, %xmm22, %xmm21
+
+// CHECK: vpdpbusd %xmm3, %xmm2, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x0a,0x50,0xcb]
+          vpdpbusd %xmm3, %xmm2, %xmm1 {%k2}
+
+// CHECK: vpdpbusds %xmm3, %xmm2, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x0a,0x51,0xcb]
+          vpdpbusds %xmm3, %xmm2, %xmm1 {%k2}
+
+// CHECK: vpdpwssd %xmm3, %xmm2, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x0a,0x52,0xcb]
+          vpdpwssd %xmm3, %xmm2, %xmm1 {%k2}
+
+// CHECK: vpdpwssds %xmm3, %xmm2, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x0a,0x53,0xcb]
+          vpdpwssds %xmm3, %xmm2, %xmm1 {%k2}
+
+// CHECK: vpdpbusd %xmm23, %xmm22, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x02,0x50,0xef]
+          vpdpbusd %xmm23, %xmm22, %xmm21 {%k2}
+
+// CHECK: vpdpbusds %xmm23, %xmm22, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x02,0x51,0xef]
+          vpdpbusds %xmm23, %xmm22, %xmm21 {%k2}
+
+// CHECK: vpdpwssd %xmm23, %xmm22, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x02,0x52,0xef]
+          vpdpwssd %xmm23, %xmm22, %xmm21 {%k2}
+
+// CHECK: vpdpwssds %xmm23, %xmm22, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x02,0x53,0xef]
+          vpdpwssds %xmm23, %xmm22, %xmm21 {%k2}
+
+// CHECK: vpdpbusd  (%rcx), %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x08,0x50,0x09]
+          vpdpbusd  (%rcx), %xmm2, %xmm1
+
+// CHECK: vpdpbusd  -64(%rsp), %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x08,0x50,0x4c,0x24,0xfc]
+          vpdpbusd  -64(%rsp), %xmm2, %xmm1
+
+// CHECK: vpdpbusd  64(%rsp), %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x08,0x50,0x4c,0x24,0x04]
+          vpdpbusd  64(%rsp), %xmm2, %xmm1
+
+// CHECK: vpdpbusd  268435456(%rcx,%r14,8), %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x08,0x50,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpdpbusd  268435456(%rcx,%r14,8), %xmm2, %xmm1
+
+// CHECK: vpdpbusd  -536870912(%rcx,%r14,8), %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x08,0x50,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpbusd  -536870912(%rcx,%r14,8), %xmm2, %xmm1
+
+// CHECK: vpdpbusd  -536870910(%rcx,%r14,8), %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x08,0x50,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpbusd  -536870910(%rcx,%r14,8), %xmm2, %xmm1
+
+// CHECK: vpdpbusds  (%rcx), %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x08,0x51,0x09]
+          vpdpbusds  (%rcx), %xmm2, %xmm1
+
+// CHECK: vpdpbusds  -64(%rsp), %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x08,0x51,0x4c,0x24,0xfc]
+          vpdpbusds  -64(%rsp), %xmm2, %xmm1
+
+// CHECK: vpdpbusds  64(%rsp), %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x08,0x51,0x4c,0x24,0x04]
+          vpdpbusds  64(%rsp), %xmm2, %xmm1
+
+// CHECK: vpdpbusds  268435456(%rcx,%r14,8), %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x08,0x51,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpdpbusds  268435456(%rcx,%r14,8), %xmm2, %xmm1
+
+// CHECK: vpdpbusds  -536870912(%rcx,%r14,8), %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x08,0x51,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpbusds  -536870912(%rcx,%r14,8), %xmm2, %xmm1
+
+// CHECK: vpdpbusds  -536870910(%rcx,%r14,8), %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x08,0x51,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpbusds  -536870910(%rcx,%r14,8), %xmm2, %xmm1
+
+// CHECK: vpdpwssd  (%rcx), %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x08,0x52,0x09]
+          vpdpwssd  (%rcx), %xmm2, %xmm1
+
+// CHECK: vpdpwssd  -64(%rsp), %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x08,0x52,0x4c,0x24,0xfc]
+          vpdpwssd  -64(%rsp), %xmm2, %xmm1
+
+// CHECK: vpdpwssd  64(%rsp), %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x08,0x52,0x4c,0x24,0x04]
+          vpdpwssd  64(%rsp), %xmm2, %xmm1
+
+// CHECK: vpdpwssd  268435456(%rcx,%r14,8), %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x08,0x52,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpdpwssd  268435456(%rcx,%r14,8), %xmm2, %xmm1
+
+// CHECK: vpdpwssd  -536870912(%rcx,%r14,8), %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x08,0x52,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpwssd  -536870912(%rcx,%r14,8), %xmm2, %xmm1
+
+// CHECK: vpdpwssd  -536870910(%rcx,%r14,8), %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x08,0x52,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpwssd  -536870910(%rcx,%r14,8), %xmm2, %xmm1
+
+// CHECK: vpdpwssds  (%rcx), %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x08,0x53,0x09]
+          vpdpwssds  (%rcx), %xmm2, %xmm1
+
+// CHECK: vpdpwssds  -64(%rsp), %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x08,0x53,0x4c,0x24,0xfc]
+          vpdpwssds  -64(%rsp), %xmm2, %xmm1
+
+// CHECK: vpdpwssds  64(%rsp), %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x08,0x53,0x4c,0x24,0x04]
+          vpdpwssds  64(%rsp), %xmm2, %xmm1
+
+// CHECK: vpdpwssds  268435456(%rcx,%r14,8), %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x08,0x53,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpdpwssds  268435456(%rcx,%r14,8), %xmm2, %xmm1
+
+// CHECK: vpdpwssds  -536870912(%rcx,%r14,8), %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x08,0x53,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpwssds  -536870912(%rcx,%r14,8), %xmm2, %xmm1
+
+// CHECK: vpdpwssds  -536870910(%rcx,%r14,8), %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x08,0x53,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpwssds  -536870910(%rcx,%r14,8), %xmm2, %xmm1
+
+// CHECK: vpdpbusd  (%rcx), %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x00,0x50,0x29]
+          vpdpbusd  (%rcx), %xmm22, %xmm21
+
+// CHECK: vpdpbusd  -64(%rsp), %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x00,0x50,0x6c,0x24,0xfc]
+          vpdpbusd  -64(%rsp), %xmm22, %xmm21
+
+// CHECK: vpdpbusd  64(%rsp), %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x00,0x50,0x6c,0x24,0x04]
+          vpdpbusd  64(%rsp), %xmm22, %xmm21
+
+// CHECK: vpdpbusd  268435456(%rcx,%r14,8), %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x00,0x50,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpdpbusd  268435456(%rcx,%r14,8), %xmm22, %xmm21
+
+// CHECK: vpdpbusd  -536870912(%rcx,%r14,8), %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x00,0x50,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpbusd  -536870912(%rcx,%r14,8), %xmm22, %xmm21
+
+// CHECK: vpdpbusd  -536870910(%rcx,%r14,8), %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x00,0x50,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpbusd  -536870910(%rcx,%r14,8), %xmm22, %xmm21
+
+// CHECK: vpdpbusds  (%rcx), %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x00,0x51,0x29]
+          vpdpbusds  (%rcx), %xmm22, %xmm21
+
+// CHECK: vpdpbusds  -64(%rsp), %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x00,0x51,0x6c,0x24,0xfc]
+          vpdpbusds  -64(%rsp), %xmm22, %xmm21
+
+// CHECK: vpdpbusds  64(%rsp), %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x00,0x51,0x6c,0x24,0x04]
+          vpdpbusds  64(%rsp), %xmm22, %xmm21
+
+// CHECK: vpdpbusds  268435456(%rcx,%r14,8), %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x00,0x51,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpdpbusds  268435456(%rcx,%r14,8), %xmm22, %xmm21
+
+// CHECK: vpdpbusds  -536870912(%rcx,%r14,8), %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x00,0x51,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpbusds  -536870912(%rcx,%r14,8), %xmm22, %xmm21
+
+// CHECK: vpdpbusds  -536870910(%rcx,%r14,8), %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x00,0x51,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpbusds  -536870910(%rcx,%r14,8), %xmm22, %xmm21
+
+// CHECK: vpdpwssd  (%rcx), %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x00,0x52,0x29]
+          vpdpwssd  (%rcx), %xmm22, %xmm21
+
+// CHECK: vpdpwssd  -64(%rsp), %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x00,0x52,0x6c,0x24,0xfc]
+          vpdpwssd  -64(%rsp), %xmm22, %xmm21
+
+// CHECK: vpdpwssd  64(%rsp), %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x00,0x52,0x6c,0x24,0x04]
+          vpdpwssd  64(%rsp), %xmm22, %xmm21
+
+// CHECK: vpdpwssd  268435456(%rcx,%r14,8), %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x00,0x52,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpdpwssd  268435456(%rcx,%r14,8), %xmm22, %xmm21
+
+// CHECK: vpdpwssd  -536870912(%rcx,%r14,8), %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x00,0x52,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpwssd  -536870912(%rcx,%r14,8), %xmm22, %xmm21
+
+// CHECK: vpdpwssd  -536870910(%rcx,%r14,8), %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x00,0x52,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpwssd  -536870910(%rcx,%r14,8), %xmm22, %xmm21
+
+// CHECK: vpdpwssds  (%rcx), %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x00,0x53,0x29]
+          vpdpwssds  (%rcx), %xmm22, %xmm21
+
+// CHECK: vpdpwssds  -64(%rsp), %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x00,0x53,0x6c,0x24,0xfc]
+          vpdpwssds  -64(%rsp), %xmm22, %xmm21
+
+// CHECK: vpdpwssds  64(%rsp), %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x00,0x53,0x6c,0x24,0x04]
+          vpdpwssds  64(%rsp), %xmm22, %xmm21
+
+// CHECK: vpdpwssds  268435456(%rcx,%r14,8), %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x00,0x53,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpdpwssds  268435456(%rcx,%r14,8), %xmm22, %xmm21
+
+// CHECK: vpdpwssds  -536870912(%rcx,%r14,8), %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x00,0x53,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpwssds  -536870912(%rcx,%r14,8), %xmm22, %xmm21
+
+// CHECK: vpdpwssds  -536870910(%rcx,%r14,8), %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x00,0x53,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpwssds  -536870910(%rcx,%r14,8), %xmm22, %xmm21
+
+// CHECK: vpdpbusd  (%rcx), %xmm2, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x0a,0x50,0x09]
+          vpdpbusd  (%rcx), %xmm2, %xmm1 {%k2}
+
+// CHECK: vpdpbusd  -64(%rsp), %xmm2, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x0a,0x50,0x4c,0x24,0xfc]
+          vpdpbusd  -64(%rsp), %xmm2, %xmm1 {%k2}
+
+// CHECK: vpdpbusd  64(%rsp), %xmm2, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x0a,0x50,0x4c,0x24,0x04]
+          vpdpbusd  64(%rsp), %xmm2, %xmm1 {%k2}
+
+// CHECK: vpdpbusd  268435456(%rcx,%r14,8), %xmm2, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x0a,0x50,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpdpbusd  268435456(%rcx,%r14,8), %xmm2, %xmm1 {%k2}
+
+// CHECK: vpdpbusd  -536870912(%rcx,%r14,8), %xmm2, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x0a,0x50,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpbusd  -536870912(%rcx,%r14,8), %xmm2, %xmm1 {%k2}
+
+// CHECK: vpdpbusd  -536870910(%rcx,%r14,8), %xmm2, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x0a,0x50,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpbusd  -536870910(%rcx,%r14,8), %xmm2, %xmm1 {%k2}
+
+// CHECK: vpdpbusds  (%rcx), %xmm2, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x0a,0x51,0x09]
+          vpdpbusds  (%rcx), %xmm2, %xmm1 {%k2}
+
+// CHECK: vpdpbusds  -64(%rsp), %xmm2, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x0a,0x51,0x4c,0x24,0xfc]
+          vpdpbusds  -64(%rsp), %xmm2, %xmm1 {%k2}
+
+// CHECK: vpdpbusds  64(%rsp), %xmm2, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x0a,0x51,0x4c,0x24,0x04]
+          vpdpbusds  64(%rsp), %xmm2, %xmm1 {%k2}
+
+// CHECK: vpdpbusds  268435456(%rcx,%r14,8), %xmm2, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x0a,0x51,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpdpbusds  268435456(%rcx,%r14,8), %xmm2, %xmm1 {%k2}
+
+// CHECK: vpdpbusds  -536870912(%rcx,%r14,8), %xmm2, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x0a,0x51,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpbusds  -536870912(%rcx,%r14,8), %xmm2, %xmm1 {%k2}
+
+// CHECK: vpdpbusds  -536870910(%rcx,%r14,8), %xmm2, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x0a,0x51,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpbusds  -536870910(%rcx,%r14,8), %xmm2, %xmm1 {%k2}
+
+// CHECK: vpdpwssd  (%rcx), %xmm2, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x0a,0x52,0x09]
+          vpdpwssd  (%rcx), %xmm2, %xmm1 {%k2}
+
+// CHECK: vpdpwssd  -64(%rsp), %xmm2, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x0a,0x52,0x4c,0x24,0xfc]
+          vpdpwssd  -64(%rsp), %xmm2, %xmm1 {%k2}
+
+// CHECK: vpdpwssd  64(%rsp), %xmm2, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x0a,0x52,0x4c,0x24,0x04]
+          vpdpwssd  64(%rsp), %xmm2, %xmm1 {%k2}
+
+// CHECK: vpdpwssd  268435456(%rcx,%r14,8), %xmm2, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x0a,0x52,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpdpwssd  268435456(%rcx,%r14,8), %xmm2, %xmm1 {%k2}
+
+// CHECK: vpdpwssd  -536870912(%rcx,%r14,8), %xmm2, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x0a,0x52,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpwssd  -536870912(%rcx,%r14,8), %xmm2, %xmm1 {%k2}
+
+// CHECK: vpdpwssd  -536870910(%rcx,%r14,8), %xmm2, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x0a,0x52,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpwssd  -536870910(%rcx,%r14,8), %xmm2, %xmm1 {%k2}
+
+// CHECK: vpdpwssds  (%rcx), %xmm2, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x0a,0x53,0x09]
+          vpdpwssds  (%rcx), %xmm2, %xmm1 {%k2}
+
+// CHECK: vpdpwssds  -64(%rsp), %xmm2, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x0a,0x53,0x4c,0x24,0xfc]
+          vpdpwssds  -64(%rsp), %xmm2, %xmm1 {%k2}
+
+// CHECK: vpdpwssds  64(%rsp), %xmm2, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x0a,0x53,0x4c,0x24,0x04]
+          vpdpwssds  64(%rsp), %xmm2, %xmm1 {%k2}
+
+// CHECK: vpdpwssds  268435456(%rcx,%r14,8), %xmm2, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x0a,0x53,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpdpwssds  268435456(%rcx,%r14,8), %xmm2, %xmm1 {%k2}
+
+// CHECK: vpdpwssds  -536870912(%rcx,%r14,8), %xmm2, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x0a,0x53,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpwssds  -536870912(%rcx,%r14,8), %xmm2, %xmm1 {%k2}
+
+// CHECK: vpdpwssds  -536870910(%rcx,%r14,8), %xmm2, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x0a,0x53,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpwssds  -536870910(%rcx,%r14,8), %xmm2, %xmm1 {%k2}
+
+// CHECK: vpdpbusd  (%rcx), %xmm22, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x02,0x50,0x29]
+          vpdpbusd  (%rcx), %xmm22, %xmm21 {%k2}
+
+// CHECK: vpdpbusd  -64(%rsp), %xmm22, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x02,0x50,0x6c,0x24,0xfc]
+          vpdpbusd  -64(%rsp), %xmm22, %xmm21 {%k2}
+
+// CHECK: vpdpbusd  64(%rsp), %xmm22, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x02,0x50,0x6c,0x24,0x04]
+          vpdpbusd  64(%rsp), %xmm22, %xmm21 {%k2}
+
+// CHECK: vpdpbusd  268435456(%rcx,%r14,8), %xmm22, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x02,0x50,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpdpbusd  268435456(%rcx,%r14,8), %xmm22, %xmm21 {%k2}
+
+// CHECK: vpdpbusd  -536870912(%rcx,%r14,8), %xmm22, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x02,0x50,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpbusd  -536870912(%rcx,%r14,8), %xmm22, %xmm21 {%k2}
+
+// CHECK: vpdpbusd  -536870910(%rcx,%r14,8), %xmm22, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x02,0x50,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpbusd  -536870910(%rcx,%r14,8), %xmm22, %xmm21 {%k2}
+
+// CHECK: vpdpbusds  (%rcx), %xmm22, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x02,0x51,0x29]
+          vpdpbusds  (%rcx), %xmm22, %xmm21 {%k2}
+
+// CHECK: vpdpbusds  -64(%rsp), %xmm22, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x02,0x51,0x6c,0x24,0xfc]
+          vpdpbusds  -64(%rsp), %xmm22, %xmm21 {%k2}
+
+// CHECK: vpdpbusds  64(%rsp), %xmm22, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x02,0x51,0x6c,0x24,0x04]
+          vpdpbusds  64(%rsp), %xmm22, %xmm21 {%k2}
+
+// CHECK: vpdpbusds  268435456(%rcx,%r14,8), %xmm22, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x02,0x51,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpdpbusds  268435456(%rcx,%r14,8), %xmm22, %xmm21 {%k2}
+
+// CHECK: vpdpbusds  -536870912(%rcx,%r14,8), %xmm22, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x02,0x51,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpbusds  -536870912(%rcx,%r14,8), %xmm22, %xmm21 {%k2}
+
+// CHECK: vpdpbusds  -536870910(%rcx,%r14,8), %xmm22, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x02,0x51,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpbusds  -536870910(%rcx,%r14,8), %xmm22, %xmm21 {%k2}
+
+// CHECK: vpdpwssd  (%rcx), %xmm22, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x02,0x52,0x29]
+          vpdpwssd  (%rcx), %xmm22, %xmm21 {%k2}
+
+// CHECK: vpdpwssd  -64(%rsp), %xmm22, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x02,0x52,0x6c,0x24,0xfc]
+          vpdpwssd  -64(%rsp), %xmm22, %xmm21 {%k2}
+
+// CHECK: vpdpwssd  64(%rsp), %xmm22, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x02,0x52,0x6c,0x24,0x04]
+          vpdpwssd  64(%rsp), %xmm22, %xmm21 {%k2}
+
+// CHECK: vpdpwssd  268435456(%rcx,%r14,8), %xmm22, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x02,0x52,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpdpwssd  268435456(%rcx,%r14,8), %xmm22, %xmm21 {%k2}
+
+// CHECK: vpdpwssd  -536870912(%rcx,%r14,8), %xmm22, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x02,0x52,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpwssd  -536870912(%rcx,%r14,8), %xmm22, %xmm21 {%k2}
+
+// CHECK: vpdpwssd  -536870910(%rcx,%r14,8), %xmm22, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x02,0x52,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpwssd  -536870910(%rcx,%r14,8), %xmm22, %xmm21 {%k2}
+
+// CHECK: vpdpwssds  (%rcx), %xmm22, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x02,0x53,0x29]
+          vpdpwssds  (%rcx), %xmm22, %xmm21 {%k2}
+
+// CHECK: vpdpwssds  -64(%rsp), %xmm22, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x02,0x53,0x6c,0x24,0xfc]
+          vpdpwssds  -64(%rsp), %xmm22, %xmm21 {%k2}
+
+// CHECK: vpdpwssds  64(%rsp), %xmm22, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x02,0x53,0x6c,0x24,0x04]
+          vpdpwssds  64(%rsp), %xmm22, %xmm21 {%k2}
+
+// CHECK: vpdpwssds  268435456(%rcx,%r14,8), %xmm22, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x02,0x53,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpdpwssds  268435456(%rcx,%r14,8), %xmm22, %xmm21 {%k2}
+
+// CHECK: vpdpwssds  -536870912(%rcx,%r14,8), %xmm22, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x02,0x53,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpwssds  -536870912(%rcx,%r14,8), %xmm22, %xmm21 {%k2}
+
+// CHECK: vpdpwssds  -536870910(%rcx,%r14,8), %xmm22, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x02,0x53,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpwssds  -536870910(%rcx,%r14,8), %xmm22, %xmm21 {%k2}
+
+// CHECK: vpdpbusd %ymm3, %ymm2, %ymm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x28,0x50,0xcb]
+          vpdpbusd %ymm3, %ymm2, %ymm1
+
+// CHECK: vpdpbusds %ymm3, %ymm2, %ymm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x28,0x51,0xcb]
+          vpdpbusds %ymm3, %ymm2, %ymm1
+
+// CHECK: vpdpwssd %ymm3, %ymm2, %ymm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x28,0x52,0xcb]
+          vpdpwssd %ymm3, %ymm2, %ymm1
+
+// CHECK: vpdpwssds %ymm3, %ymm2, %ymm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x28,0x53,0xcb]
+          vpdpwssds %ymm3, %ymm2, %ymm1
+
+// CHECK: vpdpbusd %ymm23, %ymm22, %ymm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x20,0x50,0xef]
+          vpdpbusd %ymm23, %ymm22, %ymm21
+
+// CHECK: vpdpbusds %ymm23, %ymm22, %ymm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x20,0x51,0xef]
+          vpdpbusds %ymm23, %ymm22, %ymm21
+
+// CHECK: vpdpwssd %ymm23, %ymm22, %ymm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x20,0x52,0xef]
+          vpdpwssd %ymm23, %ymm22, %ymm21
+
+// CHECK: vpdpwssds %ymm23, %ymm22, %ymm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x20,0x53,0xef]
+          vpdpwssds %ymm23, %ymm22, %ymm21
+
+// CHECK: vpdpbusd %ymm3, %ymm2, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x2a,0x50,0xcb]
+          vpdpbusd %ymm3, %ymm2, %ymm1 {%k2}
+
+// CHECK: vpdpbusds %ymm3, %ymm2, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x2a,0x51,0xcb]
+          vpdpbusds %ymm3, %ymm2, %ymm1 {%k2}
+
+// CHECK: vpdpwssd %ymm3, %ymm2, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x2a,0x52,0xcb]
+          vpdpwssd %ymm3, %ymm2, %ymm1 {%k2}
+
+// CHECK: vpdpwssds %ymm3, %ymm2, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x2a,0x53,0xcb]
+          vpdpwssds %ymm3, %ymm2, %ymm1 {%k2}
+
+// CHECK: vpdpbusd %ymm23, %ymm22, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x22,0x50,0xef]
+          vpdpbusd %ymm23, %ymm22, %ymm21 {%k2}
+
+// CHECK: vpdpbusds %ymm23, %ymm22, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x22,0x51,0xef]
+          vpdpbusds %ymm23, %ymm22, %ymm21 {%k2}
+
+// CHECK: vpdpwssd %ymm23, %ymm22, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x22,0x52,0xef]
+          vpdpwssd %ymm23, %ymm22, %ymm21 {%k2}
+
+// CHECK: vpdpwssds %ymm23, %ymm22, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x22,0x53,0xef]
+          vpdpwssds %ymm23, %ymm22, %ymm21 {%k2}
+
+// CHECK: vpdpbusd  (%rcx), %ymm2, %ymm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x28,0x50,0x09]
+          vpdpbusd  (%rcx), %ymm2, %ymm1
+
+// CHECK: vpdpbusd  -128(%rsp), %ymm2, %ymm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x28,0x50,0x4c,0x24,0xfc]
+          vpdpbusd  -128(%rsp), %ymm2, %ymm1
+
+// CHECK: vpdpbusd  128(%rsp), %ymm2, %ymm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x28,0x50,0x4c,0x24,0x04]
+          vpdpbusd  128(%rsp), %ymm2, %ymm1
+
+// CHECK: vpdpbusd  268435456(%rcx,%r14,8), %ymm2, %ymm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x28,0x50,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpdpbusd  268435456(%rcx,%r14,8), %ymm2, %ymm1
+
+// CHECK: vpdpbusd  -536870912(%rcx,%r14,8), %ymm2, %ymm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x28,0x50,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpbusd  -536870912(%rcx,%r14,8), %ymm2, %ymm1
+
+// CHECK: vpdpbusd  -536870910(%rcx,%r14,8), %ymm2, %ymm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x28,0x50,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpbusd  -536870910(%rcx,%r14,8), %ymm2, %ymm1
+
+// CHECK: vpdpbusds  (%rcx), %ymm2, %ymm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x28,0x51,0x09]
+          vpdpbusds  (%rcx), %ymm2, %ymm1
+
+// CHECK: vpdpbusds  -128(%rsp), %ymm2, %ymm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x28,0x51,0x4c,0x24,0xfc]
+          vpdpbusds  -128(%rsp), %ymm2, %ymm1
+
+// CHECK: vpdpbusds  128(%rsp), %ymm2, %ymm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x28,0x51,0x4c,0x24,0x04]
+          vpdpbusds  128(%rsp), %ymm2, %ymm1
+
+// CHECK: vpdpbusds  268435456(%rcx,%r14,8), %ymm2, %ymm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x28,0x51,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpdpbusds  268435456(%rcx,%r14,8), %ymm2, %ymm1
+
+// CHECK: vpdpbusds  -536870912(%rcx,%r14,8), %ymm2, %ymm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x28,0x51,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpbusds  -536870912(%rcx,%r14,8), %ymm2, %ymm1
+
+// CHECK: vpdpbusds  -536870910(%rcx,%r14,8), %ymm2, %ymm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x28,0x51,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpbusds  -536870910(%rcx,%r14,8), %ymm2, %ymm1
+
+// CHECK: vpdpwssd  (%rcx), %ymm2, %ymm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x28,0x52,0x09]
+          vpdpwssd  (%rcx), %ymm2, %ymm1
+
+// CHECK: vpdpwssd  -128(%rsp), %ymm2, %ymm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x28,0x52,0x4c,0x24,0xfc]
+          vpdpwssd  -128(%rsp), %ymm2, %ymm1
+
+// CHECK: vpdpwssd  128(%rsp), %ymm2, %ymm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x28,0x52,0x4c,0x24,0x04]
+          vpdpwssd  128(%rsp), %ymm2, %ymm1
+
+// CHECK: vpdpwssd  268435456(%rcx,%r14,8), %ymm2, %ymm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x28,0x52,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpdpwssd  268435456(%rcx,%r14,8), %ymm2, %ymm1
+
+// CHECK: vpdpwssd  -536870912(%rcx,%r14,8), %ymm2, %ymm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x28,0x52,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpwssd  -536870912(%rcx,%r14,8), %ymm2, %ymm1
+
+// CHECK: vpdpwssd  -536870910(%rcx,%r14,8), %ymm2, %ymm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x28,0x52,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpwssd  -536870910(%rcx,%r14,8), %ymm2, %ymm1
+
+// CHECK: vpdpwssds  (%rcx), %ymm2, %ymm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x28,0x53,0x09]
+          vpdpwssds  (%rcx), %ymm2, %ymm1
+
+// CHECK: vpdpwssds  -128(%rsp), %ymm2, %ymm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x28,0x53,0x4c,0x24,0xfc]
+          vpdpwssds  -128(%rsp), %ymm2, %ymm1
+
+// CHECK: vpdpwssds  128(%rsp), %ymm2, %ymm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x28,0x53,0x4c,0x24,0x04]
+          vpdpwssds  128(%rsp), %ymm2, %ymm1
+
+// CHECK: vpdpwssds  268435456(%rcx,%r14,8), %ymm2, %ymm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x28,0x53,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpdpwssds  268435456(%rcx,%r14,8), %ymm2, %ymm1
+
+// CHECK: vpdpwssds  -536870912(%rcx,%r14,8), %ymm2, %ymm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x28,0x53,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpwssds  -536870912(%rcx,%r14,8), %ymm2, %ymm1
+
+// CHECK: vpdpwssds  -536870910(%rcx,%r14,8), %ymm2, %ymm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x28,0x53,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpwssds  -536870910(%rcx,%r14,8), %ymm2, %ymm1
+
+// CHECK: vpdpbusd  (%rcx), %ymm22, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x20,0x50,0x29]
+          vpdpbusd  (%rcx), %ymm22, %ymm21
+
+// CHECK: vpdpbusd  -128(%rsp), %ymm22, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x20,0x50,0x6c,0x24,0xfc]
+          vpdpbusd  -128(%rsp), %ymm22, %ymm21
+
+// CHECK: vpdpbusd  128(%rsp), %ymm22, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x20,0x50,0x6c,0x24,0x04]
+          vpdpbusd  128(%rsp), %ymm22, %ymm21
+
+// CHECK: vpdpbusd  268435456(%rcx,%r14,8), %ymm22, %ymm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x20,0x50,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpdpbusd  268435456(%rcx,%r14,8), %ymm22, %ymm21
+
+// CHECK: vpdpbusd  -536870912(%rcx,%r14,8), %ymm22, %ymm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x20,0x50,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpbusd  -536870912(%rcx,%r14,8), %ymm22, %ymm21
+
+// CHECK: vpdpbusd  -536870910(%rcx,%r14,8), %ymm22, %ymm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x20,0x50,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpbusd  -536870910(%rcx,%r14,8), %ymm22, %ymm21
+
+// CHECK: vpdpbusds  (%rcx), %ymm22, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x20,0x51,0x29]
+          vpdpbusds  (%rcx), %ymm22, %ymm21
+
+// CHECK: vpdpbusds  -128(%rsp), %ymm22, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x20,0x51,0x6c,0x24,0xfc]
+          vpdpbusds  -128(%rsp), %ymm22, %ymm21
+
+// CHECK: vpdpbusds  128(%rsp), %ymm22, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x20,0x51,0x6c,0x24,0x04]
+          vpdpbusds  128(%rsp), %ymm22, %ymm21
+
+// CHECK: vpdpbusds  268435456(%rcx,%r14,8), %ymm22, %ymm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x20,0x51,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpdpbusds  268435456(%rcx,%r14,8), %ymm22, %ymm21
+
+// CHECK: vpdpbusds  -536870912(%rcx,%r14,8), %ymm22, %ymm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x20,0x51,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpbusds  -536870912(%rcx,%r14,8), %ymm22, %ymm21
+
+// CHECK: vpdpbusds  -536870910(%rcx,%r14,8), %ymm22, %ymm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x20,0x51,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpbusds  -536870910(%rcx,%r14,8), %ymm22, %ymm21
+
+// CHECK: vpdpwssd  (%rcx), %ymm22, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x20,0x52,0x29]
+          vpdpwssd  (%rcx), %ymm22, %ymm21
+
+// CHECK: vpdpwssd  -128(%rsp), %ymm22, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x20,0x52,0x6c,0x24,0xfc]
+          vpdpwssd  -128(%rsp), %ymm22, %ymm21
+
+// CHECK: vpdpwssd  128(%rsp), %ymm22, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x20,0x52,0x6c,0x24,0x04]
+          vpdpwssd  128(%rsp), %ymm22, %ymm21
+
+// CHECK: vpdpwssd  268435456(%rcx,%r14,8), %ymm22, %ymm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x20,0x52,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpdpwssd  268435456(%rcx,%r14,8), %ymm22, %ymm21
+
+// CHECK: vpdpwssd  -536870912(%rcx,%r14,8), %ymm22, %ymm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x20,0x52,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpwssd  -536870912(%rcx,%r14,8), %ymm22, %ymm21
+
+// CHECK: vpdpwssd  -536870910(%rcx,%r14,8), %ymm22, %ymm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x20,0x52,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpwssd  -536870910(%rcx,%r14,8), %ymm22, %ymm21
+
+// CHECK: vpdpwssds  (%rcx), %ymm22, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x20,0x53,0x29]
+          vpdpwssds  (%rcx), %ymm22, %ymm21
+
+// CHECK: vpdpwssds  -128(%rsp), %ymm22, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x20,0x53,0x6c,0x24,0xfc]
+          vpdpwssds  -128(%rsp), %ymm22, %ymm21
+
+// CHECK: vpdpwssds  128(%rsp), %ymm22, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x20,0x53,0x6c,0x24,0x04]
+          vpdpwssds  128(%rsp), %ymm22, %ymm21
+
+// CHECK: vpdpwssds  268435456(%rcx,%r14,8), %ymm22, %ymm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x20,0x53,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpdpwssds  268435456(%rcx,%r14,8), %ymm22, %ymm21
+
+// CHECK: vpdpwssds  -536870912(%rcx,%r14,8), %ymm22, %ymm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x20,0x53,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpwssds  -536870912(%rcx,%r14,8), %ymm22, %ymm21
+
+// CHECK: vpdpwssds  -536870910(%rcx,%r14,8), %ymm22, %ymm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x20,0x53,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpwssds  -536870910(%rcx,%r14,8), %ymm22, %ymm21
+
+// CHECK: vpdpbusd  (%rcx), %ymm2, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x2a,0x50,0x09]
+          vpdpbusd  (%rcx), %ymm2, %ymm1 {%k2}
+
+// CHECK: vpdpbusd  -128(%rsp), %ymm2, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x2a,0x50,0x4c,0x24,0xfc]
+          vpdpbusd  -128(%rsp), %ymm2, %ymm1 {%k2}
+
+// CHECK: vpdpbusd  128(%rsp), %ymm2, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x2a,0x50,0x4c,0x24,0x04]
+          vpdpbusd  128(%rsp), %ymm2, %ymm1 {%k2}
+
+// CHECK: vpdpbusd  268435456(%rcx,%r14,8), %ymm2, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x2a,0x50,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpdpbusd  268435456(%rcx,%r14,8), %ymm2, %ymm1 {%k2}
+
+// CHECK: vpdpbusd  -536870912(%rcx,%r14,8), %ymm2, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x2a,0x50,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpbusd  -536870912(%rcx,%r14,8), %ymm2, %ymm1 {%k2}
+
+// CHECK: vpdpbusd  -536870910(%rcx,%r14,8), %ymm2, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x2a,0x50,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpbusd  -536870910(%rcx,%r14,8), %ymm2, %ymm1 {%k2}
+
+// CHECK: vpdpbusds  (%rcx), %ymm2, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x2a,0x51,0x09]
+          vpdpbusds  (%rcx), %ymm2, %ymm1 {%k2}
+
+// CHECK: vpdpbusds  -128(%rsp), %ymm2, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x2a,0x51,0x4c,0x24,0xfc]
+          vpdpbusds  -128(%rsp), %ymm2, %ymm1 {%k2}
+
+// CHECK: vpdpbusds  128(%rsp), %ymm2, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x2a,0x51,0x4c,0x24,0x04]
+          vpdpbusds  128(%rsp), %ymm2, %ymm1 {%k2}
+
+// CHECK: vpdpbusds  268435456(%rcx,%r14,8), %ymm2, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x2a,0x51,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpdpbusds  268435456(%rcx,%r14,8), %ymm2, %ymm1 {%k2}
+
+// CHECK: vpdpbusds  -536870912(%rcx,%r14,8), %ymm2, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x2a,0x51,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpbusds  -536870912(%rcx,%r14,8), %ymm2, %ymm1 {%k2}
+
+// CHECK: vpdpbusds  -536870910(%rcx,%r14,8), %ymm2, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x2a,0x51,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpbusds  -536870910(%rcx,%r14,8), %ymm2, %ymm1 {%k2}
+
+// CHECK: vpdpwssd  (%rcx), %ymm2, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x2a,0x52,0x09]
+          vpdpwssd  (%rcx), %ymm2, %ymm1 {%k2}
+
+// CHECK: vpdpwssd  -128(%rsp), %ymm2, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x2a,0x52,0x4c,0x24,0xfc]
+          vpdpwssd  -128(%rsp), %ymm2, %ymm1 {%k2}
+
+// CHECK: vpdpwssd  128(%rsp), %ymm2, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x2a,0x52,0x4c,0x24,0x04]
+          vpdpwssd  128(%rsp), %ymm2, %ymm1 {%k2}
+
+// CHECK: vpdpwssd  268435456(%rcx,%r14,8), %ymm2, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x2a,0x52,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpdpwssd  268435456(%rcx,%r14,8), %ymm2, %ymm1 {%k2}
+
+// CHECK: vpdpwssd  -536870912(%rcx,%r14,8), %ymm2, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x2a,0x52,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpwssd  -536870912(%rcx,%r14,8), %ymm2, %ymm1 {%k2}
+
+// CHECK: vpdpwssd  -536870910(%rcx,%r14,8), %ymm2, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x2a,0x52,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpwssd  -536870910(%rcx,%r14,8), %ymm2, %ymm1 {%k2}
+
+// CHECK: vpdpwssds  (%rcx), %ymm2, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x2a,0x53,0x09]
+          vpdpwssds  (%rcx), %ymm2, %ymm1 {%k2}
+
+// CHECK: vpdpwssds  -128(%rsp), %ymm2, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x2a,0x53,0x4c,0x24,0xfc]
+          vpdpwssds  -128(%rsp), %ymm2, %ymm1 {%k2}
+
+// CHECK: vpdpwssds  128(%rsp), %ymm2, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x2a,0x53,0x4c,0x24,0x04]
+          vpdpwssds  128(%rsp), %ymm2, %ymm1 {%k2}
+
+// CHECK: vpdpwssds  268435456(%rcx,%r14,8), %ymm2, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x2a,0x53,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpdpwssds  268435456(%rcx,%r14,8), %ymm2, %ymm1 {%k2}
+
+// CHECK: vpdpwssds  -536870912(%rcx,%r14,8), %ymm2, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x2a,0x53,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpwssds  -536870912(%rcx,%r14,8), %ymm2, %ymm1 {%k2}
+
+// CHECK: vpdpwssds  -536870910(%rcx,%r14,8), %ymm2, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x2a,0x53,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpwssds  -536870910(%rcx,%r14,8), %ymm2, %ymm1 {%k2}
+
+// CHECK: vpdpbusd  (%rcx), %ymm22, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x22,0x50,0x29]
+          vpdpbusd  (%rcx), %ymm22, %ymm21 {%k2}
+
+// CHECK: vpdpbusd  -128(%rsp), %ymm22, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x22,0x50,0x6c,0x24,0xfc]
+          vpdpbusd  -128(%rsp), %ymm22, %ymm21 {%k2}
+
+// CHECK: vpdpbusd  128(%rsp), %ymm22, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x22,0x50,0x6c,0x24,0x04]
+          vpdpbusd  128(%rsp), %ymm22, %ymm21 {%k2}
+
+// CHECK: vpdpbusd  268435456(%rcx,%r14,8), %ymm22, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x22,0x50,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpdpbusd  268435456(%rcx,%r14,8), %ymm22, %ymm21 {%k2}
+
+// CHECK: vpdpbusd  -536870912(%rcx,%r14,8), %ymm22, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x22,0x50,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpbusd  -536870912(%rcx,%r14,8), %ymm22, %ymm21 {%k2}
+
+// CHECK: vpdpbusd  -536870910(%rcx,%r14,8), %ymm22, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x22,0x50,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpbusd  -536870910(%rcx,%r14,8), %ymm22, %ymm21 {%k2}
+
+// CHECK: vpdpbusds  (%rcx), %ymm22, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x22,0x51,0x29]
+          vpdpbusds  (%rcx), %ymm22, %ymm21 {%k2}
+
+// CHECK: vpdpbusds  -128(%rsp), %ymm22, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x22,0x51,0x6c,0x24,0xfc]
+          vpdpbusds  -128(%rsp), %ymm22, %ymm21 {%k2}
+
+// CHECK: vpdpbusds  128(%rsp), %ymm22, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x22,0x51,0x6c,0x24,0x04]
+          vpdpbusds  128(%rsp), %ymm22, %ymm21 {%k2}
+
+// CHECK: vpdpbusds  268435456(%rcx,%r14,8), %ymm22, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x22,0x51,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpdpbusds  268435456(%rcx,%r14,8), %ymm22, %ymm21 {%k2}
+
+// CHECK: vpdpbusds  -536870912(%rcx,%r14,8), %ymm22, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x22,0x51,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpbusds  -536870912(%rcx,%r14,8), %ymm22, %ymm21 {%k2}
+
+// CHECK: vpdpbusds  -536870910(%rcx,%r14,8), %ymm22, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x22,0x51,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpbusds  -536870910(%rcx,%r14,8), %ymm22, %ymm21 {%k2}
+
+// CHECK: vpdpwssd  (%rcx), %ymm22, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x22,0x52,0x29]
+          vpdpwssd  (%rcx), %ymm22, %ymm21 {%k2}
+
+// CHECK: vpdpwssd  -128(%rsp), %ymm22, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x22,0x52,0x6c,0x24,0xfc]
+          vpdpwssd  -128(%rsp), %ymm22, %ymm21 {%k2}
+
+// CHECK: vpdpwssd  128(%rsp), %ymm22, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x22,0x52,0x6c,0x24,0x04]
+          vpdpwssd  128(%rsp), %ymm22, %ymm21 {%k2}
+
+// CHECK: vpdpwssd  268435456(%rcx,%r14,8), %ymm22, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x22,0x52,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpdpwssd  268435456(%rcx,%r14,8), %ymm22, %ymm21 {%k2}
+
+// CHECK: vpdpwssd  -536870912(%rcx,%r14,8), %ymm22, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x22,0x52,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpwssd  -536870912(%rcx,%r14,8), %ymm22, %ymm21 {%k2}
+
+// CHECK: vpdpwssd  -536870910(%rcx,%r14,8), %ymm22, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x22,0x52,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpwssd  -536870910(%rcx,%r14,8), %ymm22, %ymm21 {%k2}
+
+// CHECK: vpdpwssds  (%rcx), %ymm22, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x22,0x53,0x29]
+          vpdpwssds  (%rcx), %ymm22, %ymm21 {%k2}
+
+// CHECK: vpdpwssds  -128(%rsp), %ymm22, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x22,0x53,0x6c,0x24,0xfc]
+          vpdpwssds  -128(%rsp), %ymm22, %ymm21 {%k2}
+
+// CHECK: vpdpwssds  128(%rsp), %ymm22, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x22,0x53,0x6c,0x24,0x04]
+          vpdpwssds  128(%rsp), %ymm22, %ymm21 {%k2}
+
+// CHECK: vpdpwssds  268435456(%rcx,%r14,8), %ymm22, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x22,0x53,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpdpwssds  268435456(%rcx,%r14,8), %ymm22, %ymm21 {%k2}
+
+// CHECK: vpdpwssds  -536870912(%rcx,%r14,8), %ymm22, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x22,0x53,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpwssds  -536870912(%rcx,%r14,8), %ymm22, %ymm21 {%k2}
+
+// CHECK: vpdpwssds  -536870910(%rcx,%r14,8), %ymm22, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x22,0x53,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpwssds  -536870910(%rcx,%r14,8), %ymm22, %ymm21 {%k2}
+

Added: llvm/trunk/test/MC/X86/avx512vnni-encoding.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/avx512vnni-encoding.s?rev=318746&view=auto
==============================================================================
--- llvm/trunk/test/MC/X86/avx512vnni-encoding.s (added)
+++ llvm/trunk/test/MC/X86/avx512vnni-encoding.s Tue Nov 21 02:04:28 2017
@@ -0,0 +1,450 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vnni --show-encoding < %s | FileCheck %s
+
+// CHECK: vpdpbusd %zmm3, %zmm2, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x50,0xcb]
+          vpdpbusd %zmm3, %zmm2, %zmm1
+
+// CHECK: vpdpbusds %zmm3, %zmm2, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x51,0xcb]
+          vpdpbusds %zmm3, %zmm2, %zmm1
+
+// CHECK: vpdpwssd %zmm3, %zmm2, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x52,0xcb]
+          vpdpwssd %zmm3, %zmm2, %zmm1
+
+// CHECK: vpdpwssds %zmm3, %zmm2, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x53,0xcb]
+          vpdpwssds %zmm3, %zmm2, %zmm1
+
+// CHECK: vpdpbusd %zmm23, %zmm22, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x40,0x50,0xef]
+          vpdpbusd %zmm23, %zmm22, %zmm21
+
+// CHECK: vpdpbusds %zmm23, %zmm22, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x40,0x51,0xef]
+          vpdpbusds %zmm23, %zmm22, %zmm21
+
+// CHECK: vpdpwssd %zmm23, %zmm22, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x40,0x52,0xef]
+          vpdpwssd %zmm23, %zmm22, %zmm21
+
+// CHECK: vpdpwssds %zmm23, %zmm22, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x40,0x53,0xef]
+          vpdpwssds %zmm23, %zmm22, %zmm21
+
+// CHECK: vpdpbusd %zmm3, %zmm2, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x4a,0x50,0xcb]
+          vpdpbusd %zmm3, %zmm2, %zmm1 {%k2}
+
+// CHECK: vpdpbusds %zmm3, %zmm2, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x4a,0x51,0xcb]
+          vpdpbusds %zmm3, %zmm2, %zmm1 {%k2}
+
+// CHECK: vpdpwssd %zmm3, %zmm2, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x4a,0x52,0xcb]
+          vpdpwssd %zmm3, %zmm2, %zmm1 {%k2}
+
+// CHECK: vpdpwssds %zmm3, %zmm2, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x4a,0x53,0xcb]
+          vpdpwssds %zmm3, %zmm2, %zmm1 {%k2}
+
+// CHECK: vpdpbusd %zmm23, %zmm22, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x42,0x50,0xef]
+          vpdpbusd %zmm23, %zmm22, %zmm21 {%k2}
+
+// CHECK: vpdpbusds %zmm23, %zmm22, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x42,0x51,0xef]
+          vpdpbusds %zmm23, %zmm22, %zmm21 {%k2}
+
+// CHECK: vpdpwssd %zmm23, %zmm22, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x42,0x52,0xef]
+          vpdpwssd %zmm23, %zmm22, %zmm21 {%k2}
+
+// CHECK: vpdpwssds %zmm23, %zmm22, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x42,0x53,0xef]
+          vpdpwssds %zmm23, %zmm22, %zmm21 {%k2}
+
+// CHECK: vpdpbusd  (%rcx), %zmm2, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x50,0x09]
+          vpdpbusd  (%rcx), %zmm2, %zmm1
+
+// CHECK: vpdpbusd  -256(%rsp), %zmm2, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x50,0x4c,0x24,0xfc]
+          vpdpbusd  -256(%rsp), %zmm2, %zmm1
+
+// CHECK: vpdpbusd  256(%rsp), %zmm2, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x50,0x4c,0x24,0x04]
+          vpdpbusd  256(%rsp), %zmm2, %zmm1
+
+// CHECK: vpdpbusd  268435456(%rcx,%r14,8), %zmm2, %zmm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x48,0x50,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpdpbusd  268435456(%rcx,%r14,8), %zmm2, %zmm1
+
+// CHECK: vpdpbusd  -536870912(%rcx,%r14,8), %zmm2, %zmm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x48,0x50,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpbusd  -536870912(%rcx,%r14,8), %zmm2, %zmm1
+
+// CHECK: vpdpbusd  -536870910(%rcx,%r14,8), %zmm2, %zmm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x48,0x50,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpbusd  -536870910(%rcx,%r14,8), %zmm2, %zmm1
+
+// CHECK: vpdpbusds  (%rcx), %zmm2, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x51,0x09]
+          vpdpbusds  (%rcx), %zmm2, %zmm1
+
+// CHECK: vpdpbusds  -256(%rsp), %zmm2, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x51,0x4c,0x24,0xfc]
+          vpdpbusds  -256(%rsp), %zmm2, %zmm1
+
+// CHECK: vpdpbusds  256(%rsp), %zmm2, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x51,0x4c,0x24,0x04]
+          vpdpbusds  256(%rsp), %zmm2, %zmm1
+
+// CHECK: vpdpbusds  268435456(%rcx,%r14,8), %zmm2, %zmm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x48,0x51,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpdpbusds  268435456(%rcx,%r14,8), %zmm2, %zmm1
+
+// CHECK: vpdpbusds  -536870912(%rcx,%r14,8), %zmm2, %zmm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x48,0x51,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpbusds  -536870912(%rcx,%r14,8), %zmm2, %zmm1
+
+// CHECK: vpdpbusds  -536870910(%rcx,%r14,8), %zmm2, %zmm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x48,0x51,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpbusds  -536870910(%rcx,%r14,8), %zmm2, %zmm1
+
+// CHECK: vpdpwssd  (%rcx), %zmm2, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x52,0x09]
+          vpdpwssd  (%rcx), %zmm2, %zmm1
+
+// CHECK: vpdpwssd  -256(%rsp), %zmm2, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x52,0x4c,0x24,0xfc]
+          vpdpwssd  -256(%rsp), %zmm2, %zmm1
+
+// CHECK: vpdpwssd  256(%rsp), %zmm2, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x52,0x4c,0x24,0x04]
+          vpdpwssd  256(%rsp), %zmm2, %zmm1
+
+// CHECK: vpdpwssd  268435456(%rcx,%r14,8), %zmm2, %zmm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x48,0x52,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpdpwssd  268435456(%rcx,%r14,8), %zmm2, %zmm1
+
+// CHECK: vpdpwssd  -536870912(%rcx,%r14,8), %zmm2, %zmm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x48,0x52,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpwssd  -536870912(%rcx,%r14,8), %zmm2, %zmm1
+
+// CHECK: vpdpwssd  -536870910(%rcx,%r14,8), %zmm2, %zmm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x48,0x52,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpwssd  -536870910(%rcx,%r14,8), %zmm2, %zmm1
+
+// CHECK: vpdpwssds  (%rcx), %zmm2, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x53,0x09]
+          vpdpwssds  (%rcx), %zmm2, %zmm1
+
+// CHECK: vpdpwssds  -256(%rsp), %zmm2, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x53,0x4c,0x24,0xfc]
+          vpdpwssds  -256(%rsp), %zmm2, %zmm1
+
+// CHECK: vpdpwssds  256(%rsp), %zmm2, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x6d,0x48,0x53,0x4c,0x24,0x04]
+          vpdpwssds  256(%rsp), %zmm2, %zmm1
+
+// CHECK: vpdpwssds  268435456(%rcx,%r14,8), %zmm2, %zmm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x48,0x53,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpdpwssds  268435456(%rcx,%r14,8), %zmm2, %zmm1
+
+// CHECK: vpdpwssds  -536870912(%rcx,%r14,8), %zmm2, %zmm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x48,0x53,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpwssds  -536870912(%rcx,%r14,8), %zmm2, %zmm1
+
+// CHECK: vpdpwssds  -536870910(%rcx,%r14,8), %zmm2, %zmm1
+// CHECK: encoding: [0x62,0xb2,0x6d,0x48,0x53,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpwssds  -536870910(%rcx,%r14,8), %zmm2, %zmm1
+
+// CHECK: vpdpbusd  (%rcx), %zmm22, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x40,0x50,0x29]
+          vpdpbusd  (%rcx), %zmm22, %zmm21
+
+// CHECK: vpdpbusd  -256(%rsp), %zmm22, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x40,0x50,0x6c,0x24,0xfc]
+          vpdpbusd  -256(%rsp), %zmm22, %zmm21
+
+// CHECK: vpdpbusd  256(%rsp), %zmm22, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x40,0x50,0x6c,0x24,0x04]
+          vpdpbusd  256(%rsp), %zmm22, %zmm21
+
+// CHECK: vpdpbusd  268435456(%rcx,%r14,8), %zmm22, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x40,0x50,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpdpbusd  268435456(%rcx,%r14,8), %zmm22, %zmm21
+
+// CHECK: vpdpbusd  -536870912(%rcx,%r14,8), %zmm22, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x40,0x50,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpbusd  -536870912(%rcx,%r14,8), %zmm22, %zmm21
+
+// CHECK: vpdpbusd  -536870910(%rcx,%r14,8), %zmm22, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x40,0x50,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpbusd  -536870910(%rcx,%r14,8), %zmm22, %zmm21
+
+// CHECK: vpdpbusds  (%rcx), %zmm22, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x40,0x51,0x29]
+          vpdpbusds  (%rcx), %zmm22, %zmm21
+
+// CHECK: vpdpbusds  -256(%rsp), %zmm22, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x40,0x51,0x6c,0x24,0xfc]
+          vpdpbusds  -256(%rsp), %zmm22, %zmm21
+
+// CHECK: vpdpbusds  256(%rsp), %zmm22, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x40,0x51,0x6c,0x24,0x04]
+          vpdpbusds  256(%rsp), %zmm22, %zmm21
+
+// CHECK: vpdpbusds  268435456(%rcx,%r14,8), %zmm22, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x40,0x51,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpdpbusds  268435456(%rcx,%r14,8), %zmm22, %zmm21
+
+// CHECK: vpdpbusds  -536870912(%rcx,%r14,8), %zmm22, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x40,0x51,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpbusds  -536870912(%rcx,%r14,8), %zmm22, %zmm21
+
+// CHECK: vpdpbusds  -536870910(%rcx,%r14,8), %zmm22, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x40,0x51,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpbusds  -536870910(%rcx,%r14,8), %zmm22, %zmm21
+
+// CHECK: vpdpwssd  (%rcx), %zmm22, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x40,0x52,0x29]
+          vpdpwssd  (%rcx), %zmm22, %zmm21
+
+// CHECK: vpdpwssd  -256(%rsp), %zmm22, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x40,0x52,0x6c,0x24,0xfc]
+          vpdpwssd  -256(%rsp), %zmm22, %zmm21
+
+// CHECK: vpdpwssd  256(%rsp), %zmm22, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x40,0x52,0x6c,0x24,0x04]
+          vpdpwssd  256(%rsp), %zmm22, %zmm21
+
+// CHECK: vpdpwssd  268435456(%rcx,%r14,8), %zmm22, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x40,0x52,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpdpwssd  268435456(%rcx,%r14,8), %zmm22, %zmm21
+
+// CHECK: vpdpwssd  -536870912(%rcx,%r14,8), %zmm22, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x40,0x52,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpwssd  -536870912(%rcx,%r14,8), %zmm22, %zmm21
+
+// CHECK: vpdpwssd  -536870910(%rcx,%r14,8), %zmm22, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x40,0x52,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpwssd  -536870910(%rcx,%r14,8), %zmm22, %zmm21
+
+// CHECK: vpdpwssds  (%rcx), %zmm22, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x40,0x53,0x29]
+          vpdpwssds  (%rcx), %zmm22, %zmm21
+
+// CHECK: vpdpwssds  -256(%rsp), %zmm22, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x40,0x53,0x6c,0x24,0xfc]
+          vpdpwssds  -256(%rsp), %zmm22, %zmm21
+
+// CHECK: vpdpwssds  256(%rsp), %zmm22, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x4d,0x40,0x53,0x6c,0x24,0x04]
+          vpdpwssds  256(%rsp), %zmm22, %zmm21
+
+// CHECK: vpdpwssds  268435456(%rcx,%r14,8), %zmm22, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x40,0x53,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpdpwssds  268435456(%rcx,%r14,8), %zmm22, %zmm21
+
+// CHECK: vpdpwssds  -536870912(%rcx,%r14,8), %zmm22, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x40,0x53,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpwssds  -536870912(%rcx,%r14,8), %zmm22, %zmm21
+
+// CHECK: vpdpwssds  -536870910(%rcx,%r14,8), %zmm22, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x4d,0x40,0x53,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpwssds  -536870910(%rcx,%r14,8), %zmm22, %zmm21
+
+// CHECK: vpdpbusd  (%rcx), %zmm2, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x4a,0x50,0x09]
+          vpdpbusd  (%rcx), %zmm2, %zmm1 {%k2}
+
+// CHECK: vpdpbusd  -256(%rsp), %zmm2, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x4a,0x50,0x4c,0x24,0xfc]
+          vpdpbusd  -256(%rsp), %zmm2, %zmm1 {%k2}
+
+// CHECK: vpdpbusd  256(%rsp), %zmm2, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x4a,0x50,0x4c,0x24,0x04]
+          vpdpbusd  256(%rsp), %zmm2, %zmm1 {%k2}
+
+// CHECK: vpdpbusd  268435456(%rcx,%r14,8), %zmm2, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x4a,0x50,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpdpbusd  268435456(%rcx,%r14,8), %zmm2, %zmm1 {%k2}
+
+// CHECK: vpdpbusd  -536870912(%rcx,%r14,8), %zmm2, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x4a,0x50,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpbusd  -536870912(%rcx,%r14,8), %zmm2, %zmm1 {%k2}
+
+// CHECK: vpdpbusd  -536870910(%rcx,%r14,8), %zmm2, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x4a,0x50,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpbusd  -536870910(%rcx,%r14,8), %zmm2, %zmm1 {%k2}
+
+// CHECK: vpdpbusds  (%rcx), %zmm2, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x4a,0x51,0x09]
+          vpdpbusds  (%rcx), %zmm2, %zmm1 {%k2}
+
+// CHECK: vpdpbusds  -256(%rsp), %zmm2, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x4a,0x51,0x4c,0x24,0xfc]
+          vpdpbusds  -256(%rsp), %zmm2, %zmm1 {%k2}
+
+// CHECK: vpdpbusds  256(%rsp), %zmm2, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x4a,0x51,0x4c,0x24,0x04]
+          vpdpbusds  256(%rsp), %zmm2, %zmm1 {%k2}
+
+// CHECK: vpdpbusds  268435456(%rcx,%r14,8), %zmm2, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x4a,0x51,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpdpbusds  268435456(%rcx,%r14,8), %zmm2, %zmm1 {%k2}
+
+// CHECK: vpdpbusds  -536870912(%rcx,%r14,8), %zmm2, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x4a,0x51,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpbusds  -536870912(%rcx,%r14,8), %zmm2, %zmm1 {%k2}
+
+// CHECK: vpdpbusds  -536870910(%rcx,%r14,8), %zmm2, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x4a,0x51,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpbusds  -536870910(%rcx,%r14,8), %zmm2, %zmm1 {%k2}
+
+// CHECK: vpdpwssd  (%rcx), %zmm2, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x4a,0x52,0x09]
+          vpdpwssd  (%rcx), %zmm2, %zmm1 {%k2}
+
+// CHECK: vpdpwssd  -256(%rsp), %zmm2, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x4a,0x52,0x4c,0x24,0xfc]
+          vpdpwssd  -256(%rsp), %zmm2, %zmm1 {%k2}
+
+// CHECK: vpdpwssd  256(%rsp), %zmm2, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x4a,0x52,0x4c,0x24,0x04]
+          vpdpwssd  256(%rsp), %zmm2, %zmm1 {%k2}
+
+// CHECK: vpdpwssd  268435456(%rcx,%r14,8), %zmm2, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x4a,0x52,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpdpwssd  268435456(%rcx,%r14,8), %zmm2, %zmm1 {%k2}
+
+// CHECK: vpdpwssd  -536870912(%rcx,%r14,8), %zmm2, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x4a,0x52,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpwssd  -536870912(%rcx,%r14,8), %zmm2, %zmm1 {%k2}
+
+// CHECK: vpdpwssd  -536870910(%rcx,%r14,8), %zmm2, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x4a,0x52,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpwssd  -536870910(%rcx,%r14,8), %zmm2, %zmm1 {%k2}
+
+// CHECK: vpdpwssds  (%rcx), %zmm2, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x4a,0x53,0x09]
+          vpdpwssds  (%rcx), %zmm2, %zmm1 {%k2}
+
+// CHECK: vpdpwssds  -256(%rsp), %zmm2, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x4a,0x53,0x4c,0x24,0xfc]
+          vpdpwssds  -256(%rsp), %zmm2, %zmm1 {%k2}
+
+// CHECK: vpdpwssds  256(%rsp), %zmm2, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x6d,0x4a,0x53,0x4c,0x24,0x04]
+          vpdpwssds  256(%rsp), %zmm2, %zmm1 {%k2}
+
+// CHECK: vpdpwssds  268435456(%rcx,%r14,8), %zmm2, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x4a,0x53,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpdpwssds  268435456(%rcx,%r14,8), %zmm2, %zmm1 {%k2}
+
+// CHECK: vpdpwssds  -536870912(%rcx,%r14,8), %zmm2, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x4a,0x53,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpwssds  -536870912(%rcx,%r14,8), %zmm2, %zmm1 {%k2}
+
+// CHECK: vpdpwssds  -536870910(%rcx,%r14,8), %zmm2, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x6d,0x4a,0x53,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpwssds  -536870910(%rcx,%r14,8), %zmm2, %zmm1 {%k2}
+
+// CHECK: vpdpbusd  (%rcx), %zmm22, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x42,0x50,0x29]
+          vpdpbusd  (%rcx), %zmm22, %zmm21 {%k2}
+
+// CHECK: vpdpbusd  -256(%rsp), %zmm22, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x42,0x50,0x6c,0x24,0xfc]
+          vpdpbusd  -256(%rsp), %zmm22, %zmm21 {%k2}
+
+// CHECK: vpdpbusd  256(%rsp), %zmm22, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x42,0x50,0x6c,0x24,0x04]
+          vpdpbusd  256(%rsp), %zmm22, %zmm21 {%k2}
+
+// CHECK: vpdpbusd  268435456(%rcx,%r14,8), %zmm22, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x42,0x50,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpdpbusd  268435456(%rcx,%r14,8), %zmm22, %zmm21 {%k2}
+
+// CHECK: vpdpbusd  -536870912(%rcx,%r14,8), %zmm22, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x42,0x50,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpbusd  -536870912(%rcx,%r14,8), %zmm22, %zmm21 {%k2}
+
+// CHECK: vpdpbusd  -536870910(%rcx,%r14,8), %zmm22, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x42,0x50,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpbusd  -536870910(%rcx,%r14,8), %zmm22, %zmm21 {%k2}
+
+// CHECK: vpdpbusds  (%rcx), %zmm22, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x42,0x51,0x29]
+          vpdpbusds  (%rcx), %zmm22, %zmm21 {%k2}
+
+// CHECK: vpdpbusds  -256(%rsp), %zmm22, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x42,0x51,0x6c,0x24,0xfc]
+          vpdpbusds  -256(%rsp), %zmm22, %zmm21 {%k2}
+
+// CHECK: vpdpbusds  256(%rsp), %zmm22, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x42,0x51,0x6c,0x24,0x04]
+          vpdpbusds  256(%rsp), %zmm22, %zmm21 {%k2}
+
+// CHECK: vpdpbusds  268435456(%rcx,%r14,8), %zmm22, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x42,0x51,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpdpbusds  268435456(%rcx,%r14,8), %zmm22, %zmm21 {%k2}
+
+// CHECK: vpdpbusds  -536870912(%rcx,%r14,8), %zmm22, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x42,0x51,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpbusds  -536870912(%rcx,%r14,8), %zmm22, %zmm21 {%k2}
+
+// CHECK: vpdpbusds  -536870910(%rcx,%r14,8), %zmm22, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x42,0x51,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpbusds  -536870910(%rcx,%r14,8), %zmm22, %zmm21 {%k2}
+
+// CHECK: vpdpwssd  (%rcx), %zmm22, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x42,0x52,0x29]
+          vpdpwssd  (%rcx), %zmm22, %zmm21 {%k2}
+
+// CHECK: vpdpwssd  -256(%rsp), %zmm22, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x42,0x52,0x6c,0x24,0xfc]
+          vpdpwssd  -256(%rsp), %zmm22, %zmm21 {%k2}
+
+// CHECK: vpdpwssd  256(%rsp), %zmm22, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x42,0x52,0x6c,0x24,0x04]
+          vpdpwssd  256(%rsp), %zmm22, %zmm21 {%k2}
+
+// CHECK: vpdpwssd  268435456(%rcx,%r14,8), %zmm22, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x42,0x52,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpdpwssd  268435456(%rcx,%r14,8), %zmm22, %zmm21 {%k2}
+
+// CHECK: vpdpwssd  -536870912(%rcx,%r14,8), %zmm22, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x42,0x52,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpwssd  -536870912(%rcx,%r14,8), %zmm22, %zmm21 {%k2}
+
+// CHECK: vpdpwssd  -536870910(%rcx,%r14,8), %zmm22, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x42,0x52,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpwssd  -536870910(%rcx,%r14,8), %zmm22, %zmm21 {%k2}
+
+// CHECK: vpdpwssds  (%rcx), %zmm22, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x42,0x53,0x29]
+          vpdpwssds  (%rcx), %zmm22, %zmm21 {%k2}
+
+// CHECK: vpdpwssds  -256(%rsp), %zmm22, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x42,0x53,0x6c,0x24,0xfc]
+          vpdpwssds  -256(%rsp), %zmm22, %zmm21 {%k2}
+
+// CHECK: vpdpwssds  256(%rsp), %zmm22, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x4d,0x42,0x53,0x6c,0x24,0x04]
+          vpdpwssds  256(%rsp), %zmm22, %zmm21 {%k2}
+
+// CHECK: vpdpwssds  268435456(%rcx,%r14,8), %zmm22, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x42,0x53,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpdpwssds  268435456(%rcx,%r14,8), %zmm22, %zmm21 {%k2}
+
+// CHECK: vpdpwssds  -536870912(%rcx,%r14,8), %zmm22, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x42,0x53,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpdpwssds  -536870912(%rcx,%r14,8), %zmm22, %zmm21 {%k2}
+
+// CHECK: vpdpwssds  -536870910(%rcx,%r14,8), %zmm22, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x4d,0x42,0x53,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpdpwssds  -536870910(%rcx,%r14,8), %zmm22, %zmm21 {%k2}
+




More information about the llvm-commits mailing list