[llvm] r318745 - [x86][icelake]vbmi2

Coby Tayree via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 21 01:48:44 PST 2017


Author: coby
Date: Tue Nov 21 01:48:44 2017
New Revision: 318745

URL: http://llvm.org/viewvc/llvm-project?rev=318745&view=rev
Log:
[x86][icelake]vbmi2
Introduce AVX-512 VBMI2 support, consisting of the following instructions:
vpcompress{b,w}
vpexpand{b,w}
vpsh{l,r}d{w,d,q}
vpsh{l,r}dv{w,d,q}
Differential Revision: https://reviews.llvm.org/D40206

Added:
    llvm/trunk/test/CodeGen/X86/avx512vbmi2-intrinsics.ll
    llvm/trunk/test/CodeGen/X86/avx512vbmi2vl-intrinsics.ll
    llvm/trunk/test/MC/X86/avx512vbmi2-encoding.s
    llvm/trunk/test/MC/X86/avx512vbmi2vl-encoding.s
Modified:
    llvm/trunk/include/llvm/IR/IntrinsicsX86.td
    llvm/trunk/lib/Support/Host.cpp
    llvm/trunk/lib/Target/X86/X86.td
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.h
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
    llvm/trunk/lib/Target/X86/X86InstrInfo.td
    llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
    llvm/trunk/lib/Target/X86/X86Subtarget.cpp
    llvm/trunk/lib/Target/X86/X86Subtarget.h

Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=318745&r1=318744&r2=318745&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Tue Nov 21 01:48:44 2017
@@ -5164,6 +5164,56 @@ let TargetPrefix = "x86" in {
         Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty,
                    llvm_i8_ty], [IntrArgMemOnly]>;
 
+  def int_x86_avx512_mask_compress_b_512 :
+                             GCCBuiltin<"__builtin_ia32_compressqi512_mask">,
+        Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
+                   llvm_i64_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_compress_w_512 :
+                             GCCBuiltin<"__builtin_ia32_compresshi512_mask">,
+        Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
+                   llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_compress_b_256 :
+                             GCCBuiltin<"__builtin_ia32_compressqi256_mask">,
+        Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
+                   llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_compress_w_256 :
+                             GCCBuiltin<"__builtin_ia32_compresshi256_mask">,
+        Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
+                   llvm_i16_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_compress_b_128 :
+                             GCCBuiltin<"__builtin_ia32_compressqi128_mask">,
+        Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
+                   llvm_i16_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_compress_w_128 :
+                             GCCBuiltin<"__builtin_ia32_compresshi128_mask">,
+        Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
+                   llvm_i8_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_compress_store_b_512 :
+                            GCCBuiltin<"__builtin_ia32_compressstoreqi512_mask">,
+        Intrinsic<[], [llvm_ptr_ty, llvm_v64i8_ty,
+                   llvm_i64_ty], [IntrArgMemOnly]>;
+  def int_x86_avx512_mask_compress_store_w_512 :
+                            GCCBuiltin<"__builtin_ia32_compressstorehi512_mask">,
+        Intrinsic<[], [llvm_ptr_ty, llvm_v32i16_ty,
+                   llvm_i32_ty], [IntrArgMemOnly]>;
+  def int_x86_avx512_mask_compress_store_b_256 :
+                            GCCBuiltin<"__builtin_ia32_compressstoreqi256_mask">,
+        Intrinsic<[], [llvm_ptr_ty, llvm_v32i8_ty,
+                   llvm_i32_ty], [IntrArgMemOnly]>;
+  def int_x86_avx512_mask_compress_store_w_256 :
+                            GCCBuiltin<"__builtin_ia32_compressstorehi256_mask">,
+        Intrinsic<[], [llvm_ptr_ty, llvm_v16i16_ty,
+                   llvm_i16_ty], [IntrArgMemOnly]>;
+  def int_x86_avx512_mask_compress_store_b_128 :
+                            GCCBuiltin<"__builtin_ia32_compressstoreqi128_mask">,
+        Intrinsic<[], [llvm_ptr_ty, llvm_v16i8_ty,
+                   llvm_i16_ty], [IntrArgMemOnly]>;
+  def int_x86_avx512_mask_compress_store_w_128 :
+                            GCCBuiltin<"__builtin_ia32_compressstorehi128_mask">,
+        Intrinsic<[], [llvm_ptr_ty, llvm_v8i16_ty,
+                   llvm_i8_ty], [IntrArgMemOnly]>;
+
 // expand
   def int_x86_avx512_mask_expand_ps_512 :
                              GCCBuiltin<"__builtin_ia32_expandsf512_mask">,
@@ -5265,6 +5315,304 @@ let TargetPrefix = "x86" in {
         Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty, llvm_v2i64_ty,
                    llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
 
+  def int_x86_avx512_mask_expand_b_512 :
+                            GCCBuiltin<"__builtin_ia32_expandqi512_mask">,
+        Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
+                   llvm_i64_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_expand_w_512 :
+                            GCCBuiltin<"__builtin_ia32_expandhi512_mask">,
+        Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
+                   llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_expand_b_256 :
+                            GCCBuiltin<"__builtin_ia32_expandqi256_mask">,
+        Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
+                   llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_expand_w_256 :
+                            GCCBuiltin<"__builtin_ia32_expandhi256_mask">,
+        Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
+                   llvm_i16_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_expand_b_128 :
+                            GCCBuiltin<"__builtin_ia32_expandqi128_mask">,
+        Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
+                   llvm_i16_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_expand_w_128 :
+                            GCCBuiltin<"__builtin_ia32_expandhi128_mask">,
+        Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
+                   llvm_i8_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_expand_load_b_512 :
+                            GCCBuiltin<"__builtin_ia32_expandloadqi512_mask">,
+        Intrinsic<[llvm_v64i8_ty], [llvm_ptr_ty, llvm_v64i8_ty,
+                   llvm_i64_ty], [IntrReadMem, IntrArgMemOnly]>;
+  def int_x86_avx512_mask_expand_load_w_512 :
+                            GCCBuiltin<"__builtin_ia32_expandloadhi512_mask">,
+        Intrinsic<[llvm_v32i16_ty], [llvm_ptr_ty, llvm_v32i16_ty,
+                   llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
+  def int_x86_avx512_mask_expand_load_b_256 :
+                            GCCBuiltin<"__builtin_ia32_expandloadqi256_mask">,
+        Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty, llvm_v32i8_ty,
+                   llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
+  def int_x86_avx512_mask_expand_load_w_256 :
+                            GCCBuiltin<"__builtin_ia32_expandloadhi256_mask">,
+        Intrinsic<[llvm_v16i16_ty], [llvm_ptr_ty, llvm_v16i16_ty,
+                   llvm_i16_ty], [IntrReadMem, IntrArgMemOnly]>;
+  def int_x86_avx512_mask_expand_load_b_128 :
+                            GCCBuiltin<"__builtin_ia32_expandloadqi128_mask">,
+        Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_v16i8_ty,
+                   llvm_i16_ty], [IntrReadMem, IntrArgMemOnly]>;
+  def int_x86_avx512_mask_expand_load_w_128 :
+                            GCCBuiltin<"__builtin_ia32_expandloadhi128_mask">,
+        Intrinsic<[llvm_v8i16_ty], [llvm_ptr_ty, llvm_v8i16_ty,
+                   llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
+}
+
+// VBMI2 Concat & Shift
+let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
+  def int_x86_avx512_mask_vpshld_q_512 :
+        GCCBuiltin<"__builtin_ia32_vpshldq512_mask">,
+        Intrinsic<[llvm_v8i64_ty],
+                  [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_v8i64_ty,
+                   llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vpshld_q_256 :
+        GCCBuiltin<"__builtin_ia32_vpshldq256_mask">,
+        Intrinsic<[llvm_v4i64_ty],
+                  [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_v4i64_ty,
+                   llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vpshld_q_128 :
+        GCCBuiltin<"__builtin_ia32_vpshldq128_mask">,
+        Intrinsic<[llvm_v2i64_ty],
+                  [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v2i64_ty,
+                   llvm_i8_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpshld_d_512 :
+        GCCBuiltin<"__builtin_ia32_vpshldd512_mask">,
+        Intrinsic<[llvm_v16i32_ty],
+                  [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty, llvm_v16i32_ty,
+                   llvm_i16_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vpshld_d_256 :
+        GCCBuiltin<"__builtin_ia32_vpshldd256_mask">,
+        Intrinsic<[llvm_v8i32_ty],
+                  [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_v8i32_ty,
+                   llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vpshld_d_128 :
+        GCCBuiltin<"__builtin_ia32_vpshldd128_mask">,
+        Intrinsic<[llvm_v4i32_ty],
+                  [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty,
+                   llvm_i8_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpshld_w_512 :
+        GCCBuiltin<"__builtin_ia32_vpshldw512_mask">,
+        Intrinsic<[llvm_v32i16_ty],
+                  [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_v32i16_ty,
+                   llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vpshld_w_256 :
+        GCCBuiltin<"__builtin_ia32_vpshldw256_mask">,
+        Intrinsic<[llvm_v16i16_ty],
+                  [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty, llvm_v16i16_ty,
+                   llvm_i16_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vpshld_w_128 :
+        GCCBuiltin<"__builtin_ia32_vpshldw128_mask">,
+        Intrinsic<[llvm_v8i16_ty],
+                  [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty, llvm_v8i16_ty,
+                   llvm_i8_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpshrd_q_512 :
+        GCCBuiltin<"__builtin_ia32_vpshrdq512_mask">,
+        Intrinsic<[llvm_v8i64_ty],
+                  [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_v8i64_ty,
+                   llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vpshrd_q_256 :
+        GCCBuiltin<"__builtin_ia32_vpshrdq256_mask">,
+        Intrinsic<[llvm_v4i64_ty],
+                  [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_v4i64_ty,
+                   llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vpshrd_q_128 :
+        GCCBuiltin<"__builtin_ia32_vpshrdq128_mask">,
+        Intrinsic<[llvm_v2i64_ty],
+                  [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v2i64_ty,
+                   llvm_i8_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpshrd_d_512 :
+        GCCBuiltin<"__builtin_ia32_vpshrdd512_mask">,
+        Intrinsic<[llvm_v16i32_ty],
+                  [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty, llvm_v16i32_ty,
+                   llvm_i16_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vpshrd_d_256 :
+        GCCBuiltin<"__builtin_ia32_vpshrdd256_mask">,
+        Intrinsic<[llvm_v8i32_ty],
+                  [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_v8i32_ty,
+                   llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vpshrd_d_128 :
+        GCCBuiltin<"__builtin_ia32_vpshrdd128_mask">,
+        Intrinsic<[llvm_v4i32_ty],
+                  [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty,
+                   llvm_i8_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpshrd_w_512 :
+        GCCBuiltin<"__builtin_ia32_vpshrdw512_mask">,
+        Intrinsic<[llvm_v32i16_ty],
+                  [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty, llvm_v32i16_ty,
+                   llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vpshrd_w_256 :
+        GCCBuiltin<"__builtin_ia32_vpshrdw256_mask">,
+        Intrinsic<[llvm_v16i16_ty],
+                  [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty, llvm_v16i16_ty,
+                   llvm_i16_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vpshrd_w_128 :
+        GCCBuiltin<"__builtin_ia32_vpshrdw128_mask">,
+        Intrinsic<[llvm_v8i16_ty],
+                  [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty, llvm_v8i16_ty,
+                   llvm_i8_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpshldv_w_128 :
+        GCCBuiltin<"__builtin_ia32_vpshldvw128_mask">,
+        Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
+                   llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_maskz_vpshldv_w_128 :
+        GCCBuiltin<"__builtin_ia32_vpshldvw128_maskz">,
+        Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
+                   llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vpshldv_w_256 :
+        GCCBuiltin<"__builtin_ia32_vpshldvw256_mask">,
+        Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
+                   llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
+  def int_x86_avx512_maskz_vpshldv_w_256 :
+        GCCBuiltin<"__builtin_ia32_vpshldvw256_maskz">,
+        Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
+                   llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vpshldv_w_512 :
+        GCCBuiltin<"__builtin_ia32_vpshldvw512_mask">,
+        Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
+                   llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_maskz_vpshldv_w_512 :
+        GCCBuiltin<"__builtin_ia32_vpshldvw512_maskz">,
+        Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
+                   llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpshldv_q_128 :
+        GCCBuiltin<"__builtin_ia32_vpshldvq128_mask">,
+        Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
+                   llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_maskz_vpshldv_q_128 :
+        GCCBuiltin<"__builtin_ia32_vpshldvq128_maskz">,
+        Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
+                   llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vpshldv_q_256 :
+        GCCBuiltin<"__builtin_ia32_vpshldvq256_mask">,
+        Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
+                   llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_maskz_vpshldv_q_256 :
+        GCCBuiltin<"__builtin_ia32_vpshldvq256_maskz">,
+        Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
+                   llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vpshldv_q_512 :
+        GCCBuiltin<"__builtin_ia32_vpshldvq512_mask">,
+        Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
+                   llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_maskz_vpshldv_q_512 :
+        GCCBuiltin<"__builtin_ia32_vpshldvq512_maskz">,
+        Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
+                   llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpshldv_d_128 :
+        GCCBuiltin<"__builtin_ia32_vpshldvd128_mask">,
+        Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
+                   llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_maskz_vpshldv_d_128 :
+        GCCBuiltin<"__builtin_ia32_vpshldvd128_maskz">,
+        Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
+                   llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vpshldv_d_256 :
+        GCCBuiltin<"__builtin_ia32_vpshldvd256_mask">,
+        Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
+                   llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_maskz_vpshldv_d_256 :
+        GCCBuiltin<"__builtin_ia32_vpshldvd256_maskz">,
+        Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
+                   llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vpshldv_d_512 :
+        GCCBuiltin<"__builtin_ia32_vpshldvd512_mask">,
+        Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
+                   llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+  def int_x86_avx512_maskz_vpshldv_d_512 :
+        GCCBuiltin<"__builtin_ia32_vpshldvd512_maskz">,
+        Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
+                   llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpshrdv_w_128 :
+        GCCBuiltin<"__builtin_ia32_vpshrdvw128_mask">,
+        Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
+                   llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_maskz_vpshrdv_w_128 :
+        GCCBuiltin<"__builtin_ia32_vpshrdvw128_maskz">,
+        Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
+                   llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vpshrdv_w_256 :
+        GCCBuiltin<"__builtin_ia32_vpshrdvw256_mask">,
+        Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
+                   llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
+  def int_x86_avx512_maskz_vpshrdv_w_256 :
+        GCCBuiltin<"__builtin_ia32_vpshrdvw256_maskz">,
+        Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
+                   llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vpshrdv_w_512 :
+        GCCBuiltin<"__builtin_ia32_vpshrdvw512_mask">,
+        Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
+                   llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
+  def int_x86_avx512_maskz_vpshrdv_w_512 :
+        GCCBuiltin<"__builtin_ia32_vpshrdvw512_maskz">,
+        Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
+                   llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpshrdv_q_128 :
+        GCCBuiltin<"__builtin_ia32_vpshrdvq128_mask">,
+        Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
+                   llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_maskz_vpshrdv_q_128 :
+        GCCBuiltin<"__builtin_ia32_vpshrdvq128_maskz">,
+        Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
+                   llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vpshrdv_q_256 :
+        GCCBuiltin<"__builtin_ia32_vpshrdvq256_mask">,
+        Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
+                   llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_maskz_vpshrdv_q_256 :
+        GCCBuiltin<"__builtin_ia32_vpshrdvq256_maskz">,
+        Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
+                   llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vpshrdv_q_512 :
+        GCCBuiltin<"__builtin_ia32_vpshrdvq512_mask">,
+        Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
+                   llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_maskz_vpshrdv_q_512 :
+        GCCBuiltin<"__builtin_ia32_vpshrdvq512_maskz">,
+        Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
+                   llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_vpshrdv_d_128 :
+        GCCBuiltin<"__builtin_ia32_vpshrdvd128_mask">,
+        Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
+                   llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_maskz_vpshrdv_d_128 :
+        GCCBuiltin<"__builtin_ia32_vpshrdvd128_maskz">,
+        Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
+                   llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vpshrdv_d_256 :
+        GCCBuiltin<"__builtin_ia32_vpshrdvd256_mask">,
+        Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
+                   llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_maskz_vpshrdv_d_256 :
+        GCCBuiltin<"__builtin_ia32_vpshrdvd256_maskz">,
+        Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
+                   llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_vpshrdv_d_512 :
+        GCCBuiltin<"__builtin_ia32_vpshrdvd512_mask">,
+        Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
+                   llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+  def int_x86_avx512_maskz_vpshrdv_d_512 :
+        GCCBuiltin<"__builtin_ia32_vpshrdvd512_maskz">,
+        Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
+                   llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
 }
 
 // truncate

Modified: llvm/trunk/lib/Support/Host.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Host.cpp?rev=318745&r1=318744&r2=318745&view=diff
==============================================================================
--- llvm/trunk/lib/Support/Host.cpp (original)
+++ llvm/trunk/lib/Support/Host.cpp Tue Nov 21 01:48:44 2017
@@ -1258,6 +1258,7 @@ bool sys::getHostCPUFeatures(StringMap<b
 
   Features["prefetchwt1"]     = HasLeaf7 && ((ECX >>  0) & 1);
   Features["avx512vbmi"]      = HasLeaf7 && ((ECX >>  1) & 1) && HasAVX512Save;
+  Features["avx512vbmi2"]     = HasLeaf7 && ((ECX >>  6) & 1) && HasAVX512Save;
   Features["avx512vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save;
   Features["pku"]             = HasLeaf7 && ((ECX >> 4) & 1);
   Features["vaes"] = HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave;

Modified: llvm/trunk/lib/Target/X86/X86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.td?rev=318745&r1=318744&r2=318745&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86.td (original)
+++ llvm/trunk/lib/Target/X86/X86.td Tue Nov 21 01:48:44 2017
@@ -152,6 +152,9 @@ def FeatureVLX     : SubtargetFeature<"a
 def FeatureVBMI     : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
                       "Enable AVX-512 Vector Byte Manipulation Instructions",
                                       [FeatureBWI]>;
+def FeatureVBMI2    : SubtargetFeature<"avx512vbmi2", "HasVBMI2", "true",
+                      "Enable AVX-512 further Vector Byte Manipulation Instructions",
+                                      [FeatureBWI]>;
 def FeatureIFMA     : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
                       "Enable AVX-512 Integer Fused Multiple-Add",
                                       [FeatureAVX512]>;

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=318745&r1=318744&r2=318745&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Nov 21 01:48:44 2017
@@ -25090,6 +25090,10 @@ const char *X86TargetLowering::getTarget
   case X86ISD::PACKUS:             return "X86ISD::PACKUS";
   case X86ISD::PALIGNR:            return "X86ISD::PALIGNR";
   case X86ISD::VALIGN:             return "X86ISD::VALIGN";
+  case X86ISD::VSHLD:              return "X86ISD::VSHLD";
+  case X86ISD::VSHRD:              return "X86ISD::VSHRD";
+  case X86ISD::VSHLDV:             return "X86ISD::VSHLDV";
+  case X86ISD::VSHRDV:             return "X86ISD::VSHRDV";
   case X86ISD::PSHUFD:             return "X86ISD::PSHUFD";
   case X86ISD::PSHUFHW:            return "X86ISD::PSHUFHW";
   case X86ISD::PSHUFLW:            return "X86ISD::PSHUFLW";

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=318745&r1=318744&r2=318745&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Tue Nov 21 01:48:44 2017
@@ -391,6 +391,11 @@ namespace llvm {
       PSHUFHW,
       PSHUFLW,
       SHUFP,
+      // VBMI2 Concat & Shift.
+      VSHLD,
+      VSHRD,
+      VSHLDV,
+      VSHRDV,
       //Shuffle Packed Values at 128-bit granularity.
       SHUF128,
       MOVDDUP,

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=318745&r1=318744&r2=318745&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Tue Nov 21 01:48:44 2017
@@ -8491,11 +8491,13 @@ multiclass compress_by_vec_width_lowerin
 }
 
 multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
-                                 AVX512VLVectorVTInfo VTInfo> {
+                                 AVX512VLVectorVTInfo VTInfo,
+                                 Predicate Pred = HasAVX512> {
+  let Predicates = [Pred] in
   defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr>,
            compress_by_vec_width_lowering<VTInfo.info512>, EVEX_V512;
 
-  let Predicates = [HasVLX] in {
+  let Predicates = [Pred, HasVLX] in {
     defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr>,
                 compress_by_vec_width_lowering<VTInfo.info256>, EVEX_V256;
     defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr>,
@@ -8539,11 +8541,13 @@ multiclass expand_by_vec_width_lowering<
 }
 
 multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
-                                 AVX512VLVectorVTInfo VTInfo> {
+                               AVX512VLVectorVTInfo VTInfo,
+                               Predicate Pred = HasAVX512> {
+  let Predicates = [Pred] in
   defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr>,
            expand_by_vec_width_lowering<VTInfo.info512>, EVEX_V512;
 
-  let Predicates = [HasVLX] in {
+  let Predicates = [Pred, HasVLX] in {
     defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr>,
                 expand_by_vec_width_lowering<VTInfo.info256>, EVEX_V256;
     defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr>,
@@ -8748,12 +8752,13 @@ multiclass avx512_common_fp_sae_packed_i
 }
 
 multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
-                   AVX512VLVectorVTInfo DestInfo, AVX512VLVectorVTInfo SrcInfo>{
-  let Predicates = [HasBWI] in {
+                   AVX512VLVectorVTInfo DestInfo, AVX512VLVectorVTInfo SrcInfo,
+                   Predicate Pred = HasBWI> {
+  let Predicates = [Pred] in {
     defm Z    : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info512,
                            SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
   }
-  let Predicates = [HasBWI, HasVLX] in {
+  let Predicates = [Pred, HasVLX] in {
     defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info128,
                            SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
     defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode,  DestInfo.info256,
@@ -8762,11 +8767,12 @@ multiclass avx512_common_3Op_rm_imm8<bit
 }
 
 multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
-                                bits<8> opc, SDNode OpNode>{
-  let Predicates = [HasAVX512] in {
+                                  bits<8> opc, SDNode OpNode,
+                                  Predicate Pred = HasAVX512> {
+  let Predicates = [Pred] in {
     defm Z    : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
   }
-  let Predicates = [HasAVX512, HasVLX] in {
+  let Predicates = [Pred, HasVLX] in {
     defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info128>, EVEX_V128;
     defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
   }
@@ -10063,3 +10069,94 @@ defm : vpclmulqdq_aliases<"VPCLMULQDQZ",
 defm : vpclmulqdq_aliases<"VPCLMULQDQZ128", VR128X, i128mem>;
 defm : vpclmulqdq_aliases<"VPCLMULQDQZ256", VR256X, i256mem>;
 
+//===----------------------------------------------------------------------===//
+// VBMI2
+//===----------------------------------------------------------------------===//
+
+multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
+                              X86VectorVTInfo VTI> {
+  let Constraints = "$src1 = $dst",
+      ExeDomain   = VTI.ExeDomain in {
+    defm r:   AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
+                (ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
+                "$src3, $src2", "$src2, $src3",
+                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
+                AVX512FMA3Base;
+    defm m:   AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
+                (ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
+                "$src3, $src2", "$src2, $src3",
+                (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
+                        (VTI.VT (bitconvert (VTI.LdFrag addr:$src3)))))>,
+                AVX512FMA3Base;
+  }
+}
+
+multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
+                               X86VectorVTInfo VTI>
+         : VBMI2_shift_var_rm<Op, OpStr, OpNode, VTI> {
+  let Constraints = "$src1 = $dst",
+      ExeDomain   = VTI.ExeDomain in
+  defm mb:  AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
+              (ins VTI.RC:$src2, VTI.ScalarMemOp:$src3), OpStr,
+              "${src3}"##VTI.BroadcastStr##", $src2",
+              "$src2, ${src3}"##VTI.BroadcastStr,
+              (OpNode VTI.RC:$src1, VTI.RC:$src2,
+               (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3))))>,
+              AVX512FMA3Base, EVEX_B;
+}
+
+multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
+                                     AVX512VLVectorVTInfo VTI> {
+  let Predicates = [HasVBMI2] in
+  defm Z      : VBMI2_shift_var_rm<Op, OpStr, OpNode, VTI.info512>, EVEX_V512;
+  let Predicates = [HasVBMI2, HasVLX] in {
+    defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, VTI.info256>, EVEX_V256;
+    defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, VTI.info128>, EVEX_V128;
+  }
+}
+
+multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
+                                      AVX512VLVectorVTInfo VTI> {
+  let Predicates = [HasVBMI2] in
+  defm Z      : VBMI2_shift_var_rmb<Op, OpStr, OpNode, VTI.info512>, EVEX_V512;
+  let Predicates = [HasVBMI2, HasVLX] in {
+    defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, VTI.info256>, EVEX_V256;
+    defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, VTI.info128>, EVEX_V128;
+  }
+}
+multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
+                           SDNode OpNode> {
+  defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode,
+             avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
+  defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode,
+             avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
+  defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode,
+             avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
+}
+
+multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
+                           SDNode OpNode> {
+  defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", avx512vl_i16_info,
+             avx512vl_i16_info, HasVBMI2>, VEX_W, EVEX_CD8<16, CD8VF>;
+  defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
+             OpNode, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
+  defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode,
+             HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
+}
+
+// Concat & Shift
+defm VPSHLDV     : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv>;
+defm VPSHRDV     : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv>;
+defm VPSHLD      : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld>;
+defm VPSHRD      : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd>;
+// Compress
+defm VPCOMPRESSB : compress_by_elt_width <0x63, "vpcompressb", avx512vl_i8_info,
+                                          HasVBMI2>, EVEX;
+defm VPCOMPRESSW : compress_by_elt_width <0x63, "vpcompressw", avx512vl_i16_info,
+                                          HasVBMI2>, EVEX, VEX_W;
+// Expand
+defm VPEXPANDB : expand_by_elt_width <0x62, "vpexpandb", avx512vl_i8_info,
+                                      HasVBMI2>, EVEX;
+defm VPEXPANDW : expand_by_elt_width <0x62, "vpexpandw", avx512vl_i16_info,
+                                      HasVBMI2>, EVEX, VEX_W;
+

Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=318745&r1=318744&r2=318745&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Tue Nov 21 01:48:44 2017
@@ -350,6 +350,19 @@ def X86PAlignr : SDNode<"X86ISD::PALIGNR
                                              SDTCisVT<3, i8>]>>;
 def X86VAlign  : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>;
 
+def X86VShld   : SDNode<"X86ISD::VSHLD", SDTShuff3OpI>;
+def X86VShrd   : SDNode<"X86ISD::VSHRD", SDTShuff3OpI>;
+def X86VShldv  : SDNode<"X86ISD::VSHLDV",
+                        SDTypeProfile<1, 3, [SDTCisVec<0>,
+                                             SDTCisSameAs<0,1>,
+                                             SDTCisSameAs<0,2>,
+                                             SDTCisSameAs<0,3>]>>;
+def X86VShrdv  : SDNode<"X86ISD::VSHRDV",
+                        SDTypeProfile<1, 3, [SDTCisVec<0>,
+                                             SDTCisSameAs<0,1>,
+                                             SDTCisSameAs<0,2>,
+                                             SDTCisSameAs<0,3>]>>;
+
 def X86Conflict : SDNode<"X86ISD::CONFLICT", SDTIntUnaryOp>;
 
 def X86PShufd  : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>;

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=318745&r1=318744&r2=318745&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Tue Nov 21 01:48:44 2017
@@ -862,6 +862,7 @@ def HasBMI2      : Predicate<"Subtarget-
 def NoBMI2       : Predicate<"!Subtarget->hasBMI2()">;
 def HasVBMI      : Predicate<"Subtarget->hasVBMI()">,
                      AssemblerPredicate<"FeatureVBMI", "AVX-512 VBMI ISA">;
+def HasVBMI2     : Predicate<"Subtarget->hasVBMI2()">; // NOTE(review): no AssemblerPredicate, unlike HasVBMI/HasIFMA - confirm intended
 def HasIFMA      : Predicate<"Subtarget->hasIFMA()">,
                      AssemblerPredicate<"FeatureIFMA", "AVX-512 IFMA ISA">;
 def HasRTM       : Predicate<"Subtarget->hasRTM()">;

Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=318745&r1=318744&r2=318745&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Tue Nov 21 01:48:44 2017
@@ -120,6 +120,12 @@ static const IntrinsicData IntrinsicsWit
   X86_INTRINSIC_DATA(avx512_gatherpf_qps_512, PREFETCH,
                      X86::VGATHERPF0QPSm, X86::VGATHERPF1QPSm),
 
+  X86_INTRINSIC_DATA(avx512_mask_compress_store_b_128,
+                     COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_compress_store_b_256,
+                     COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_compress_store_b_512,
+                     COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
   X86_INTRINSIC_DATA(avx512_mask_compress_store_d_128,
                      COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
   X86_INTRINSIC_DATA(avx512_mask_compress_store_d_256,
@@ -144,6 +150,18 @@ static const IntrinsicData IntrinsicsWit
                      COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
   X86_INTRINSIC_DATA(avx512_mask_compress_store_q_512,
                      COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_compress_store_w_128,
+                     COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_compress_store_w_256,
+                     COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_compress_store_w_512,
+                     COMPRESS_TO_MEM, X86ISD::COMPRESS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_expand_load_b_128,
+                     EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
+  X86_INTRINSIC_DATA(avx512_mask_expand_load_b_256,
+                     EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
+  X86_INTRINSIC_DATA(avx512_mask_expand_load_b_512,
+                     EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
   X86_INTRINSIC_DATA(avx512_mask_expand_load_d_128,
                      EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
   X86_INTRINSIC_DATA(avx512_mask_expand_load_d_256,
@@ -168,6 +186,12 @@ static const IntrinsicData IntrinsicsWit
                      EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
   X86_INTRINSIC_DATA(avx512_mask_expand_load_q_512,
                      EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
+  X86_INTRINSIC_DATA(avx512_mask_expand_load_w_128,
+                     EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
+  X86_INTRINSIC_DATA(avx512_mask_expand_load_w_256,
+                     EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
+  X86_INTRINSIC_DATA(avx512_mask_expand_load_w_512,
+                     EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
   X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_128, TRUNCATE_TO_MEM_VI8,
                      X86ISD::VTRUNC, 0),
   X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_256, TRUNCATE_TO_MEM_VI8,
@@ -479,6 +503,13 @@ static const IntrinsicData  IntrinsicsWi
                      X86ISD::FSETCCM, X86ISD::FSETCCM_RND),
   X86_INTRINSIC_DATA(avx512_mask_cmp_ss,     CMP_MASK_SCALAR_CC,
                      X86ISD::FSETCCM, X86ISD::FSETCCM_RND),
+
+  X86_INTRINSIC_DATA(avx512_mask_compress_b_128,  COMPRESS_EXPAND_IN_REG,
+                     X86ISD::COMPRESS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_compress_b_256,  COMPRESS_EXPAND_IN_REG,
+                     X86ISD::COMPRESS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_compress_b_512,  COMPRESS_EXPAND_IN_REG,
+                     X86ISD::COMPRESS, 0),
   X86_INTRINSIC_DATA(avx512_mask_compress_d_128,  COMPRESS_EXPAND_IN_REG,
                      X86ISD::COMPRESS, 0),
   X86_INTRINSIC_DATA(avx512_mask_compress_d_256,  COMPRESS_EXPAND_IN_REG,
@@ -503,6 +534,12 @@ static const IntrinsicData  IntrinsicsWi
                      X86ISD::COMPRESS, 0),
   X86_INTRINSIC_DATA(avx512_mask_compress_q_512,  COMPRESS_EXPAND_IN_REG,
                      X86ISD::COMPRESS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_compress_w_128,  COMPRESS_EXPAND_IN_REG,
+                     X86ISD::COMPRESS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_compress_w_256,  COMPRESS_EXPAND_IN_REG,
+                     X86ISD::COMPRESS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_compress_w_512,  COMPRESS_EXPAND_IN_REG,
+                     X86ISD::COMPRESS, 0),
   X86_INTRINSIC_DATA(avx512_mask_conflict_d_128, INTR_TYPE_1OP_MASK,
                      X86ISD::CONFLICT, 0),
   X86_INTRINSIC_DATA(avx512_mask_conflict_d_256, INTR_TYPE_1OP_MASK,
@@ -677,6 +714,12 @@ static const IntrinsicData  IntrinsicsWi
                      X86ISD::FDIVS_RND, 0),
   X86_INTRINSIC_DATA(avx512_mask_div_ss_round, INTR_TYPE_SCALAR_MASK_RM,
                      X86ISD::FDIVS_RND, 0),
+  X86_INTRINSIC_DATA(avx512_mask_expand_b_128,  COMPRESS_EXPAND_IN_REG,
+                     X86ISD::EXPAND, 0),
+  X86_INTRINSIC_DATA(avx512_mask_expand_b_256,  COMPRESS_EXPAND_IN_REG,
+                     X86ISD::EXPAND, 0),
+  X86_INTRINSIC_DATA(avx512_mask_expand_b_512,  COMPRESS_EXPAND_IN_REG,
+                     X86ISD::EXPAND, 0),
   X86_INTRINSIC_DATA(avx512_mask_expand_d_128,  COMPRESS_EXPAND_IN_REG,
                      X86ISD::EXPAND, 0),
   X86_INTRINSIC_DATA(avx512_mask_expand_d_256,  COMPRESS_EXPAND_IN_REG,
@@ -701,6 +744,12 @@ static const IntrinsicData  IntrinsicsWi
                      X86ISD::EXPAND, 0),
   X86_INTRINSIC_DATA(avx512_mask_expand_q_512,  COMPRESS_EXPAND_IN_REG,
                      X86ISD::EXPAND, 0),
+  X86_INTRINSIC_DATA(avx512_mask_expand_w_128,  COMPRESS_EXPAND_IN_REG,
+                     X86ISD::EXPAND, 0),
+  X86_INTRINSIC_DATA(avx512_mask_expand_w_256,  COMPRESS_EXPAND_IN_REG,
+                     X86ISD::EXPAND, 0),
+  X86_INTRINSIC_DATA(avx512_mask_expand_w_512,  COMPRESS_EXPAND_IN_REG,
+                     X86ISD::EXPAND, 0),
   X86_INTRINSIC_DATA(avx512_mask_fixupimm_pd_128, FIXUPIMM, X86ISD::VFIXUPIMM, 0),
   X86_INTRINSIC_DATA(avx512_mask_fixupimm_pd_256, FIXUPIMM, X86ISD::VFIXUPIMM, 0),
   X86_INTRINSIC_DATA(avx512_mask_fixupimm_pd_512, FIXUPIMM, X86ISD::VFIXUPIMM, 0),
@@ -1192,6 +1241,44 @@ static const IntrinsicData  IntrinsicsWi
                      X86ISD::VPMADD52L, 0),
   X86_INTRINSIC_DATA(avx512_mask_vpmadd52l_uq_512 , IFMA_OP_MASK,
                      X86ISD::VPMADD52L, 0),
+
+  X86_INTRINSIC_DATA(avx512_mask_vpshld_d_128, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHLD, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshld_d_256, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHLD, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshld_d_512, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHLD, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshld_q_128, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHLD, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshld_q_256, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHLD, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshld_q_512, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHLD, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshld_w_128, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHLD, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshld_w_256, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHLD, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshld_w_512, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHLD, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshldv_d_128, FMA_OP_MASK, X86ISD::VSHLDV, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshldv_d_256, FMA_OP_MASK, X86ISD::VSHLDV, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshldv_d_512, FMA_OP_MASK, X86ISD::VSHLDV, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshldv_q_128, FMA_OP_MASK, X86ISD::VSHLDV, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshldv_q_256, FMA_OP_MASK, X86ISD::VSHLDV, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshldv_q_512, FMA_OP_MASK, X86ISD::VSHLDV, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshldv_w_128, FMA_OP_MASK, X86ISD::VSHLDV, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshldv_w_256, FMA_OP_MASK, X86ISD::VSHLDV, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshldv_w_512, FMA_OP_MASK, X86ISD::VSHLDV, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshrd_d_128, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHRD, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshrd_d_256, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHRD, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshrd_d_512, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHRD, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshrd_q_128, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHRD, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshrd_q_256, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHRD, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshrd_q_512, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHRD, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshrd_w_128, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHRD, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshrd_w_256, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHRD, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshrd_w_512, INTR_TYPE_3OP_IMM8_MASK, X86ISD::VSHRD, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshrdv_d_128, FMA_OP_MASK, X86ISD::VSHRDV, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshrdv_d_256, FMA_OP_MASK, X86ISD::VSHRDV, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshrdv_d_512, FMA_OP_MASK, X86ISD::VSHRDV, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshrdv_q_128, FMA_OP_MASK, X86ISD::VSHRDV, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshrdv_q_256, FMA_OP_MASK, X86ISD::VSHRDV, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshrdv_q_512, FMA_OP_MASK, X86ISD::VSHRDV, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshrdv_w_128, FMA_OP_MASK, X86ISD::VSHRDV, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshrdv_w_256, FMA_OP_MASK, X86ISD::VSHRDV, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshrdv_w_512, FMA_OP_MASK, X86ISD::VSHRDV, 0),
+
   X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_128, FMA_OP_MASK3, ISD::FMA, 0),
   X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_256, FMA_OP_MASK3, ISD::FMA, 0),
   X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_512, FMA_OP_MASK3, ISD::FMA,
@@ -1338,6 +1425,26 @@ static const IntrinsicData  IntrinsicsWi
                      X86ISD::VPMADD52L, 0),
   X86_INTRINSIC_DATA(avx512_maskz_vpmadd52l_uq_512, IFMA_OP_MASKZ,
                      X86ISD::VPMADD52L, 0),
+
+  X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_128, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_256, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpshldv_d_512, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpshldv_q_128, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpshldv_q_256, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpshldv_q_512, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpshldv_w_128, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpshldv_w_256, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpshldv_w_512, FMA_OP_MASKZ, X86ISD::VSHLDV, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_d_128, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_d_256, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_d_512, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_q_128, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_q_256, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_q_512, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_w_128, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_w_256, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_vpshrdv_w_512, FMA_OP_MASKZ, X86ISD::VSHRDV, 0),
+
   X86_INTRINSIC_DATA(avx512_packssdw_512, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
   X86_INTRINSIC_DATA(avx512_packsswb_512, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
   X86_INTRINSIC_DATA(avx512_packusdw_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),

Modified: llvm/trunk/lib/Target/X86/X86Subtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.cpp?rev=318745&r1=318744&r2=318745&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86Subtarget.cpp Tue Nov 21 01:48:44 2017
@@ -313,6 +313,7 @@ void X86Subtarget::initializeEnvironment
   HasBMI = false;
   HasBMI2 = false;
   HasVBMI = false;
+  HasVBMI2 = false;
   HasIFMA = false;
   HasRTM = false;
   HasERI = false;

Modified: llvm/trunk/lib/Target/X86/X86Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.h?rev=318745&r1=318744&r2=318745&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.h (original)
+++ llvm/trunk/lib/Target/X86/X86Subtarget.h Tue Nov 21 01:48:44 2017
@@ -167,6 +167,9 @@ protected:
   /// Processor has VBMI instructions.
   bool HasVBMI;
 
+  /// Processor has VBMI2 instructions.
+  bool HasVBMI2;
+
   /// Processor has Integer Fused Multiply Add
   bool HasIFMA;
 
@@ -483,6 +486,7 @@ public:
   bool hasBMI() const { return HasBMI; }
   bool hasBMI2() const { return HasBMI2; }
   bool hasVBMI() const { return HasVBMI; }
+  bool hasVBMI2() const { return HasVBMI2; }
   bool hasIFMA() const { return HasIFMA; }
   bool hasRTM() const { return HasRTM; }
   bool hasADX() const { return HasADX; }

Added: llvm/trunk/test/CodeGen/X86/avx512vbmi2-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vbmi2-intrinsics.ll?rev=318745&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vbmi2-intrinsics.ll (added)
+++ llvm/trunk/test/CodeGen/X86/avx512vbmi2-intrinsics.ll Tue Nov 21 01:48:44 2017
@@ -0,0 +1,327 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vbmi2 | FileCheck %s
+
+define <32 x i16> @test_expand_load_w_512(i8* %addr, <32 x i16> %data, i32 %mask) {
+; CHECK-LABEL: test_expand_load_w_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vpexpandw (%rdi), %zmm0 {%k1}
+; CHECK-NEXT:    retq
+  %res = call <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %addr, <32 x i16> %data, i32 %mask)
+  ret <32 x i16> %res
+}
+declare <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %addr, <32 x i16> %data, i32 %mask)
+
+define void @test_compress_store_w_512(i8* %addr, <32 x i16> %data, i32 %mask) {
+; CHECK-LABEL: test_compress_store_w_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vpcompressw %zmm0, (%rdi) {%k1}
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.mask.compress.store.w.512(i8* %addr, <32 x i16> %data, i32 %mask)
+  ret void
+}
+declare void @llvm.x86.avx512.mask.compress.store.w.512(i8* %addr, <32 x i16> %data, i32 %mask)
+
+define <64 x i8> @test_expand_load_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
+; CHECK-LABEL: test_expand_load_b_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovq %rsi, %k1
+; CHECK-NEXT:    vpexpandb (%rdi), %zmm0 {%k1}
+; CHECK-NEXT:    retq
+  %res = call <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %addr, <64 x i8> %data, i64 %mask)
+  ret <64 x i8> %res
+}
+declare <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %addr, <64 x i8> %data, i64 %mask)
+
+define void @test_compress_store_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
+; CHECK-LABEL: test_compress_store_b_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovq %rsi, %k1
+; CHECK-NEXT:    vpcompressb %zmm0, (%rdi) {%k1}
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.mask.compress.store.b.512(i8* %addr, <64 x i8> %data, i64 %mask)
+  ret void
+}
+declare void @llvm.x86.avx512.mask.compress.store.b.512(i8* %addr, <64 x i8> %data, i64 %mask)
+
+define <32 x i16> @test_compress_w_512(<32 x i16> %data, <32 x i16> %src, i32 %mask) {
+; CHECK-LABEL: test_compress_w_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vpcompressw %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> %src, i32 %mask)
+  ret <32 x i16> %res
+}
+declare <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16>, <32 x i16>, i32)
+
+define <64 x i8> @test_compress_b_512(<64 x i8> %data, <64 x i8> %src, i64 %mask) {
+; CHECK-LABEL: test_compress_b_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovq %rdi, %k1
+; CHECK-NEXT:    vpcompressb %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> %src, i64 %mask)
+  ret <64 x i8> %res
+}
+declare <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8>, <64 x i8>, i64)
+
+define <32 x i16> @test_expand_w_512(i8* %addr, <32 x i16> %data, i32 %mask) {
+; CHECK-LABEL: test_expand_w_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vpexpandw %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> zeroinitializer, i32 %mask)
+  ret <32 x i16> %res
+}
+declare <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16>, <32 x i16>, i32)
+
+define <64 x i8> @test_expand_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
+; CHECK-LABEL: test_expand_b_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovq %rsi, %k1
+; CHECK-NEXT:    vpexpandb %zmm0, %zmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> zeroinitializer, i64 %mask)
+  ret <64 x i8> %res
+}
+declare <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8>, <64 x i8>, i64)
+
+define <16 x i32>@test_int_x86_avx512_mask_vpshld_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshld_d_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    vpaddd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i32> @llvm.x86.avx512.mask.vpshld.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 %x4)
+  %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpshld.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 -1)
+  %res2 = add <16 x i32> %res, %res1
+  ret <16 x i32> %res2
+}
+declare <16 x i32> @llvm.x86.avx512.mask.vpshld.d.512(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)
+
+define <8 x i64>@test_int_x86_avx512_mask_vpshld_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshld_q_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i64> @llvm.x86.avx512.mask.vpshld.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 %x4)
+  %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpshld.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 -1)
+  %res2 = add <8 x i64> %res, %res1
+  ret <8 x i64> %res2
+}
+declare <8 x i64> @llvm.x86.avx512.mask.vpshld.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)
+
+define <32 x i16>@test_int_x86_avx512_mask_vpshld_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshld_w_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vpshldw $22, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT:    vpshldw $22, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <32 x i16> @llvm.x86.avx512.mask.vpshld.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 22, <32 x i16> %x3, i32 %x4)
+  %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpshld.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 22, <32 x i16> %x3, i32 -1)
+  %res2 = add <32 x i16> %res, %res1
+  ret <32 x i16> %res2
+}
+declare <32 x i16> @llvm.x86.avx512.mask.vpshld.w.512(<32 x i16>, <32 x i16>, i32, <32 x i16>, i32)
+
+define <16 x i32>@test_int_x86_avx512_mask_vpshrd_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshrd_d_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    vpaddd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i32> @llvm.x86.avx512.mask.vpshrd.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 %x4)
+  %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpshrd.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 -1)
+  %res2 = add <16 x i32> %res, %res1
+  ret <16 x i32> %res2
+}
+declare <16 x i32> @llvm.x86.avx512.mask.vpshrd.d.512(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)
+
+define <8 x i64>@test_int_x86_avx512_mask_vpshrd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshrd_q_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i64> @llvm.x86.avx512.mask.vpshrd.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 %x4)
+  %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpshrd.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 -1)
+  %res2 = add <8 x i64> %res, %res1
+  ret <8 x i64> %res2
+}
+declare <8 x i64> @llvm.x86.avx512.mask.vpshrd.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)
+
+define <32 x i16>@test_int_x86_avx512_mask_vpshrd_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshrd_w_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vpshrdw $22, %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT:    vpshrdw $22, %zmm1, %zmm0, %zmm0
+; CHECK-NEXT:    vpaddw %zmm0, %zmm2, %zmm0
+; CHECK-NEXT:    retq
+  %res = call <32 x i16> @llvm.x86.avx512.mask.vpshrd.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 22, <32 x i16> %x3, i32 %x4)
+  %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpshrd.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 22, <32 x i16> %x3, i32 -1)
+  %res2 = add <32 x i16> %res, %res1
+  ret <32 x i16> %res2
+}
+declare <32 x i16> @llvm.x86.avx512.mask.vpshrd.w.512(<32 x i16>, <32 x i16>, i32, <32 x i16>, i32)
+
+declare <16 x i32> @llvm.x86.avx512.mask.vpshrdv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+declare <16 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_mask_vpshrdv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshrdv_d_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm3
+; CHECK-NEXT:    vpshrdvd (%rdi), %zmm1, %zmm3 {%k1}
+; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm4
+; CHECK-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm4
+; CHECK-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT:    vpaddd %zmm0, %zmm4, %zmm0
+; CHECK-NEXT:    vpaddd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    retq
+  %x2 = load <16 x i32>, <16 x i32>* %x2p
+  %res = call <16 x i32> @llvm.x86.avx512.mask.vpshrdv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
+  %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpshrdv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1)
+  %res2 = call <16 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16  %x3)
+  %res3 = add <16 x i32> %res, %res1
+  %res4 = add <16 x i32> %res2, %res3
+  ret <16 x i32> %res4
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.vpshrdv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+declare <8 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_mask_vpshrdv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2p, <8 x i64> %x4, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshrdv_q_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm3
+; CHECK-NEXT:    vpshrdvq (%rdi), %zmm1, %zmm3 {%k1}
+; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm4
+; CHECK-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm4
+; CHECK-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT:    vpaddq %zmm0, %zmm4, %zmm0
+; CHECK-NEXT:    vpaddq %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    retq
+  %x2 = load <8 x i64>, <8 x i64>* %x2p
+  %res = call <8 x i64> @llvm.x86.avx512.mask.vpshrdv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
+  %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpshrdv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x4, i8 -1)
+  %res2 = call <8 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x4, i8  %x3)
+  %res3 = add <8 x i64> %res, %res1
+  %res4 = add <8 x i64> %res2, %res3
+  ret <8 x i64> %res4
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.vpshrdv.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
+declare <32 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
+
+define <32 x i16>@test_int_x86_avx512_mask_vpshrdv_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16>* %x2p, <32 x i16> %x4, i32 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshrdv_w_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm3
+; CHECK-NEXT:    vpshrdvw (%rdi), %zmm1, %zmm3 {%k1}
+; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm4
+; CHECK-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm4 {%k1} {z}
+; CHECK-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0
+; CHECK-NEXT:    vpaddw %zmm4, %zmm0, %zmm0
+; CHECK-NEXT:    vpaddw %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    retq
+  %x2 = load <32 x i16>, <32 x i16>* %x2p
+  %res = call <32 x i16> @llvm.x86.avx512.mask.vpshrdv.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
+  %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpshrdv.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x4, i32 -1)
+  %res2 = call <32 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x4, i32  %x3)
+  %res3 = add <32 x i16> %res, %res1
+  %res4 = add <32 x i16> %res2, %res3
+  ret <32 x i16> %res4
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.vpshldv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+declare <16 x i32> @llvm.x86.avx512.maskz.vpshldv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_mask_vpshldv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshldv_d_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm3
+; CHECK-NEXT:    vpshldvd (%rdi), %zmm1, %zmm3 {%k1}
+; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm4
+; CHECK-NEXT:    vpshldvd %zmm2, %zmm1, %zmm4
+; CHECK-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT:    vpaddd %zmm0, %zmm4, %zmm0
+; CHECK-NEXT:    vpaddd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    retq
+  %x2 = load <16 x i32>, <16 x i32>* %x2p
+  %res = call <16 x i32> @llvm.x86.avx512.mask.vpshldv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
+  %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpshldv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1)
+  %res2 = call <16 x i32> @llvm.x86.avx512.maskz.vpshldv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16  %x3)
+  %res3 = add <16 x i32> %res, %res1
+  %res4 = add <16 x i32> %res2, %res3
+  ret <16 x i32> %res4
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.vpshldv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+declare <8 x i64> @llvm.x86.avx512.maskz.vpshldv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_mask_vpshldv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2p, <8 x i64> %x4, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshldv_q_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm3
+; CHECK-NEXT:    vpshldvq (%rdi), %zmm1, %zmm3 {%k1}
+; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm4
+; CHECK-NEXT:    vpshldvq %zmm2, %zmm1, %zmm4
+; CHECK-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT:    vpaddq %zmm0, %zmm4, %zmm0
+; CHECK-NEXT:    vpaddq %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    retq
+  %x2 = load <8 x i64>, <8 x i64>* %x2p
+  %res = call <8 x i64> @llvm.x86.avx512.mask.vpshldv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
+  %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpshldv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x4, i8 -1)
+  %res2 = call <8 x i64> @llvm.x86.avx512.maskz.vpshldv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x4, i8  %x3)
+  %res3 = add <8 x i64> %res, %res1
+  %res4 = add <8 x i64> %res2, %res3
+  ret <8 x i64> %res4
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.vpshldv.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
+declare <32 x i16> @llvm.x86.avx512.maskz.vpshldv.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
+
+define <32 x i16>@test_int_x86_avx512_mask_vpshldv_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16>* %x2p, <32 x i16> %x4, i32 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshldv_w_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm3
+; CHECK-NEXT:    vpshldvw (%rdi), %zmm1, %zmm3 {%k1}
+; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm4
+; CHECK-NEXT:    vpshldvw %zmm2, %zmm1, %zmm4 {%k1} {z}
+; CHECK-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0
+; CHECK-NEXT:    vpaddw %zmm4, %zmm0, %zmm0
+; CHECK-NEXT:    vpaddw %zmm0, %zmm3, %zmm0
+; CHECK-NEXT:    retq
+  %x2 = load <32 x i16>, <32 x i16>* %x2p
+  %res = call <32 x i16> @llvm.x86.avx512.mask.vpshldv.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
+  %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpshldv.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x4, i32 -1)
+  %res2 = call <32 x i16> @llvm.x86.avx512.maskz.vpshldv.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x4, i32  %x3)
+  %res3 = add <32 x i16> %res, %res1
+  %res4 = add <32 x i16> %res2, %res3
+  ret <32 x i16> %res4
+}
+

Added: llvm/trunk/test/CodeGen/X86/avx512vbmi2vl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vbmi2vl-intrinsics.ll?rev=318745&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vbmi2vl-intrinsics.ll (added)
+++ llvm/trunk/test/CodeGen/X86/avx512vbmi2vl-intrinsics.ll Tue Nov 21 01:48:44 2017
@@ -0,0 +1,657 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl,+avx512vbmi2 | FileCheck %s
+; Merge-masked word compress (256-bit): %src is passed as the passthrough operand and vpcompressw is expected to merge into ymm0 under %k1.
+define <16 x i16> @test_compress_w_256(<16 x i16> %src, <16 x i16> %data, i16 %mask) {
+; CHECK-LABEL: test_compress_w_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vpcompressw %ymm1, %ymm0 {%k1}
+; CHECK-NEXT:    retq
+  %res = call <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16> %data, <16 x i16> %src, i16 %mask)
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx512.mask.compress.w.256(<16 x i16>, <16 x i16>, i16)
+; Zero-masked word compress (128-bit): the zeroinitializer passthrough is expected to select the {%k1} {z} form of vpcompressw.
+define <8 x i16> @test_compress_w_128(<8 x i16> %data, i8 %mask) {
+; CHECK-LABEL: test_compress_w_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vpcompressw %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16> %data, <8 x i16> zeroinitializer, i8 %mask)
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.avx512.mask.compress.w.128(<8 x i16>, <8 x i16>, i8)
+; Merge-masked byte compress (256-bit): %src is passed as the passthrough operand and vpcompressb is expected to merge into ymm0 under %k1.
+define <32 x i8> @test_compress_b_256(<32 x i8> %src, <32 x i8> %data, i32 %mask) {
+; CHECK-LABEL: test_compress_b_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vpcompressb %ymm1, %ymm0 {%k1}
+; CHECK-NEXT:    retq
+  %res = call <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8> %data, <32 x i8> %src, i32 %mask)
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx512.mask.compress.b.256(<32 x i8>, <32 x i8>, i32)
+; Zero-masked byte compress (128-bit): the zeroinitializer passthrough is expected to select the {%k1} {z} form of vpcompressb.
+define <16 x i8> @test_compress_b_128(<16 x i8> %data, i16 %mask) {
+; CHECK-LABEL: test_compress_b_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vpcompressb %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8> %data, <16 x i8> zeroinitializer, i16 %mask)
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.avx512.mask.compress.b.128(<16 x i8>, <16 x i8>, i16)
+; Merge-masked byte expand (256-bit): vpexpandb is expected to merge into ymm1 under %k1, followed by a vmovdqa copy into ymm0 before retq.
+define <32 x i8> @test_expand_b_256(<32 x i8> %data, <32 x i8> %src, i32 %mask) {
+; CHECK-LABEL: test_expand_b_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vpexpandb %ymm0, %ymm1 {%k1}
+; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <32 x i8> @llvm.x86.avx512.mask.expand.b.256( <32 x i8> %data, <32 x i8> %src, i32 %mask)
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx512.mask.expand.b.256(<32 x i8>, <32 x i8>, i32)
+; Zero-masked byte expand (128-bit): the zeroinitializer passthrough is expected to select the {%k1} {z} form of vpexpandb.
+define <16 x i8> @test_expand_b_128(<16 x i8> %data, i16 %mask) {
+; CHECK-LABEL: test_expand_b_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vpexpandb %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8> %data, <16 x i8> zeroinitializer, i16 %mask)
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.avx512.mask.expand.b.128(<16 x i8>, <16 x i8>, i16)
+; Merge-masked word expand (256-bit): vpexpandw is expected to merge into ymm1 under %k1, followed by a vmovdqa copy into ymm0 before retq.
+define <16 x i16> @test_expand_w_256(<16 x i16> %data, <16 x i16> %src, i16 %mask) {
+; CHECK-LABEL: test_expand_w_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vpexpandw %ymm0, %ymm1 {%k1}
+; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i16> @llvm.x86.avx512.mask.expand.w.256( <16 x i16> %data, <16 x i16> %src, i16 %mask)
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx512.mask.expand.w.256(<16 x i16>, <16 x i16>, i16)
+; Zero-masked word expand (128-bit): the zeroinitializer passthrough is expected to select the {%k1} {z} form of vpexpandw.
+define <8 x i16> @test_expand_w_128(<8 x i16> %data, i8 %mask) {
+; CHECK-LABEL: test_expand_w_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vpexpandw %xmm0, %xmm0 {%k1} {z}
+; CHECK-NEXT:    retq
+  %res = call <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16> %data, <8 x i16> zeroinitializer, i8 %mask)
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.avx512.mask.expand.w.128(<8 x i16>, <8 x i16>, i8)
+; Word expand-load (256-bit): %addr is expected to appear as the (%rdi) memory source of vpexpandw, merging into ymm0 under %k1.
+define <16 x i16> @test_expand_load_w_256(i8* %addr, <16 x i16> %data, i16 %mask) {
+; CHECK-LABEL: test_expand_load_w_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vpexpandw (%rdi), %ymm0 {%k1}
+; CHECK-NEXT:    retq
+  %res = call <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %addr, <16 x i16> %data, i16 %mask)
+  ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.x86.avx512.mask.expand.load.w.256(i8* %addr, <16 x i16> %data, i16 %mask)
+; Word expand-load (128-bit): %addr is expected to appear as the (%rdi) memory source of vpexpandw, merging into xmm0 under %k1.
+define <8 x i16> @test_expand_load_w_128(i8* %addr, <8 x i16> %data, i8 %mask) {
+; CHECK-LABEL: test_expand_load_w_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vpexpandw (%rdi), %xmm0 {%k1}
+; CHECK-NEXT:    retq
+  %res = call <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %addr, <8 x i16> %data, i8 %mask)
+  ret <8 x i16> %res
+}
+declare <8 x i16> @llvm.x86.avx512.mask.expand.load.w.128(i8* %addr, <8 x i16> %data, i8 %mask)
+; Word compress-store (256-bit): vpcompressw is expected with (%rdi) as its memory destination under %k1; the function returns void.
+define void @test_compress_store_w_256(i8* %addr, <16 x i16> %data, i16 %mask) {
+; CHECK-LABEL: test_compress_store_w_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vpcompressw %ymm0, (%rdi) {%k1}
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.mask.compress.store.w.256(i8* %addr, <16 x i16> %data, i16 %mask)
+  ret void
+}
+declare void @llvm.x86.avx512.mask.compress.store.w.256(i8* %addr, <16 x i16> %data, i16 %mask)
+; Word compress-store (128-bit): vpcompressw is expected with (%rdi) as its memory destination under %k1; the function returns void.
+define void @test_compress_store_w_128(i8* %addr, <8 x i16> %data, i8 %mask) {
+; CHECK-LABEL: test_compress_store_w_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vpcompressw %xmm0, (%rdi) {%k1}
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.mask.compress.store.w.128(i8* %addr, <8 x i16> %data, i8 %mask)
+  ret void
+}
+declare void @llvm.x86.avx512.mask.compress.store.w.128(i8* %addr, <8 x i16> %data, i8 %mask)
+; Byte expand-load (256-bit): %addr is expected to appear as the (%rdi) memory source of vpexpandb, merging into ymm0 under %k1.
+define <32 x i8> @test_expand_load_b_256(i8* %addr, <32 x i8> %data, i32 %mask) {
+; CHECK-LABEL: test_expand_load_b_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vpexpandb (%rdi), %ymm0 {%k1}
+; CHECK-NEXT:    retq
+  %res = call <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %addr, <32 x i8> %data, i32 %mask)
+  ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.x86.avx512.mask.expand.load.b.256(i8* %addr, <32 x i8> %data, i32 %mask)
+; Byte expand-load (128-bit): %addr is expected to appear as the (%rdi) memory source of vpexpandb, merging into xmm0 under %k1.
+define <16 x i8> @test_expand_load_b_128(i8* %addr, <16 x i8> %data, i16 %mask) {
+; CHECK-LABEL: test_expand_load_b_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vpexpandb (%rdi), %xmm0 {%k1}
+; CHECK-NEXT:    retq
+  %res = call <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %addr, <16 x i8> %data, i16 %mask)
+  ret <16 x i8> %res
+}
+declare <16 x i8> @llvm.x86.avx512.mask.expand.load.b.128(i8* %addr, <16 x i8> %data, i16 %mask)
+; Byte compress-store (256-bit): vpcompressb is expected with (%rdi) as its memory destination under %k1; the function returns void.
+define void @test_compress_store_b_256(i8* %addr, <32 x i8> %data, i32 %mask) {
+; CHECK-LABEL: test_compress_store_b_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vpcompressb %ymm0, (%rdi) {%k1}
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.mask.compress.store.b.256(i8* %addr, <32 x i8> %data, i32 %mask)
+  ret void
+}
+declare void @llvm.x86.avx512.mask.compress.store.b.256(i8* %addr, <32 x i8> %data, i32 %mask)
+; Byte compress-store (128-bit): vpcompressb is expected with (%rdi) as its memory destination under %k1; the function returns void.
+define void @test_compress_store_b_128(i8* %addr, <16 x i8> %data, i16 %mask) {
+; CHECK-LABEL: test_compress_store_b_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vpcompressb %xmm0, (%rdi) {%k1}
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.mask.compress.store.b.128(i8* %addr, <16 x i8> %data, i16 %mask)
+  ret void
+}
+declare void @llvm.x86.avx512.mask.compress.store.b.128(i8* %addr, <16 x i8> %data, i16 %mask)
+; vpshldd $22 (xmm) in three flavours: merge-masked into %x3, unmasked (mask -1) and zero-masked (zeroinitializer passthrough); the three results are summed.
+define <4 x i32>@test_int_x86_avx512_mask_vpshld_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshld_d_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vpshldd $22, %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT:    vpshldd $22, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT:    vpshldd $22, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpaddd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT:    vpaddd %xmm3, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512.mask.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 %x4)
+  %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 -1)
+  %res2 = call <4 x i32> @llvm.x86.avx512.mask.vpshld.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> zeroinitializer,i8 %x4)
+  %res3 = add <4 x i32> %res, %res1
+  %res4 = add <4 x i32> %res3, %res2
+  ret <4 x i32> %res4
+}
+declare <4 x i32> @llvm.x86.avx512.mask.vpshld.d.128(<4 x i32>, <4 x i32>, i32, <4 x i32>, i8)
+; vpshldd $22 (ymm): merge-masked into %x3 plus unmasked (mask -1); the two results are added.
+define <8 x i32>@test_int_x86_avx512_mask_vpshld_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshld_d_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vpshldd $22, %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT:    vpshldd $22, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpaddd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.x86.avx512.mask.vpshld.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)
+  %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpshld.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 -1)
+  %res2 = add <8 x i32> %res, %res1
+  ret <8 x i32> %res2
+}
+declare <8 x i32> @llvm.x86.avx512.mask.vpshld.d.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)
+; vpshldq $22 (xmm): merge-masked into %x3 plus unmasked (mask -1); the two results are added.
+define <2 x i64>@test_int_x86_avx512_mask_vpshld_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshld_q_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vpshldq $22, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT:    vpshldq $22, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x i64> @llvm.x86.avx512.mask.vpshld.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 %x4)
+  %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpshld.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 -1)
+  %res2 = add <2 x i64> %res, %res1
+  ret <2 x i64> %res2
+}
+declare <2 x i64> @llvm.x86.avx512.mask.vpshld.q.128(<2 x i64>, <2 x i64>, i32, <2 x i64>, i8)
+; vpshldq $22 (ymm): merge-masked into %x3 plus unmasked (mask -1); the two results are added.
+define <4 x i64>@test_int_x86_avx512_mask_vpshld_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshld_q_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vpshldq $22, %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT:    vpshldq $22, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpaddq %ymm0, %ymm2, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i64> @llvm.x86.avx512.mask.vpshld.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4)
+  %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpshld.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 -1)
+  %res2 = add <4 x i64> %res, %res1
+  ret <4 x i64> %res2
+}
+declare <4 x i64> @llvm.x86.avx512.mask.vpshld.q.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)
+; vpshldw $22 (xmm): merge-masked into %x3 plus unmasked (mask -1), results added. NOTE(review): the immediate 22 exceeds the 16-bit element width - presumably only the immediate plumbing is under test; confirm.
+define <8 x i16>@test_int_x86_avx512_mask_vpshld_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshld_w_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vpshldw $22, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT:    vpshldw $22, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpaddw %xmm0, %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i16> @llvm.x86.avx512.mask.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 22, <8 x i16> %x3, i8 %x4)
+  %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpshld.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 22, <8 x i16> %x3, i8 -1)
+  %res2 = add <8 x i16> %res, %res1
+  ret <8 x i16> %res2
+}
+declare <8 x i16> @llvm.x86.avx512.mask.vpshld.w.128(<8 x i16>, <8 x i16>, i32, <8 x i16>, i8)
+; vpshldw $22 (ymm): merge-masked into %x3 plus unmasked (mask -1), results added. NOTE(review): the immediate 22 exceeds the 16-bit element width - presumably only the immediate plumbing is under test; confirm.
+define <16 x i16>@test_int_x86_avx512_mask_vpshld_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshld_w_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vpshldw $22, %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT:    vpshldw $22, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpaddw %ymm0, %ymm2, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i16> @llvm.x86.avx512.mask.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 22, <16 x i16> %x3, i16 %x4)
+  %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpshld.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 22, <16 x i16> %x3, i16 -1)
+  %res2 = add <16 x i16> %res, %res1
+  ret <16 x i16> %res2
+}
+declare <16 x i16> @llvm.x86.avx512.mask.vpshld.w.256(<16 x i16>, <16 x i16>, i32, <16 x i16>, i16)
+; vpshrdd $22 (xmm) in three flavours: merge-masked into %x3, unmasked (mask -1) and zero-masked (zeroinitializer passthrough); the three results are summed.
+define <4 x i32>@test_int_x86_avx512_mask_vpshrd_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshrd_d_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vpshrdd $22, %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT:    vpshrdd $22, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT:    vpshrdd $22, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpaddd %xmm0, %xmm2, %xmm0
+; CHECK-NEXT:    vpaddd %xmm3, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i32> @llvm.x86.avx512.mask.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 %x4)
+  %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> %x3, i8 -1)
+  %res2 = call <4 x i32> @llvm.x86.avx512.mask.vpshrd.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 22, <4 x i32> zeroinitializer,i8 %x4)
+  %res3 = add <4 x i32> %res, %res1
+  %res4 = add <4 x i32> %res3, %res2
+  ret <4 x i32> %res4
+}
+declare <4 x i32> @llvm.x86.avx512.mask.vpshrd.d.128(<4 x i32>, <4 x i32>, i32, <4 x i32>, i8)
+; vpshrdd $22 (ymm): merge-masked into %x3 plus unmasked (mask -1); the two results are added.
+define <8 x i32>@test_int_x86_avx512_mask_vpshrd_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshrd_d_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vpshrdd $22, %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT:    vpshrdd $22, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpaddd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i32> @llvm.x86.avx512.mask.vpshrd.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)
+  %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpshrd.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 -1)
+  %res2 = add <8 x i32> %res, %res1
+  ret <8 x i32> %res2
+}
+declare <8 x i32> @llvm.x86.avx512.mask.vpshrd.d.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)
+; vpshrdq $22 (xmm): merge-masked into %x3 plus unmasked (mask -1); the two results are added.
+define <2 x i64>@test_int_x86_avx512_mask_vpshrd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshrd_q_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vpshrdq $22, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT:    vpshrdq $22, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpaddq %xmm0, %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <2 x i64> @llvm.x86.avx512.mask.vpshrd.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 %x4)
+  %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpshrd.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 22, <2 x i64> %x3, i8 -1)
+  %res2 = add <2 x i64> %res, %res1
+  ret <2 x i64> %res2
+}
+declare <2 x i64> @llvm.x86.avx512.mask.vpshrd.q.128(<2 x i64>, <2 x i64>, i32, <2 x i64>, i8)
+; vpshrdq $22 (ymm): merge-masked into %x3 plus unmasked (mask -1); the two results are added.
+define <4 x i64>@test_int_x86_avx512_mask_vpshrd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshrd_q_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vpshrdq $22, %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT:    vpshrdq $22, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpaddq %ymm0, %ymm2, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <4 x i64> @llvm.x86.avx512.mask.vpshrd.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4)
+  %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpshrd.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 -1)
+  %res2 = add <4 x i64> %res, %res1
+  ret <4 x i64> %res2
+}
+declare <4 x i64> @llvm.x86.avx512.mask.vpshrd.q.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)
+; vpshrdw $22 (xmm): merge-masked into %x3 plus unmasked (mask -1), results added. NOTE(review): the immediate 22 exceeds the 16-bit element width - presumably only the immediate plumbing is under test; confirm.
+define <8 x i16>@test_int_x86_avx512_mask_vpshrd_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshrd_w_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vpshrdw $22, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT:    vpshrdw $22, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vpaddw %xmm0, %xmm2, %xmm0
+; CHECK-NEXT:    retq
+  %res = call <8 x i16> @llvm.x86.avx512.mask.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 22, <8 x i16> %x3, i8 %x4)
+  %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpshrd.w.128(<8 x i16> %x0, <8 x i16> %x1, i32 22, <8 x i16> %x3, i8 -1)
+  %res2 = add <8 x i16> %res, %res1
+  ret <8 x i16> %res2
+}
+declare <8 x i16> @llvm.x86.avx512.mask.vpshrd.w.128(<8 x i16>, <8 x i16>, i32, <8 x i16>, i8)
+; vpshrdw $22 (ymm): merge-masked into %x3 plus unmasked (mask -1), results added. NOTE(review): the immediate 22 exceeds the 16-bit element width - presumably only the immediate plumbing is under test; confirm.
+define <16 x i16>@test_int_x86_avx512_mask_vpshrd_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x3, i16 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshrd_w_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %edi, %k1
+; CHECK-NEXT:    vpshrdw $22, %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT:    vpshrdw $22, %ymm1, %ymm0, %ymm0
+; CHECK-NEXT:    vpaddw %ymm0, %ymm2, %ymm0
+; CHECK-NEXT:    retq
+  %res = call <16 x i16> @llvm.x86.avx512.mask.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 22, <16 x i16> %x3, i16 %x4)
+  %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpshrd.w.256(<16 x i16> %x0, <16 x i16> %x1, i32 22, <16 x i16> %x3, i16 -1)
+  %res2 = add <16 x i16> %res, %res1
+  ret <16 x i16> %res2
+}
+declare <16 x i16> @llvm.x86.avx512.mask.vpshrd.w.256(<16 x i16>, <16 x i16>, i32, <16 x i16>, i16)
+
+declare <8 x i32> @llvm.x86.avx512.mask.vpshrdv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+declare <8 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+; vpshrdvd (ymm): merge-masked with the third operand loaded from %x2p (expected folded as (%rdi)), unmasked (mask -1) and zero-masked via the maskz intrinsic; the three results are summed.
+define <8 x i32>@test_int_x86_avx512_mask_vpshrdv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshrdv_d_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vmovdqa %ymm0, %ymm3
+; CHECK-NEXT:    vpshrdvd (%rdi), %ymm1, %ymm3 {%k1}
+; CHECK-NEXT:    vmovdqa %ymm0, %ymm4
+; CHECK-NEXT:    vpshrdvd %ymm2, %ymm1, %ymm4
+; CHECK-NEXT:    vpshrdvd %ymm2, %ymm1, %ymm0 {%k1} {z}
+; CHECK-NEXT:    vpaddd %ymm0, %ymm4, %ymm0
+; CHECK-NEXT:    vpaddd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT:    retq
+  %x2 = load <8 x i32>, <8 x i32>* %x2p
+  %res = call <8 x i32> @llvm.x86.avx512.mask.vpshrdv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
+  %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpshrdv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 -1)
+  %res2 = call <8 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8  %x3)
+  %res3 = add <8 x i32> %res, %res1
+  %res4 = add <8 x i32> %res2, %res3
+  ret <8 x i32> %res4
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.vpshrdv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+declare <4 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+; vpshrdvd (xmm): merge-masked with the third operand loaded from %x2p (expected folded as (%rdi)), unmasked (mask -1) and zero-masked via the maskz intrinsic; the three results are summed.
+define <4 x i32>@test_int_x86_avx512_mask_vpshrdv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshrdv_d_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vmovdqa %xmm0, %xmm3
+; CHECK-NEXT:    vpshrdvd (%rdi), %xmm1, %xmm3 {%k1}
+; CHECK-NEXT:    vmovdqa %xmm0, %xmm4
+; CHECK-NEXT:    vpshrdvd %xmm2, %xmm1, %xmm4
+; CHECK-NEXT:    vpshrdvd %xmm2, %xmm1, %xmm0 {%k1} {z}
+; CHECK-NEXT:    vpaddd %xmm0, %xmm4, %xmm0
+; CHECK-NEXT:    vpaddd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    retq
+  %x2 = load <4 x i32>, <4 x i32>* %x2p
+  %res = call <4 x i32> @llvm.x86.avx512.mask.vpshrdv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
+  %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpshrdv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 -1)
+  %res2 = call <4 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8  %x3)
+  %res3 = add <4 x i32> %res, %res1
+  %res4 = add <4 x i32> %res2, %res3
+  ret <4 x i32> %res4
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.vpshrdv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
+declare <4 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
+; vpshrdvq (ymm): merge-masked with the third operand loaded from %x2p (expected folded as (%rdi)), unmasked (mask -1) and zero-masked via the maskz intrinsic; the three results are summed.
+define <4 x i64>@test_int_x86_avx512_mask_vpshrdv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64>* %x2p, <4 x i64> %x4, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshrdv_q_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vmovdqa %ymm0, %ymm3
+; CHECK-NEXT:    vpshrdvq (%rdi), %ymm1, %ymm3 {%k1}
+; CHECK-NEXT:    vmovdqa %ymm0, %ymm4
+; CHECK-NEXT:    vpshrdvq %ymm2, %ymm1, %ymm4
+; CHECK-NEXT:    vpshrdvq %ymm2, %ymm1, %ymm0 {%k1} {z}
+; CHECK-NEXT:    vpaddq %ymm0, %ymm4, %ymm0
+; CHECK-NEXT:    vpaddq %ymm0, %ymm3, %ymm0
+; CHECK-NEXT:    retq
+  %x2 = load <4 x i64>, <4 x i64>* %x2p
+  %res = call <4 x i64> @llvm.x86.avx512.mask.vpshrdv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
+  %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpshrdv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x4, i8 -1)
+  %res2 = call <4 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x4, i8  %x3)
+  %res3 = add <4 x i64> %res, %res1
+  %res4 = add <4 x i64> %res2, %res3
+  ret <4 x i64> %res4
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.vpshrdv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
+declare <2 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
+; vpshrdvq (xmm): merge-masked with the third operand loaded from %x2p (expected folded as (%rdi)), unmasked (mask -1) and zero-masked via the maskz intrinsic; the three results are summed.
+define <2 x i64>@test_int_x86_avx512_mask_vpshrdv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64>* %x2p, <2 x i64> %x4, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshrdv_q_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vmovdqa %xmm0, %xmm3
+; CHECK-NEXT:    vpshrdvq (%rdi), %xmm1, %xmm3 {%k1}
+; CHECK-NEXT:    vmovdqa %xmm0, %xmm4
+; CHECK-NEXT:    vpshrdvq %xmm2, %xmm1, %xmm4
+; CHECK-NEXT:    vpshrdvq %xmm2, %xmm1, %xmm0 {%k1} {z}
+; CHECK-NEXT:    vpaddq %xmm0, %xmm4, %xmm0
+; CHECK-NEXT:    vpaddq %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    retq
+  %x2 = load <2 x i64>, <2 x i64>* %x2p
+  %res = call <2 x i64> @llvm.x86.avx512.mask.vpshrdv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
+  %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpshrdv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x4, i8 -1)
+  %res2 = call <2 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x4, i8  %x3)
+  %res3 = add <2 x i64> %res, %res1
+  %res4 = add <2 x i64> %res2, %res3
+  ret <2 x i64> %res4
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.vpshrdv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+declare <16 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+; vpshrdvw (ymm): merge-masked with the third operand loaded from %x2p (expected folded as (%rdi)), unmasked (mask -1) and zero-masked via the maskz intrinsic; the three results are summed.
+define <16 x i16>@test_int_x86_avx512_mask_vpshrdv_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16>* %x2p, <16 x i16> %x4, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshrdv_w_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vmovdqa %ymm0, %ymm3
+; CHECK-NEXT:    vpshrdvw (%rdi), %ymm1, %ymm3 {%k1}
+; CHECK-NEXT:    vmovdqa %ymm0, %ymm4
+; CHECK-NEXT:    vpshrdvw %ymm2, %ymm1, %ymm4
+; CHECK-NEXT:    vpshrdvw %ymm2, %ymm1, %ymm0 {%k1} {z}
+; CHECK-NEXT:    vpaddw %ymm0, %ymm4, %ymm0
+; CHECK-NEXT:    vpaddw %ymm0, %ymm3, %ymm0
+; CHECK-NEXT:    retq
+  %x2 = load <16 x i16>, <16 x i16>* %x2p
+  %res = call <16 x i16> @llvm.x86.avx512.mask.vpshrdv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
+  %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpshrdv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x4, i16 -1)
+  %res2 = call <16 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x4, i16  %x3)
+  %res3 = add <16 x i16> %res, %res1
+  %res4 = add <16 x i16> %res2, %res3
+  ret <16 x i16> %res4
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.vpshrdv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+declare <8 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+; vpshrdvw (xmm): merge-masked with the third operand loaded from %x2p (expected folded as (%rdi)), unmasked (mask -1) and zero-masked via the maskz intrinsic; the three results are summed.
+define <8 x i16>@test_int_x86_avx512_mask_vpshrdv_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16>* %x2p, <8 x i16> %x4, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshrdv_w_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vmovdqa %xmm0, %xmm3
+; CHECK-NEXT:    vpshrdvw (%rdi), %xmm1, %xmm3 {%k1}
+; CHECK-NEXT:    vmovdqa %xmm0, %xmm4
+; CHECK-NEXT:    vpshrdvw %xmm2, %xmm1, %xmm4
+; CHECK-NEXT:    vpshrdvw %xmm2, %xmm1, %xmm0 {%k1} {z}
+; CHECK-NEXT:    vpaddw %xmm0, %xmm4, %xmm0
+; CHECK-NEXT:    vpaddw %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    retq
+  %x2 = load <8 x i16>, <8 x i16>* %x2p
+  %res = call <8 x i16> @llvm.x86.avx512.mask.vpshrdv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
+  %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpshrdv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x4, i8 -1)
+  %res2 = call <8 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x4, i8  %x3)
+  %res3 = add <8 x i16> %res, %res1
+  %res4 = add <8 x i16> %res2, %res3
+  ret <8 x i16> %res4
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.vpshldv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+declare <8 x i32> @llvm.x86.avx512.maskz.vpshldv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+; vpshldvd (ymm): merge-masked with the third operand loaded from %x2p (expected folded as (%rdi)), unmasked (mask -1) and zero-masked via the maskz intrinsic; the three results are summed.
+define <8 x i32>@test_int_x86_avx512_mask_vpshldv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshldv_d_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vmovdqa %ymm0, %ymm3
+; CHECK-NEXT:    vpshldvd (%rdi), %ymm1, %ymm3 {%k1}
+; CHECK-NEXT:    vmovdqa %ymm0, %ymm4
+; CHECK-NEXT:    vpshldvd %ymm2, %ymm1, %ymm4
+; CHECK-NEXT:    vpshldvd %ymm2, %ymm1, %ymm0 {%k1} {z}
+; CHECK-NEXT:    vpaddd %ymm0, %ymm4, %ymm0
+; CHECK-NEXT:    vpaddd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT:    retq
+  %x2 = load <8 x i32>, <8 x i32>* %x2p
+  %res = call <8 x i32> @llvm.x86.avx512.mask.vpshldv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
+  %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpshldv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 -1)
+  %res2 = call <8 x i32> @llvm.x86.avx512.maskz.vpshldv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8  %x3)
+  %res3 = add <8 x i32> %res, %res1
+  %res4 = add <8 x i32> %res2, %res3
+  ret <8 x i32> %res4
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.vpshldv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+declare <4 x i32> @llvm.x86.avx512.maskz.vpshldv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+; vpshldvd (xmm): merge-masked with the third operand loaded from %x2p (expected folded as (%rdi)), unmasked (mask -1) and zero-masked via the maskz intrinsic; the three results are summed.
+define <4 x i32>@test_int_x86_avx512_mask_vpshldv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshldv_d_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vmovdqa %xmm0, %xmm3
+; CHECK-NEXT:    vpshldvd (%rdi), %xmm1, %xmm3 {%k1}
+; CHECK-NEXT:    vmovdqa %xmm0, %xmm4
+; CHECK-NEXT:    vpshldvd %xmm2, %xmm1, %xmm4
+; CHECK-NEXT:    vpshldvd %xmm2, %xmm1, %xmm0 {%k1} {z}
+; CHECK-NEXT:    vpaddd %xmm0, %xmm4, %xmm0
+; CHECK-NEXT:    vpaddd %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    retq
+  %x2 = load <4 x i32>, <4 x i32>* %x2p
+  %res = call <4 x i32> @llvm.x86.avx512.mask.vpshldv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
+  %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpshldv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 -1)
+  %res2 = call <4 x i32> @llvm.x86.avx512.maskz.vpshldv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8  %x3)
+  %res3 = add <4 x i32> %res, %res1
+  %res4 = add <4 x i32> %res2, %res3
+  ret <4 x i32> %res4
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.vpshldv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
+declare <4 x i64> @llvm.x86.avx512.maskz.vpshldv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
+; vpshldvq (ymm): merge-masked with the third operand loaded from %x2p (expected folded as (%rdi)), unmasked (mask -1) and zero-masked via the maskz intrinsic; the three results are summed.
+define <4 x i64>@test_int_x86_avx512_mask_vpshldv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64>* %x2p, <4 x i64> %x4, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshldv_q_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vmovdqa %ymm0, %ymm3
+; CHECK-NEXT:    vpshldvq (%rdi), %ymm1, %ymm3 {%k1}
+; CHECK-NEXT:    vmovdqa %ymm0, %ymm4
+; CHECK-NEXT:    vpshldvq %ymm2, %ymm1, %ymm4
+; CHECK-NEXT:    vpshldvq %ymm2, %ymm1, %ymm0 {%k1} {z}
+; CHECK-NEXT:    vpaddq %ymm0, %ymm4, %ymm0
+; CHECK-NEXT:    vpaddq %ymm0, %ymm3, %ymm0
+; CHECK-NEXT:    retq
+  %x2 = load <4 x i64>, <4 x i64>* %x2p
+  %res = call <4 x i64> @llvm.x86.avx512.mask.vpshldv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
+  %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpshldv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x4, i8 -1)
+  %res2 = call <4 x i64> @llvm.x86.avx512.maskz.vpshldv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x4, i8  %x3)
+  %res3 = add <4 x i64> %res, %res1
+  %res4 = add <4 x i64> %res2, %res3
+  ret <4 x i64> %res4
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.vpshldv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
+declare <2 x i64> @llvm.x86.avx512.maskz.vpshldv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
+; vpshldvq (xmm): merge-masked with the third operand loaded from %x2p (expected folded as (%rdi)), unmasked (mask -1) and zero-masked via the maskz intrinsic; the three results are summed.
+define <2 x i64>@test_int_x86_avx512_mask_vpshldv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64>* %x2p, <2 x i64> %x4, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshldv_q_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vmovdqa %xmm0, %xmm3
+; CHECK-NEXT:    vpshldvq (%rdi), %xmm1, %xmm3 {%k1}
+; CHECK-NEXT:    vmovdqa %xmm0, %xmm4
+; CHECK-NEXT:    vpshldvq %xmm2, %xmm1, %xmm4
+; CHECK-NEXT:    vpshldvq %xmm2, %xmm1, %xmm0 {%k1} {z}
+; CHECK-NEXT:    vpaddq %xmm0, %xmm4, %xmm0
+; CHECK-NEXT:    vpaddq %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    retq
+  %x2 = load <2 x i64>, <2 x i64>* %x2p
+  %res = call <2 x i64> @llvm.x86.avx512.mask.vpshldv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
+  %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpshldv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x4, i8 -1)
+  %res2 = call <2 x i64> @llvm.x86.avx512.maskz.vpshldv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x4, i8  %x3)
+  %res3 = add <2 x i64> %res, %res1
+  %res4 = add <2 x i64> %res2, %res3
+  ret <2 x i64> %res4
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.vpshldv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+declare <16 x i16> @llvm.x86.avx512.maskz.vpshldv.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+; vpshldvw (ymm): merge-masked with the third operand loaded from %x2p (expected folded as (%rdi)), unmasked (mask -1) and zero-masked via the maskz intrinsic; the three results are summed.
+define <16 x i16>@test_int_x86_avx512_mask_vpshldv_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16>* %x2p, <16 x i16> %x4, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshldv_w_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vmovdqa %ymm0, %ymm3
+; CHECK-NEXT:    vpshldvw (%rdi), %ymm1, %ymm3 {%k1}
+; CHECK-NEXT:    vmovdqa %ymm0, %ymm4
+; CHECK-NEXT:    vpshldvw %ymm2, %ymm1, %ymm4
+; CHECK-NEXT:    vpshldvw %ymm2, %ymm1, %ymm0 {%k1} {z}
+; CHECK-NEXT:    vpaddw %ymm0, %ymm4, %ymm0
+; CHECK-NEXT:    vpaddw %ymm0, %ymm3, %ymm0
+; CHECK-NEXT:    retq
+  %x2 = load <16 x i16>, <16 x i16>* %x2p
+  %res = call <16 x i16> @llvm.x86.avx512.mask.vpshldv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
+  %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpshldv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x4, i16 -1)
+  %res2 = call <16 x i16> @llvm.x86.avx512.maskz.vpshldv.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x4, i16  %x3)
+  %res3 = add <16 x i16> %res, %res1
+  %res4 = add <16 x i16> %res2, %res3
+  ret <16 x i16> %res4
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.vpshldv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+declare <8 x i16> @llvm.x86.avx512.maskz.vpshldv.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+; vpshldvw (xmm): merge-masked with the third operand loaded from %x2p (expected folded as (%rdi)), unmasked (mask -1) and zero-masked via the maskz intrinsic; the three results are summed.
+define <8 x i16>@test_int_x86_avx512_mask_vpshldv_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16>* %x2p, <8 x i16> %x4, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_vpshldv_w_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %esi, %k1
+; CHECK-NEXT:    vmovdqa %xmm0, %xmm3
+; CHECK-NEXT:    vpshldvw (%rdi), %xmm1, %xmm3 {%k1}
+; CHECK-NEXT:    vmovdqa %xmm0, %xmm4
+; CHECK-NEXT:    vpshldvw %xmm2, %xmm1, %xmm4
+; CHECK-NEXT:    vpshldvw %xmm2, %xmm1, %xmm0 {%k1} {z}
+; CHECK-NEXT:    vpaddw %xmm0, %xmm4, %xmm0
+; CHECK-NEXT:    vpaddw %xmm0, %xmm3, %xmm0
+; CHECK-NEXT:    retq
+  %x2 = load <8 x i16>, <8 x i16>* %x2p
+  %res = call <8 x i16> @llvm.x86.avx512.mask.vpshldv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
+  %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpshldv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x4, i8 -1)
+  %res2 = call <8 x i16> @llvm.x86.avx512.maskz.vpshldv.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x4, i8  %x3)
+  %res3 = add <8 x i16> %res, %res1
+  %res4 = add <8 x i16> %res2, %res3
+  ret <8 x i16> %res4
+}
+

Added: llvm/trunk/test/MC/X86/avx512vbmi2-encoding.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/avx512vbmi2-encoding.s?rev=318745&view=auto
==============================================================================
--- llvm/trunk/test/MC/X86/avx512vbmi2-encoding.s (added)
+++ llvm/trunk/test/MC/X86/avx512vbmi2-encoding.s Tue Nov 21 01:48:44 2017
@@ -0,0 +1,1793 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vbmi2 --show-encoding < %s | FileCheck %s
+// Each case lists the expected printed instruction, the expected encoding bytes, then the input line. Register-to-register vpexpandb/vpexpandw: zmm3->zmm1 and zmm23->zmm21, first unmasked, then under {%k2}.
+// CHECK: vpexpandb %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x62,0xcb]
+          vpexpandb %zmm3, %zmm1
+
+// CHECK: vpexpandw %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf2,0xfd,0x48,0x62,0xcb]
+          vpexpandw %zmm3, %zmm1
+
+// CHECK: vpexpandb %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x62,0xef]
+          vpexpandb %zmm23, %zmm21
+
+// CHECK: vpexpandw %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0x62,0xef]
+          vpexpandw %zmm23, %zmm21
+
+// CHECK: vpexpandb %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x4a,0x62,0xcb]
+          vpexpandb %zmm3, %zmm1 {%k2}
+
+// CHECK: vpexpandw %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x4a,0x62,0xcb]
+          vpexpandw %zmm3, %zmm1 {%k2}
+
+// CHECK: vpexpandb %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x4a,0x62,0xef]
+          vpexpandb %zmm23, %zmm21 {%k2}
+
+// CHECK: vpexpandw %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x4a,0x62,0xef]
+          vpexpandw %zmm23, %zmm21 {%k2}
+// vpexpandb/vpexpandw from memory into zmm1, unmasked. The 8-bit displacement is stored scaled by element size: -4(%rsp) for bytes and -8(%rsp) for words both encode as 0xfc.
+// CHECK: vpexpandb  (%rcx), %zmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x62,0x09]
+          vpexpandb  (%rcx), %zmm1
+
+// CHECK: vpexpandb  -4(%rsp), %zmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x62,0x4c,0x24,0xfc]
+          vpexpandb  -4(%rsp), %zmm1
+
+// CHECK: vpexpandb  4(%rsp), %zmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x62,0x4c,0x24,0x04]
+          vpexpandb  4(%rsp), %zmm1
+
+// CHECK: vpexpandb  268435456(%rcx,%r14,8), %zmm1
+// CHECK: encoding: [0x62,0xb2,0x7d,0x48,0x62,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpexpandb  268435456(%rcx,%r14,8), %zmm1
+
+// CHECK: vpexpandb  -536870912(%rcx,%r14,8), %zmm1
+// CHECK: encoding: [0x62,0xb2,0x7d,0x48,0x62,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpexpandb  -536870912(%rcx,%r14,8), %zmm1
+
+// CHECK: vpexpandb  -536870910(%rcx,%r14,8), %zmm1
+// CHECK: encoding: [0x62,0xb2,0x7d,0x48,0x62,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpexpandb  -536870910(%rcx,%r14,8), %zmm1
+
+// CHECK: vpexpandw  (%rcx), %zmm1
+// CHECK: encoding: [0x62,0xf2,0xfd,0x48,0x62,0x09]
+          vpexpandw  (%rcx), %zmm1
+
+// CHECK: vpexpandw  -8(%rsp), %zmm1
+// CHECK: encoding: [0x62,0xf2,0xfd,0x48,0x62,0x4c,0x24,0xfc]
+          vpexpandw  -8(%rsp), %zmm1
+
+// CHECK: vpexpandw  8(%rsp), %zmm1
+// CHECK: encoding: [0x62,0xf2,0xfd,0x48,0x62,0x4c,0x24,0x04]
+          vpexpandw  8(%rsp), %zmm1
+
+// CHECK: vpexpandw  268435456(%rcx,%r14,8), %zmm1
+// CHECK: encoding: [0x62,0xb2,0xfd,0x48,0x62,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpexpandw  268435456(%rcx,%r14,8), %zmm1
+
+// CHECK: vpexpandw  -536870912(%rcx,%r14,8), %zmm1
+// CHECK: encoding: [0x62,0xb2,0xfd,0x48,0x62,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpexpandw  -536870912(%rcx,%r14,8), %zmm1
+
+// CHECK: vpexpandw  -536870910(%rcx,%r14,8), %zmm1
+// CHECK: encoding: [0x62,0xb2,0xfd,0x48,0x62,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpexpandw  -536870910(%rcx,%r14,8), %zmm1
+// Memory-source vpexpandb/vpexpandw into zmm21, unmasked; compared with the zmm1 cases the second encoding byte is 0xe2/0xa2 instead of 0xf2/0xb2 and the ModRM reg field selects register 5.
+// CHECK: vpexpandb  (%rcx), %zmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x62,0x29]
+          vpexpandb  (%rcx), %zmm21
+
+// CHECK: vpexpandb  -4(%rsp), %zmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x62,0x6c,0x24,0xfc]
+          vpexpandb  -4(%rsp), %zmm21
+
+// CHECK: vpexpandb  4(%rsp), %zmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x62,0x6c,0x24,0x04]
+          vpexpandb  4(%rsp), %zmm21
+
+// CHECK: vpexpandb  268435456(%rcx,%r14,8), %zmm21
+// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x62,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpexpandb  268435456(%rcx,%r14,8), %zmm21
+
+// CHECK: vpexpandb  -536870912(%rcx,%r14,8), %zmm21
+// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x62,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpexpandb  -536870912(%rcx,%r14,8), %zmm21
+
+// CHECK: vpexpandb  -536870910(%rcx,%r14,8), %zmm21
+// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x62,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpexpandb  -536870910(%rcx,%r14,8), %zmm21
+
+// CHECK: vpexpandw  (%rcx), %zmm21
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x62,0x29]
+          vpexpandw  (%rcx), %zmm21
+
+// CHECK: vpexpandw  -8(%rsp), %zmm21
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x62,0x6c,0x24,0xfc]
+          vpexpandw  -8(%rsp), %zmm21
+
+// CHECK: vpexpandw  8(%rsp), %zmm21
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x62,0x6c,0x24,0x04]
+          vpexpandw  8(%rsp), %zmm21
+
+// CHECK: vpexpandw  268435456(%rcx,%r14,8), %zmm21
+// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0x62,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpexpandw  268435456(%rcx,%r14,8), %zmm21
+
+// CHECK: vpexpandw  -536870912(%rcx,%r14,8), %zmm21
+// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0x62,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpexpandw  -536870912(%rcx,%r14,8), %zmm21
+
+// CHECK: vpexpandw  -536870910(%rcx,%r14,8), %zmm21
+// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0x62,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpexpandw  -536870910(%rcx,%r14,8), %zmm21
+// Memory-source vpexpandb/vpexpandw into zmm1 under write mask {%k2}; the mask register shows up in the fourth encoding byte (0x4a here).
+// CHECK: vpexpandb  (%rcx), %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x4a,0x62,0x09]
+          vpexpandb  (%rcx), %zmm1 {%k2}
+
+// CHECK: vpexpandb  -4(%rsp), %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x4a,0x62,0x4c,0x24,0xfc]
+          vpexpandb  -4(%rsp), %zmm1 {%k2}
+
+// CHECK: vpexpandb  4(%rsp), %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x4a,0x62,0x4c,0x24,0x04]
+          vpexpandb  4(%rsp), %zmm1 {%k2}
+
+// CHECK: vpexpandb  268435456(%rcx,%r14,8), %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x7d,0x4a,0x62,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpexpandb  268435456(%rcx,%r14,8), %zmm1 {%k2}
+
+// CHECK: vpexpandb  -536870912(%rcx,%r14,8), %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x7d,0x4a,0x62,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpexpandb  -536870912(%rcx,%r14,8), %zmm1 {%k2}
+
+// CHECK: vpexpandb  -536870910(%rcx,%r14,8), %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x7d,0x4a,0x62,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpexpandb  -536870910(%rcx,%r14,8), %zmm1 {%k2}
+
+// CHECK: vpexpandw  (%rcx), %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x4a,0x62,0x09]
+          vpexpandw  (%rcx), %zmm1 {%k2}
+
+// CHECK: vpexpandw  -8(%rsp), %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x4a,0x62,0x4c,0x24,0xfc]
+          vpexpandw  -8(%rsp), %zmm1 {%k2}
+
+// CHECK: vpexpandw  8(%rsp), %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x4a,0x62,0x4c,0x24,0x04]
+          vpexpandw  8(%rsp), %zmm1 {%k2}
+
+// CHECK: vpexpandw  268435456(%rcx,%r14,8), %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xfd,0x4a,0x62,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpexpandw  268435456(%rcx,%r14,8), %zmm1 {%k2}
+
+// CHECK: vpexpandw  -536870912(%rcx,%r14,8), %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xfd,0x4a,0x62,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpexpandw  -536870912(%rcx,%r14,8), %zmm1 {%k2}
+
+// CHECK: vpexpandw  -536870910(%rcx,%r14,8), %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xfd,0x4a,0x62,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpexpandw  -536870910(%rcx,%r14,8), %zmm1 {%k2}
+// Memory-source vpexpandb/vpexpandw into zmm21 under write mask {%k2}, over the same six addressing forms as the other vpexpand memory cases.
+// CHECK: vpexpandb  (%rcx), %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x4a,0x62,0x29]
+          vpexpandb  (%rcx), %zmm21 {%k2}
+
+// CHECK: vpexpandb  -4(%rsp), %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x4a,0x62,0x6c,0x24,0xfc]
+          vpexpandb  -4(%rsp), %zmm21 {%k2}
+
+// CHECK: vpexpandb  4(%rsp), %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x4a,0x62,0x6c,0x24,0x04]
+          vpexpandb  4(%rsp), %zmm21 {%k2}
+
+// CHECK: vpexpandb  268435456(%rcx,%r14,8), %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x4a,0x62,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpexpandb  268435456(%rcx,%r14,8), %zmm21 {%k2}
+
+// CHECK: vpexpandb  -536870912(%rcx,%r14,8), %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x4a,0x62,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpexpandb  -536870912(%rcx,%r14,8), %zmm21 {%k2}
+
+// CHECK: vpexpandb  -536870910(%rcx,%r14,8), %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x4a,0x62,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpexpandb  -536870910(%rcx,%r14,8), %zmm21 {%k2}
+
+// CHECK: vpexpandw  (%rcx), %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x4a,0x62,0x29]
+          vpexpandw  (%rcx), %zmm21 {%k2}
+
+// CHECK: vpexpandw  -8(%rsp), %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x4a,0x62,0x6c,0x24,0xfc]
+          vpexpandw  -8(%rsp), %zmm21 {%k2}
+
+// CHECK: vpexpandw  8(%rsp), %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x4a,0x62,0x6c,0x24,0x04]
+          vpexpandw  8(%rsp), %zmm21 {%k2}
+
+// CHECK: vpexpandw  268435456(%rcx,%r14,8), %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x4a,0x62,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpexpandw  268435456(%rcx,%r14,8), %zmm21 {%k2}
+
+// CHECK: vpexpandw  -536870912(%rcx,%r14,8), %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x4a,0x62,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpexpandw  -536870912(%rcx,%r14,8), %zmm21 {%k2}
+
+// CHECK: vpexpandw  -536870910(%rcx,%r14,8), %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x4a,0x62,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpexpandw  -536870910(%rcx,%r14,8), %zmm21 {%k2}
+// Register-to-register vpcompressb/vpcompressw (opcode byte 0x63), unmasked then under {%k2}. ModRM is 0xd9/0xfd: the source register sits in the reg field and the destination in r/m.
+// CHECK: vpcompressb %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x63,0xd9]
+          vpcompressb %zmm3, %zmm1
+
+// CHECK: vpcompressw %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf2,0xfd,0x48,0x63,0xd9]
+          vpcompressw %zmm3, %zmm1
+
+// CHECK: vpcompressb %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x63,0xfd]
+          vpcompressb %zmm23, %zmm21
+
+// CHECK: vpcompressw %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0x63,0xfd]
+          vpcompressw %zmm23, %zmm21
+
+// CHECK: vpcompressb %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x4a,0x63,0xd9]
+          vpcompressb %zmm3, %zmm1 {%k2}
+
+// CHECK: vpcompressw %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x4a,0x63,0xd9]
+          vpcompressw %zmm3, %zmm1 {%k2}
+
+// CHECK: vpcompressb %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x4a,0x63,0xfd]
+          vpcompressb %zmm23, %zmm21 {%k2}
+
+// CHECK: vpcompressw %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x4a,0x63,0xfd]
+          vpcompressw %zmm23, %zmm21 {%k2}
+// vpcompressb/vpcompressw storing zmm1 to memory, unmasked. The 8-bit displacement is stored scaled by element size: -4(%rsp) for bytes and -8(%rsp) for words both encode as 0xfc.
+// CHECK: vpcompressb  %zmm1, (%rcx)
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x63,0x09]
+          vpcompressb  %zmm1, (%rcx)
+
+// CHECK: vpcompressb  %zmm1, -4(%rsp)
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x63,0x4c,0x24,0xfc]
+          vpcompressb  %zmm1, -4(%rsp)
+
+// CHECK: vpcompressb  %zmm1, 4(%rsp)
+// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x63,0x4c,0x24,0x04]
+          vpcompressb  %zmm1, 4(%rsp)
+
+// CHECK: vpcompressb  %zmm1, 268435456(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0x7d,0x48,0x63,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpcompressb  %zmm1, 268435456(%rcx,%r14,8)
+
+// CHECK: vpcompressb  %zmm1, -536870912(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0x7d,0x48,0x63,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpcompressb  %zmm1, -536870912(%rcx,%r14,8)
+
+// CHECK: vpcompressb  %zmm1, -536870910(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0x7d,0x48,0x63,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpcompressb  %zmm1, -536870910(%rcx,%r14,8)
+
+// CHECK: vpcompressw  %zmm1, (%rcx)
+// CHECK: encoding: [0x62,0xf2,0xfd,0x48,0x63,0x09]
+          vpcompressw  %zmm1, (%rcx)
+
+// CHECK: vpcompressw  %zmm1, -8(%rsp)
+// CHECK: encoding: [0x62,0xf2,0xfd,0x48,0x63,0x4c,0x24,0xfc]
+          vpcompressw  %zmm1, -8(%rsp)
+
+// CHECK: vpcompressw  %zmm1, 8(%rsp)
+// CHECK: encoding: [0x62,0xf2,0xfd,0x48,0x63,0x4c,0x24,0x04]
+          vpcompressw  %zmm1, 8(%rsp)
+
+// CHECK: vpcompressw  %zmm1, 268435456(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0xfd,0x48,0x63,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpcompressw  %zmm1, 268435456(%rcx,%r14,8)
+
+// CHECK: vpcompressw  %zmm1, -536870912(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0xfd,0x48,0x63,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpcompressw  %zmm1, -536870912(%rcx,%r14,8)
+
+// CHECK: vpcompressw  %zmm1, -536870910(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0xfd,0x48,0x63,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpcompressw  %zmm1, -536870910(%rcx,%r14,8)
+// vpcompressb/vpcompressw storing zmm21 to memory under write mask {%k2}; the mask is printed after the memory destination.
+// CHECK: vpcompressb  %zmm21, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x4a,0x63,0x29]
+          vpcompressb  %zmm21, (%rcx) {%k2}
+
+// CHECK: vpcompressb  %zmm21, -4(%rsp) {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x4a,0x63,0x6c,0x24,0xfc]
+          vpcompressb  %zmm21, -4(%rsp) {%k2}
+
+// CHECK: vpcompressb  %zmm21, 4(%rsp) {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x4a,0x63,0x6c,0x24,0x04]
+          vpcompressb  %zmm21, 4(%rsp) {%k2}
+
+// CHECK: vpcompressb  %zmm21, 268435456(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x4a,0x63,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpcompressb  %zmm21, 268435456(%rcx,%r14,8) {%k2}
+
+// CHECK: vpcompressb  %zmm21, -536870912(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x4a,0x63,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpcompressb  %zmm21, -536870912(%rcx,%r14,8) {%k2}
+
+// CHECK: vpcompressb  %zmm21, -536870910(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x4a,0x63,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpcompressb  %zmm21, -536870910(%rcx,%r14,8) {%k2}
+
+// CHECK: vpcompressw  %zmm21, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x4a,0x63,0x29]
+          vpcompressw  %zmm21, (%rcx) {%k2}
+
+// CHECK: vpcompressw  %zmm21, -8(%rsp) {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x4a,0x63,0x6c,0x24,0xfc]
+          vpcompressw  %zmm21, -8(%rsp) {%k2}
+
+// CHECK: vpcompressw  %zmm21, 8(%rsp) {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x4a,0x63,0x6c,0x24,0x04]
+          vpcompressw  %zmm21, 8(%rsp) {%k2}
+
+// CHECK: vpcompressw  %zmm21, 268435456(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x4a,0x63,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpcompressw  %zmm21, 268435456(%rcx,%r14,8) {%k2}
+
+// CHECK: vpcompressw  %zmm21, -536870912(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x4a,0x63,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpcompressw  %zmm21, -536870912(%rcx,%r14,8) {%k2}
+
+// CHECK: vpcompressw  %zmm21, -536870910(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x4a,0x63,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpcompressw  %zmm21, -536870910(%rcx,%r14,8) {%k2}
+// vpcompressb/vpcompressw stores for the remaining register/mask pairings: zmm21 unmasked, then zmm1 under write mask {%k2}.
+// CHECK: vpcompressb  %zmm21, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x63,0x29]
+          vpcompressb  %zmm21, (%rcx)
+
+// CHECK: vpcompressb  %zmm21, -4(%rsp)
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x63,0x6c,0x24,0xfc]
+          vpcompressb  %zmm21, -4(%rsp)
+
+// CHECK: vpcompressb  %zmm21, 4(%rsp)
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x63,0x6c,0x24,0x04]
+          vpcompressb  %zmm21, 4(%rsp)
+
+// CHECK: vpcompressb  %zmm21, 268435456(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x63,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpcompressb  %zmm21, 268435456(%rcx,%r14,8)
+
+// CHECK: vpcompressb  %zmm21, -536870912(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x63,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpcompressb  %zmm21, -536870912(%rcx,%r14,8)
+
+// CHECK: vpcompressb  %zmm21, -536870910(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x63,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpcompressb  %zmm21, -536870910(%rcx,%r14,8)
+
+// CHECK: vpcompressw  %zmm21, (%rcx)
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x63,0x29]
+          vpcompressw  %zmm21, (%rcx)
+
+// CHECK: vpcompressw  %zmm21, -8(%rsp)
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x63,0x6c,0x24,0xfc]
+          vpcompressw  %zmm21, -8(%rsp)
+
+// CHECK: vpcompressw  %zmm21, 8(%rsp)
+// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x63,0x6c,0x24,0x04]
+          vpcompressw  %zmm21, 8(%rsp)
+
+// CHECK: vpcompressw  %zmm21, 268435456(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0x63,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpcompressw  %zmm21, 268435456(%rcx,%r14,8)
+
+// CHECK: vpcompressw  %zmm21, -536870912(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0x63,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpcompressw  %zmm21, -536870912(%rcx,%r14,8)
+
+// CHECK: vpcompressw  %zmm21, -536870910(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0x63,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpcompressw  %zmm21, -536870910(%rcx,%r14,8)
+
+// CHECK: vpcompressb  %zmm1, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x4a,0x63,0x09]
+          vpcompressb  %zmm1, (%rcx) {%k2}
+
+// CHECK: vpcompressb  %zmm1, -4(%rsp) {%k2}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x4a,0x63,0x4c,0x24,0xfc]
+          vpcompressb  %zmm1, -4(%rsp) {%k2}
+
+// CHECK: vpcompressb  %zmm1, 4(%rsp) {%k2}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x4a,0x63,0x4c,0x24,0x04]
+          vpcompressb  %zmm1, 4(%rsp) {%k2}
+
+// CHECK: vpcompressb  %zmm1, 268435456(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xb2,0x7d,0x4a,0x63,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpcompressb  %zmm1, 268435456(%rcx,%r14,8) {%k2}
+
+// CHECK: vpcompressb  %zmm1, -536870912(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xb2,0x7d,0x4a,0x63,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpcompressb  %zmm1, -536870912(%rcx,%r14,8) {%k2}
+
+// CHECK: vpcompressb  %zmm1, -536870910(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xb2,0x7d,0x4a,0x63,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpcompressb  %zmm1, -536870910(%rcx,%r14,8) {%k2}
+
+// CHECK: vpcompressw  %zmm1, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x4a,0x63,0x09]
+          vpcompressw  %zmm1, (%rcx) {%k2}
+
+// CHECK: vpcompressw  %zmm1, -8(%rsp) {%k2}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x4a,0x63,0x4c,0x24,0xfc]
+          vpcompressw  %zmm1, -8(%rsp) {%k2}
+
+// CHECK: vpcompressw  %zmm1, 8(%rsp) {%k2}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x4a,0x63,0x4c,0x24,0x04]
+          vpcompressw  %zmm1, 8(%rsp) {%k2}
+
+// CHECK: vpcompressw  %zmm1, 268435456(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xb2,0xfd,0x4a,0x63,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpcompressw  %zmm1, 268435456(%rcx,%r14,8) {%k2}
+
+// CHECK: vpcompressw  %zmm1, -536870912(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xb2,0xfd,0x4a,0x63,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpcompressw  %zmm1, -536870912(%rcx,%r14,8) {%k2}
+
+// CHECK: vpcompressw  %zmm1, -536870910(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xb2,0xfd,0x4a,0x63,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpcompressw  %zmm1, -536870910(%rcx,%r14,8) {%k2}
+// Register-only vpshld{w,d,q} and vpshrd{w,d,q} with immediate $7 (the trailing 0x07 encoding byte), unmasked, for zmm3/zmm1 and then zmm23/zmm21.
+// CHECK: vpshldw $7, %zmm3, %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x48,0x70,0xcb,0x07]
+          vpshldw $7, %zmm3, %zmm3, %zmm1
+
+// CHECK: vpshldd $7, %zmm3, %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf3,0x65,0x48,0x71,0xcb,0x07]
+          vpshldd $7, %zmm3, %zmm3, %zmm1
+
+// CHECK: vpshldq $7, %zmm3, %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x48,0x71,0xcb,0x07]
+          vpshldq $7, %zmm3, %zmm3, %zmm1
+
+// CHECK: vpshrdw $7, %zmm3, %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x48,0x72,0xcb,0x07]
+          vpshrdw $7, %zmm3, %zmm3, %zmm1
+
+// CHECK: vpshrdd $7, %zmm3, %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf3,0x65,0x48,0x73,0xcb,0x07]
+          vpshrdd $7, %zmm3, %zmm3, %zmm1
+
+// CHECK: vpshrdq $7, %zmm3, %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x48,0x73,0xcb,0x07]
+          vpshrdq $7, %zmm3, %zmm3, %zmm1
+
+// CHECK: vpshldw $7, %zmm23, %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x40,0x70,0xef,0x07]
+          vpshldw $7, %zmm23, %zmm23, %zmm21
+
+// CHECK: vpshldd $7, %zmm23, %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa3,0x45,0x40,0x71,0xef,0x07]
+          vpshldd $7, %zmm23, %zmm23, %zmm21
+
+// CHECK: vpshldq $7, %zmm23, %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x40,0x71,0xef,0x07]
+          vpshldq $7, %zmm23, %zmm23, %zmm21
+
+// CHECK: vpshrdw $7, %zmm23, %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x40,0x72,0xef,0x07]
+          vpshrdw $7, %zmm23, %zmm23, %zmm21
+
+// CHECK: vpshrdd $7, %zmm23, %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa3,0x45,0x40,0x73,0xef,0x07]
+          vpshrdd $7, %zmm23, %zmm23, %zmm21
+
+// CHECK: vpshrdq $7, %zmm23, %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x40,0x73,0xef,0x07]
+          vpshrdq $7, %zmm23, %zmm23, %zmm21
+// Register-only vpshld/vpshrd immediate forms under write mask {%k2}; the fourth encoding byte is 0x4a for zmm3/zmm1 and 0x42 for zmm23/zmm21.
+// CHECK: vpshldw $7, %zmm3, %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x4a,0x70,0xcb,0x07]
+          vpshldw $7, %zmm3, %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldd $7, %zmm3, %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0x65,0x4a,0x71,0xcb,0x07]
+          vpshldd $7, %zmm3, %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldq $7, %zmm3, %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x4a,0x71,0xcb,0x07]
+          vpshldq $7, %zmm3, %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdw $7, %zmm3, %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x4a,0x72,0xcb,0x07]
+          vpshrdw $7, %zmm3, %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdd $7, %zmm3, %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0x65,0x4a,0x73,0xcb,0x07]
+          vpshrdd $7, %zmm3, %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdq $7, %zmm3, %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x4a,0x73,0xcb,0x07]
+          vpshrdq $7, %zmm3, %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldw $7, %zmm23, %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x42,0x70,0xef,0x07]
+          vpshldw $7, %zmm23, %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldd $7, %zmm23, %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0x45,0x42,0x71,0xef,0x07]
+          vpshldd $7, %zmm23, %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldq $7, %zmm23, %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x42,0x71,0xef,0x07]
+          vpshldq $7, %zmm23, %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdw $7, %zmm23, %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x42,0x72,0xef,0x07]
+          vpshrdw $7, %zmm23, %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdd $7, %zmm23, %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0x45,0x42,0x73,0xef,0x07]
+          vpshrdd $7, %zmm23, %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdq $7, %zmm23, %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x42,0x73,0xef,0x07]
+          vpshrdq $7, %zmm23, %zmm23, %zmm21 {%k2}
+// vpshld{w,d,q} $7 with a memory operand, zmm3 as the register source and zmm1 as destination. The 8-bit displacement is stored scaled by 64 here: -256(%rsp) encodes as 0xfc and 256(%rsp) as 0x04.
+// CHECK: vpshldw  $7, (%rcx), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x48,0x70,0x09,0x07]
+          vpshldw  $7, (%rcx), %zmm3, %zmm1
+
+// CHECK: vpshldw  $7, -256(%rsp), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x48,0x70,0x4c,0x24,0xfc,0x07]
+          vpshldw  $7, -256(%rsp), %zmm3, %zmm1
+
+// CHECK: vpshldw  $7, 256(%rsp), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x48,0x70,0x4c,0x24,0x04,0x07]
+          vpshldw  $7, 256(%rsp), %zmm3, %zmm1
+
+// CHECK: vpshldw  $7, 268435456(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x48,0x70,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldw  $7, 268435456(%rcx,%r14,8), %zmm3, %zmm1
+
+// CHECK: vpshldw  $7, -536870912(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x48,0x70,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldw  $7, -536870912(%rcx,%r14,8), %zmm3, %zmm1
+
+// CHECK: vpshldw  $7, -536870910(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x48,0x70,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldw  $7, -536870910(%rcx,%r14,8), %zmm3, %zmm1
+
+// CHECK: vpshldd  $7, (%rcx), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf3,0x65,0x48,0x71,0x09,0x07]
+          vpshldd  $7, (%rcx), %zmm3, %zmm1
+
+// CHECK: vpshldd  $7, -256(%rsp), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf3,0x65,0x48,0x71,0x4c,0x24,0xfc,0x07]
+          vpshldd  $7, -256(%rsp), %zmm3, %zmm1
+
+// CHECK: vpshldd  $7, 256(%rsp), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf3,0x65,0x48,0x71,0x4c,0x24,0x04,0x07]
+          vpshldd  $7, 256(%rsp), %zmm3, %zmm1
+
+// CHECK: vpshldd  $7, 268435456(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb3,0x65,0x48,0x71,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldd  $7, 268435456(%rcx,%r14,8), %zmm3, %zmm1
+
+// CHECK: vpshldd  $7, -536870912(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb3,0x65,0x48,0x71,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldd  $7, -536870912(%rcx,%r14,8), %zmm3, %zmm1
+
+// CHECK: vpshldd  $7, -536870910(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb3,0x65,0x48,0x71,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldd  $7, -536870910(%rcx,%r14,8), %zmm3, %zmm1
+
+// CHECK: vpshldq  $7, (%rcx), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x48,0x71,0x09,0x07]
+          vpshldq  $7, (%rcx), %zmm3, %zmm1
+
+// CHECK: vpshldq  $7, -256(%rsp), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x48,0x71,0x4c,0x24,0xfc,0x07]
+          vpshldq  $7, -256(%rsp), %zmm3, %zmm1
+
+// CHECK: vpshldq  $7, 256(%rsp), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x48,0x71,0x4c,0x24,0x04,0x07]
+          vpshldq  $7, 256(%rsp), %zmm3, %zmm1
+
+// CHECK: vpshldq  $7, 268435456(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x48,0x71,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldq  $7, 268435456(%rcx,%r14,8), %zmm3, %zmm1
+
+// CHECK: vpshldq  $7, -536870912(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x48,0x71,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldq  $7, -536870912(%rcx,%r14,8), %zmm3, %zmm1
+
+// CHECK: vpshldq  $7, -536870910(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x48,0x71,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldq  $7, -536870910(%rcx,%r14,8), %zmm3, %zmm1
+// vpshrd{w,d,q} $7 with a memory operand, zmm3 as the register source and zmm1 as destination (opcode bytes 0x72 for w, 0x73 for d and q). -256(%rsp) and 256(%rsp) encode as one-byte displacements 0xfc and 0x04.
+// CHECK: vpshrdw  $7, (%rcx), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x48,0x72,0x09,0x07]
+          vpshrdw  $7, (%rcx), %zmm3, %zmm1
+
+// CHECK: vpshrdw  $7, -256(%rsp), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x48,0x72,0x4c,0x24,0xfc,0x07]
+          vpshrdw  $7, -256(%rsp), %zmm3, %zmm1
+
+// CHECK: vpshrdw  $7, 256(%rsp), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x48,0x72,0x4c,0x24,0x04,0x07]
+          vpshrdw  $7, 256(%rsp), %zmm3, %zmm1
+
+// CHECK: vpshrdw  $7, 268435456(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x48,0x72,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdw  $7, 268435456(%rcx,%r14,8), %zmm3, %zmm1
+
+// CHECK: vpshrdw  $7, -536870912(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x48,0x72,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdw  $7, -536870912(%rcx,%r14,8), %zmm3, %zmm1
+
+// CHECK: vpshrdw  $7, -536870910(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x48,0x72,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdw  $7, -536870910(%rcx,%r14,8), %zmm3, %zmm1
+
+// CHECK: vpshrdd  $7, (%rcx), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf3,0x65,0x48,0x73,0x09,0x07]
+          vpshrdd  $7, (%rcx), %zmm3, %zmm1
+
+// CHECK: vpshrdd  $7, -256(%rsp), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf3,0x65,0x48,0x73,0x4c,0x24,0xfc,0x07]
+          vpshrdd  $7, -256(%rsp), %zmm3, %zmm1
+
+// CHECK: vpshrdd  $7, 256(%rsp), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf3,0x65,0x48,0x73,0x4c,0x24,0x04,0x07]
+          vpshrdd  $7, 256(%rsp), %zmm3, %zmm1
+
+// CHECK: vpshrdd  $7, 268435456(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb3,0x65,0x48,0x73,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdd  $7, 268435456(%rcx,%r14,8), %zmm3, %zmm1
+
+// CHECK: vpshrdd  $7, -536870912(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb3,0x65,0x48,0x73,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdd  $7, -536870912(%rcx,%r14,8), %zmm3, %zmm1
+
+// CHECK: vpshrdd  $7, -536870910(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb3,0x65,0x48,0x73,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdd  $7, -536870910(%rcx,%r14,8), %zmm3, %zmm1
+
+// CHECK: vpshrdq  $7, (%rcx), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x48,0x73,0x09,0x07]
+          vpshrdq  $7, (%rcx), %zmm3, %zmm1
+
+// CHECK: vpshrdq  $7, -256(%rsp), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x48,0x73,0x4c,0x24,0xfc,0x07]
+          vpshrdq  $7, -256(%rsp), %zmm3, %zmm1
+
+// CHECK: vpshrdq  $7, 256(%rsp), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x48,0x73,0x4c,0x24,0x04,0x07]
+          vpshrdq  $7, 256(%rsp), %zmm3, %zmm1
+
+// CHECK: vpshrdq  $7, 268435456(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x48,0x73,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdq  $7, 268435456(%rcx,%r14,8), %zmm3, %zmm1
+
+// CHECK: vpshrdq  $7, -536870912(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x48,0x73,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdq  $7, -536870912(%rcx,%r14,8), %zmm3, %zmm1
+
+// CHECK: vpshrdq  $7, -536870910(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x48,0x73,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdq  $7, -536870910(%rcx,%r14,8), %zmm3, %zmm1
+// vpshld{w,d,q} $7 memory-operand forms with zmm23 as the register source and zmm21 as destination, over the same six addressing forms.
+// CHECK: vpshldw  $7, (%rcx), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x40,0x70,0x29,0x07]
+          vpshldw  $7, (%rcx), %zmm23, %zmm21
+
+// CHECK: vpshldw  $7, -256(%rsp), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x40,0x70,0x6c,0x24,0xfc,0x07]
+          vpshldw  $7, -256(%rsp), %zmm23, %zmm21
+
+// CHECK: vpshldw  $7, 256(%rsp), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x40,0x70,0x6c,0x24,0x04,0x07]
+          vpshldw  $7, 256(%rsp), %zmm23, %zmm21
+
+// CHECK: vpshldw  $7, 268435456(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x40,0x70,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldw  $7, 268435456(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshldw  $7, -536870912(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x40,0x70,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldw  $7, -536870912(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshldw  $7, -536870910(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x40,0x70,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldw  $7, -536870910(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshldd  $7, (%rcx), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe3,0x45,0x40,0x71,0x29,0x07]
+          vpshldd  $7, (%rcx), %zmm23, %zmm21
+
+// CHECK: vpshldd  $7, -256(%rsp), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe3,0x45,0x40,0x71,0x6c,0x24,0xfc,0x07]
+          vpshldd  $7, -256(%rsp), %zmm23, %zmm21
+
+// CHECK: vpshldd  $7, 256(%rsp), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe3,0x45,0x40,0x71,0x6c,0x24,0x04,0x07]
+          vpshldd  $7, 256(%rsp), %zmm23, %zmm21
+
+// CHECK: vpshldd  $7, 268435456(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa3,0x45,0x40,0x71,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldd  $7, 268435456(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshldd  $7, -536870912(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa3,0x45,0x40,0x71,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldd  $7, -536870912(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshldd  $7, -536870910(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa3,0x45,0x40,0x71,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldd  $7, -536870910(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshldq  $7, (%rcx), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x40,0x71,0x29,0x07]
+          vpshldq  $7, (%rcx), %zmm23, %zmm21
+
+// CHECK: vpshldq  $7, -256(%rsp), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x40,0x71,0x6c,0x24,0xfc,0x07]
+          vpshldq  $7, -256(%rsp), %zmm23, %zmm21
+
+// CHECK: vpshldq  $7, 256(%rsp), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x40,0x71,0x6c,0x24,0x04,0x07]
+          vpshldq  $7, 256(%rsp), %zmm23, %zmm21
+
+// CHECK: vpshldq  $7, 268435456(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x40,0x71,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldq  $7, 268435456(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshldq  $7, -536870912(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x40,0x71,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldq  $7, -536870912(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshldq  $7, -536870910(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x40,0x71,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldq  $7, -536870910(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshrdw  $7, (%rcx), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x40,0x72,0x29,0x07]
+          vpshrdw  $7, (%rcx), %zmm23, %zmm21
+
+// CHECK: vpshrdw  $7, -256(%rsp), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x40,0x72,0x6c,0x24,0xfc,0x07]
+          vpshrdw  $7, -256(%rsp), %zmm23, %zmm21
+
+// CHECK: vpshrdw  $7, 256(%rsp), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x40,0x72,0x6c,0x24,0x04,0x07]
+          vpshrdw  $7, 256(%rsp), %zmm23, %zmm21
+
+// CHECK: vpshrdw  $7, 268435456(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x40,0x72,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdw  $7, 268435456(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshrdw  $7, -536870912(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x40,0x72,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdw  $7, -536870912(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshrdw  $7, -536870910(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x40,0x72,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdw  $7, -536870910(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshrdd  $7, (%rcx), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe3,0x45,0x40,0x73,0x29,0x07]
+          vpshrdd  $7, (%rcx), %zmm23, %zmm21
+
+// CHECK: vpshrdd  $7, -256(%rsp), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe3,0x45,0x40,0x73,0x6c,0x24,0xfc,0x07]
+          vpshrdd  $7, -256(%rsp), %zmm23, %zmm21
+
+// CHECK: vpshrdd  $7, 256(%rsp), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe3,0x45,0x40,0x73,0x6c,0x24,0x04,0x07]
+          vpshrdd  $7, 256(%rsp), %zmm23, %zmm21
+
+// CHECK: vpshrdd  $7, 268435456(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa3,0x45,0x40,0x73,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdd  $7, 268435456(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshrdd  $7, -536870912(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa3,0x45,0x40,0x73,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdd  $7, -536870912(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshrdd  $7, -536870910(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa3,0x45,0x40,0x73,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdd  $7, -536870910(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshrdq  $7, (%rcx), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x40,0x73,0x29,0x07]
+          vpshrdq  $7, (%rcx), %zmm23, %zmm21
+
+// CHECK: vpshrdq  $7, -256(%rsp), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x40,0x73,0x6c,0x24,0xfc,0x07]
+          vpshrdq  $7, -256(%rsp), %zmm23, %zmm21
+
+// CHECK: vpshrdq  $7, 256(%rsp), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x40,0x73,0x6c,0x24,0x04,0x07]
+          vpshrdq  $7, 256(%rsp), %zmm23, %zmm21
+
+// CHECK: vpshrdq  $7, 268435456(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x40,0x73,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdq  $7, 268435456(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshrdq  $7, -536870912(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x40,0x73,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdq  $7, -536870912(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshrdq  $7, -536870910(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x40,0x73,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdq  $7, -536870910(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshldw  $7, (%rcx), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x4a,0x70,0x09,0x07]
+          vpshldw  $7, (%rcx), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldw  $7, -256(%rsp), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x4a,0x70,0x4c,0x24,0xfc,0x07]
+          vpshldw  $7, -256(%rsp), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldw  $7, 256(%rsp), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x4a,0x70,0x4c,0x24,0x04,0x07]
+          vpshldw  $7, 256(%rsp), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldw  $7, 268435456(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x4a,0x70,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldw  $7, 268435456(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldw  $7, -536870912(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x4a,0x70,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldw  $7, -536870912(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldw  $7, -536870910(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x4a,0x70,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldw  $7, -536870910(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldd  $7, (%rcx), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0x65,0x4a,0x71,0x09,0x07]
+          vpshldd  $7, (%rcx), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldd  $7, -256(%rsp), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0x65,0x4a,0x71,0x4c,0x24,0xfc,0x07]
+          vpshldd  $7, -256(%rsp), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldd  $7, 256(%rsp), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0x65,0x4a,0x71,0x4c,0x24,0x04,0x07]
+          vpshldd  $7, 256(%rsp), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldd  $7, 268435456(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0x65,0x4a,0x71,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldd  $7, 268435456(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldd  $7, -536870912(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0x65,0x4a,0x71,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldd  $7, -536870912(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldd  $7, -536870910(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0x65,0x4a,0x71,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldd  $7, -536870910(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldq  $7, (%rcx), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x4a,0x71,0x09,0x07]
+          vpshldq  $7, (%rcx), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldq  $7, -256(%rsp), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x4a,0x71,0x4c,0x24,0xfc,0x07]
+          vpshldq  $7, -256(%rsp), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldq  $7, 256(%rsp), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x4a,0x71,0x4c,0x24,0x04,0x07]
+          vpshldq  $7, 256(%rsp), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldq  $7, 268435456(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x4a,0x71,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldq  $7, 268435456(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldq  $7, -536870912(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x4a,0x71,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldq  $7, -536870912(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldq  $7, -536870910(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x4a,0x71,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldq  $7, -536870910(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdw  $7, (%rcx), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x4a,0x72,0x09,0x07]
+          vpshrdw  $7, (%rcx), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdw  $7, -256(%rsp), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x4a,0x72,0x4c,0x24,0xfc,0x07]
+          vpshrdw  $7, -256(%rsp), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdw  $7, 256(%rsp), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x4a,0x72,0x4c,0x24,0x04,0x07]
+          vpshrdw  $7, 256(%rsp), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdw  $7, 268435456(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x4a,0x72,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdw  $7, 268435456(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdw  $7, -536870912(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x4a,0x72,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdw  $7, -536870912(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdw  $7, -536870910(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x4a,0x72,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdw  $7, -536870910(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdd  $7, (%rcx), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0x65,0x4a,0x73,0x09,0x07]
+          vpshrdd  $7, (%rcx), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdd  $7, -256(%rsp), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0x65,0x4a,0x73,0x4c,0x24,0xfc,0x07]
+          vpshrdd  $7, -256(%rsp), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdd  $7, 256(%rsp), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0x65,0x4a,0x73,0x4c,0x24,0x04,0x07]
+          vpshrdd  $7, 256(%rsp), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdd  $7, 268435456(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0x65,0x4a,0x73,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdd  $7, 268435456(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdd  $7, -536870912(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0x65,0x4a,0x73,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdd  $7, -536870912(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdd  $7, -536870910(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0x65,0x4a,0x73,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdd  $7, -536870910(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdq  $7, (%rcx), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x4a,0x73,0x09,0x07]
+          vpshrdq  $7, (%rcx), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdq  $7, -256(%rsp), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x4a,0x73,0x4c,0x24,0xfc,0x07]
+          vpshrdq  $7, -256(%rsp), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdq  $7, 256(%rsp), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x4a,0x73,0x4c,0x24,0x04,0x07]
+          vpshrdq  $7, 256(%rsp), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdq  $7, 268435456(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x4a,0x73,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdq  $7, 268435456(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdq  $7, -536870912(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x4a,0x73,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdq  $7, -536870912(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdq  $7, -536870910(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x4a,0x73,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdq  $7, -536870910(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldw  $7, (%rcx), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x42,0x70,0x29,0x07]
+          vpshldw  $7, (%rcx), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldw  $7, -256(%rsp), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x42,0x70,0x6c,0x24,0xfc,0x07]
+          vpshldw  $7, -256(%rsp), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldw  $7, 256(%rsp), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x42,0x70,0x6c,0x24,0x04,0x07]
+          vpshldw  $7, 256(%rsp), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldw  $7, 268435456(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x42,0x70,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldw  $7, 268435456(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldw  $7, -536870912(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x42,0x70,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldw  $7, -536870912(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldw  $7, -536870910(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x42,0x70,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldw  $7, -536870910(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldd  $7, (%rcx), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0x45,0x42,0x71,0x29,0x07]
+          vpshldd  $7, (%rcx), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldd  $7, -256(%rsp), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0x45,0x42,0x71,0x6c,0x24,0xfc,0x07]
+          vpshldd  $7, -256(%rsp), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldd  $7, 256(%rsp), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0x45,0x42,0x71,0x6c,0x24,0x04,0x07]
+          vpshldd  $7, 256(%rsp), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldd  $7, 268435456(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0x45,0x42,0x71,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldd  $7, 268435456(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldd  $7, -536870912(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0x45,0x42,0x71,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldd  $7, -536870912(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldd  $7, -536870910(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0x45,0x42,0x71,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldd  $7, -536870910(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldq  $7, (%rcx), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x42,0x71,0x29,0x07]
+          vpshldq  $7, (%rcx), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldq  $7, -256(%rsp), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x42,0x71,0x6c,0x24,0xfc,0x07]
+          vpshldq  $7, -256(%rsp), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldq  $7, 256(%rsp), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x42,0x71,0x6c,0x24,0x04,0x07]
+          vpshldq  $7, 256(%rsp), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldq  $7, 268435456(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x42,0x71,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldq  $7, 268435456(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldq  $7, -536870912(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x42,0x71,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldq  $7, -536870912(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldq  $7, -536870910(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x42,0x71,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldq  $7, -536870910(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdw  $7, (%rcx), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x42,0x72,0x29,0x07]
+          vpshrdw  $7, (%rcx), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdw  $7, -256(%rsp), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x42,0x72,0x6c,0x24,0xfc,0x07]
+          vpshrdw  $7, -256(%rsp), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdw  $7, 256(%rsp), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x42,0x72,0x6c,0x24,0x04,0x07]
+          vpshrdw  $7, 256(%rsp), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdw  $7, 268435456(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x42,0x72,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdw  $7, 268435456(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdw  $7, -536870912(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x42,0x72,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdw  $7, -536870912(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdw  $7, -536870910(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x42,0x72,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdw  $7, -536870910(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdd  $7, (%rcx), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0x45,0x42,0x73,0x29,0x07]
+          vpshrdd  $7, (%rcx), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdd  $7, -256(%rsp), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0x45,0x42,0x73,0x6c,0x24,0xfc,0x07]
+          vpshrdd  $7, -256(%rsp), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdd  $7, 256(%rsp), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0x45,0x42,0x73,0x6c,0x24,0x04,0x07]
+          vpshrdd  $7, 256(%rsp), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdd  $7, 268435456(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0x45,0x42,0x73,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdd  $7, 268435456(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdd  $7, -536870912(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0x45,0x42,0x73,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdd  $7, -536870912(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdd  $7, -536870910(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0x45,0x42,0x73,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdd  $7, -536870910(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdq  $7, (%rcx), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x42,0x73,0x29,0x07]
+          vpshrdq  $7, (%rcx), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdq  $7, -256(%rsp), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x42,0x73,0x6c,0x24,0xfc,0x07]
+          vpshrdq  $7, -256(%rsp), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdq  $7, 256(%rsp), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x42,0x73,0x6c,0x24,0x04,0x07]
+          vpshrdq  $7, 256(%rsp), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdq  $7, 268435456(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x42,0x73,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdq  $7, 268435456(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdq  $7, -536870912(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x42,0x73,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdq  $7, -536870912(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdq  $7, -536870910(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x42,0x73,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdq  $7, -536870910(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldvw %zmm3, %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x48,0x70,0xcb]
+          vpshldvw %zmm3, %zmm3, %zmm1
+
+// CHECK: vpshldvd %zmm3, %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x65,0x48,0x71,0xcb]
+          vpshldvd %zmm3, %zmm3, %zmm1
+
+// CHECK: vpshldvq %zmm3, %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x48,0x71,0xcb]
+          vpshldvq %zmm3, %zmm3, %zmm1
+
+// CHECK: vpshrdvw %zmm3, %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x48,0x72,0xcb]
+          vpshrdvw %zmm3, %zmm3, %zmm1
+
+// CHECK: vpshrdvd %zmm3, %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x65,0x48,0x73,0xcb]
+          vpshrdvd %zmm3, %zmm3, %zmm1
+
+// CHECK: vpshrdvq %zmm3, %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x48,0x73,0xcb]
+          vpshrdvq %zmm3, %zmm3, %zmm1
+
+// CHECK: vpshldvw %zmm23, %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x40,0x70,0xef]
+          vpshldvw %zmm23, %zmm23, %zmm21
+
+// CHECK: vpshldvd %zmm23, %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x45,0x40,0x71,0xef]
+          vpshldvd %zmm23, %zmm23, %zmm21
+
+// CHECK: vpshldvq %zmm23, %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x40,0x71,0xef]
+          vpshldvq %zmm23, %zmm23, %zmm21
+
+// CHECK: vpshrdvw %zmm23, %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x40,0x72,0xef]
+          vpshrdvw %zmm23, %zmm23, %zmm21
+
+// CHECK: vpshrdvd %zmm23, %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x45,0x40,0x73,0xef]
+          vpshrdvd %zmm23, %zmm23, %zmm21
+
+// CHECK: vpshrdvq %zmm23, %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x40,0x73,0xef]
+          vpshrdvq %zmm23, %zmm23, %zmm21
+
+// CHECK: vpshldvw %zmm3, %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x4a,0x70,0xcb]
+          vpshldvw %zmm3, %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldvd %zmm3, %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x65,0x4a,0x71,0xcb]
+          vpshldvd %zmm3, %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldvq %zmm3, %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x4a,0x71,0xcb]
+          vpshldvq %zmm3, %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdvw %zmm3, %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x4a,0x72,0xcb]
+          vpshrdvw %zmm3, %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdvd %zmm3, %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x65,0x4a,0x73,0xcb]
+          vpshrdvd %zmm3, %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdvq %zmm3, %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x4a,0x73,0xcb]
+          vpshrdvq %zmm3, %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldvw %zmm23, %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x42,0x70,0xef]
+          vpshldvw %zmm23, %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldvd %zmm23, %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x45,0x42,0x71,0xef]
+          vpshldvd %zmm23, %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldvq %zmm23, %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x42,0x71,0xef]
+          vpshldvq %zmm23, %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdvw %zmm23, %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x42,0x72,0xef]
+          vpshrdvw %zmm23, %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdvd %zmm23, %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x45,0x42,0x73,0xef]
+          vpshrdvd %zmm23, %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdvq %zmm23, %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x42,0x73,0xef]
+          vpshrdvq %zmm23, %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldvw  (%rcx), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x48,0x70,0x09]
+          vpshldvw  (%rcx), %zmm3, %zmm1
+
+// CHECK: vpshldvw  -256(%rsp), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x48,0x70,0x4c,0x24,0xfc]
+          vpshldvw  -256(%rsp), %zmm3, %zmm1
+
+// CHECK: vpshldvw  256(%rsp), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x48,0x70,0x4c,0x24,0x04]
+          vpshldvw  256(%rsp), %zmm3, %zmm1
+
+// CHECK: vpshldvw  268435456(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x48,0x70,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvw  268435456(%rcx,%r14,8), %zmm3, %zmm1
+
+// CHECK: vpshldvw  -536870912(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x48,0x70,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvw  -536870912(%rcx,%r14,8), %zmm3, %zmm1
+
+// CHECK: vpshldvw  -536870910(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x48,0x70,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvw  -536870910(%rcx,%r14,8), %zmm3, %zmm1
+
+// CHECK: vpshldvd  (%rcx), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x65,0x48,0x71,0x09]
+          vpshldvd  (%rcx), %zmm3, %zmm1
+
+// CHECK: vpshldvd  -256(%rsp), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x65,0x48,0x71,0x4c,0x24,0xfc]
+          vpshldvd  -256(%rsp), %zmm3, %zmm1
+
+// CHECK: vpshldvd  256(%rsp), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x65,0x48,0x71,0x4c,0x24,0x04]
+          vpshldvd  256(%rsp), %zmm3, %zmm1
+
+// CHECK: vpshldvd  268435456(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb2,0x65,0x48,0x71,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvd  268435456(%rcx,%r14,8), %zmm3, %zmm1
+
+// CHECK: vpshldvd  -536870912(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb2,0x65,0x48,0x71,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvd  -536870912(%rcx,%r14,8), %zmm3, %zmm1
+
+// CHECK: vpshldvd  -536870910(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb2,0x65,0x48,0x71,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvd  -536870910(%rcx,%r14,8), %zmm3, %zmm1
+
+// CHECK: vpshldvq  (%rcx), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x48,0x71,0x09]
+          vpshldvq  (%rcx), %zmm3, %zmm1
+
+// CHECK: vpshldvq  -256(%rsp), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x48,0x71,0x4c,0x24,0xfc]
+          vpshldvq  -256(%rsp), %zmm3, %zmm1
+
+// CHECK: vpshldvq  256(%rsp), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x48,0x71,0x4c,0x24,0x04]
+          vpshldvq  256(%rsp), %zmm3, %zmm1
+
+// CHECK: vpshldvq  268435456(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x48,0x71,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvq  268435456(%rcx,%r14,8), %zmm3, %zmm1
+
+// CHECK: vpshldvq  -536870912(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x48,0x71,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvq  -536870912(%rcx,%r14,8), %zmm3, %zmm1
+
+// CHECK: vpshldvq  -536870910(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x48,0x71,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvq  -536870910(%rcx,%r14,8), %zmm3, %zmm1
+
+// CHECK: vpshrdvw  (%rcx), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x48,0x72,0x09]
+          vpshrdvw  (%rcx), %zmm3, %zmm1
+
+// CHECK: vpshrdvw  -256(%rsp), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x48,0x72,0x4c,0x24,0xfc]
+          vpshrdvw  -256(%rsp), %zmm3, %zmm1
+
+// CHECK: vpshrdvw  256(%rsp), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x48,0x72,0x4c,0x24,0x04]
+          vpshrdvw  256(%rsp), %zmm3, %zmm1
+
+// CHECK: vpshrdvw  268435456(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x48,0x72,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvw  268435456(%rcx,%r14,8), %zmm3, %zmm1
+
+// CHECK: vpshrdvw  -536870912(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x48,0x72,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvw  -536870912(%rcx,%r14,8), %zmm3, %zmm1
+
+// CHECK: vpshrdvw  -536870910(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x48,0x72,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvw  -536870910(%rcx,%r14,8), %zmm3, %zmm1
+
+// CHECK: vpshrdvd  (%rcx), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x65,0x48,0x73,0x09]
+          vpshrdvd  (%rcx), %zmm3, %zmm1
+
+// CHECK: vpshrdvd  -256(%rsp), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x65,0x48,0x73,0x4c,0x24,0xfc]
+          vpshrdvd  -256(%rsp), %zmm3, %zmm1
+
+// CHECK: vpshrdvd  256(%rsp), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf2,0x65,0x48,0x73,0x4c,0x24,0x04]
+          vpshrdvd  256(%rsp), %zmm3, %zmm1
+
+// CHECK: vpshrdvd  268435456(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb2,0x65,0x48,0x73,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvd  268435456(%rcx,%r14,8), %zmm3, %zmm1
+
+// CHECK: vpshrdvd  -536870912(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb2,0x65,0x48,0x73,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvd  -536870912(%rcx,%r14,8), %zmm3, %zmm1
+
+// CHECK: vpshrdvd  -536870910(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb2,0x65,0x48,0x73,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvd  -536870910(%rcx,%r14,8), %zmm3, %zmm1
+
+// CHECK: vpshrdvq  (%rcx), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x48,0x73,0x09]
+          vpshrdvq  (%rcx), %zmm3, %zmm1
+
+// CHECK: vpshrdvq  -256(%rsp), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x48,0x73,0x4c,0x24,0xfc]
+          vpshrdvq  -256(%rsp), %zmm3, %zmm1
+
+// CHECK: vpshrdvq  256(%rsp), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x48,0x73,0x4c,0x24,0x04]
+          vpshrdvq  256(%rsp), %zmm3, %zmm1
+
+// CHECK: vpshrdvq  268435456(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x48,0x73,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvq  268435456(%rcx,%r14,8), %zmm3, %zmm1
+
+// CHECK: vpshrdvq  -536870912(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x48,0x73,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvq  -536870912(%rcx,%r14,8), %zmm3, %zmm1
+
+// CHECK: vpshrdvq  -536870910(%rcx,%r14,8), %zmm3, %zmm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x48,0x73,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvq  -536870910(%rcx,%r14,8), %zmm3, %zmm1
+
+// CHECK: vpshldvw  (%rcx), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x40,0x70,0x29]
+          vpshldvw  (%rcx), %zmm23, %zmm21
+
+// CHECK: vpshldvw  -256(%rsp), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x40,0x70,0x6c,0x24,0xfc]
+          vpshldvw  -256(%rsp), %zmm23, %zmm21
+
+// CHECK: vpshldvw  256(%rsp), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x40,0x70,0x6c,0x24,0x04]
+          vpshldvw  256(%rsp), %zmm23, %zmm21
+
+// CHECK: vpshldvw  268435456(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x40,0x70,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvw  268435456(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshldvw  -536870912(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x40,0x70,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvw  -536870912(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshldvw  -536870910(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x40,0x70,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvw  -536870910(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshldvd  (%rcx), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x45,0x40,0x71,0x29]
+          vpshldvd  (%rcx), %zmm23, %zmm21
+
+// CHECK: vpshldvd  -256(%rsp), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x45,0x40,0x71,0x6c,0x24,0xfc]
+          vpshldvd  -256(%rsp), %zmm23, %zmm21
+
+// CHECK: vpshldvd  256(%rsp), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x45,0x40,0x71,0x6c,0x24,0x04]
+          vpshldvd  256(%rsp), %zmm23, %zmm21
+
+// CHECK: vpshldvd  268435456(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x45,0x40,0x71,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvd  268435456(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshldvd  -536870912(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x45,0x40,0x71,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvd  -536870912(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshldvd  -536870910(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x45,0x40,0x71,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvd  -536870910(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshldvq  (%rcx), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x40,0x71,0x29]
+          vpshldvq  (%rcx), %zmm23, %zmm21
+
+// CHECK: vpshldvq  -256(%rsp), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x40,0x71,0x6c,0x24,0xfc]
+          vpshldvq  -256(%rsp), %zmm23, %zmm21
+
+// CHECK: vpshldvq  256(%rsp), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x40,0x71,0x6c,0x24,0x04]
+          vpshldvq  256(%rsp), %zmm23, %zmm21
+
+// CHECK: vpshldvq  268435456(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x40,0x71,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvq  268435456(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshldvq  -536870912(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x40,0x71,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvq  -536870912(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshldvq  -536870910(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x40,0x71,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvq  -536870910(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshrdvw  (%rcx), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x40,0x72,0x29]
+          vpshrdvw  (%rcx), %zmm23, %zmm21
+
+// CHECK: vpshrdvw  -256(%rsp), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x40,0x72,0x6c,0x24,0xfc]
+          vpshrdvw  -256(%rsp), %zmm23, %zmm21
+
+// CHECK: vpshrdvw  256(%rsp), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x40,0x72,0x6c,0x24,0x04]
+          vpshrdvw  256(%rsp), %zmm23, %zmm21
+
+// CHECK: vpshrdvw  268435456(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x40,0x72,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvw  268435456(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshrdvw  -536870912(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x40,0x72,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvw  -536870912(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshrdvw  -536870910(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x40,0x72,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvw  -536870910(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshrdvd  (%rcx), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x45,0x40,0x73,0x29]
+          vpshrdvd  (%rcx), %zmm23, %zmm21
+
+// CHECK: vpshrdvd  -256(%rsp), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x45,0x40,0x73,0x6c,0x24,0xfc]
+          vpshrdvd  -256(%rsp), %zmm23, %zmm21
+
+// CHECK: vpshrdvd  256(%rsp), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe2,0x45,0x40,0x73,0x6c,0x24,0x04]
+          vpshrdvd  256(%rsp), %zmm23, %zmm21
+
+// CHECK: vpshrdvd  268435456(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x45,0x40,0x73,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvd  268435456(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshrdvd  -536870912(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x45,0x40,0x73,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvd  -536870912(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshrdvd  -536870910(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa2,0x45,0x40,0x73,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvd  -536870910(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshrdvq  (%rcx), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x40,0x73,0x29]
+          vpshrdvq  (%rcx), %zmm23, %zmm21
+
+// CHECK: vpshrdvq  -256(%rsp), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x40,0x73,0x6c,0x24,0xfc]
+          vpshrdvq  -256(%rsp), %zmm23, %zmm21
+
+// CHECK: vpshrdvq  256(%rsp), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x40,0x73,0x6c,0x24,0x04]
+          vpshrdvq  256(%rsp), %zmm23, %zmm21
+
+// CHECK: vpshrdvq  268435456(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x40,0x73,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvq  268435456(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshrdvq  -536870912(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x40,0x73,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvq  -536870912(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshrdvq  -536870910(%rcx,%r14,8), %zmm23, %zmm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x40,0x73,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvq  -536870910(%rcx,%r14,8), %zmm23, %zmm21
+
+// CHECK: vpshldvw  (%rcx), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x4a,0x70,0x09]
+          vpshldvw  (%rcx), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldvw  -256(%rsp), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x4a,0x70,0x4c,0x24,0xfc]
+          vpshldvw  -256(%rsp), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldvw  256(%rsp), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x4a,0x70,0x4c,0x24,0x04]
+          vpshldvw  256(%rsp), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldvw  268435456(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x4a,0x70,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvw  268435456(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldvw  -536870912(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x4a,0x70,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvw  -536870912(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldvw  -536870910(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x4a,0x70,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvw  -536870910(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldvd  (%rcx), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x65,0x4a,0x71,0x09]
+          vpshldvd  (%rcx), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldvd  -256(%rsp), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x65,0x4a,0x71,0x4c,0x24,0xfc]
+          vpshldvd  -256(%rsp), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldvd  256(%rsp), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x65,0x4a,0x71,0x4c,0x24,0x04]
+          vpshldvd  256(%rsp), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldvd  268435456(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x65,0x4a,0x71,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvd  268435456(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldvd  -536870912(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x65,0x4a,0x71,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvd  -536870912(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldvd  -536870910(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x65,0x4a,0x71,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvd  -536870910(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldvq  (%rcx), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x4a,0x71,0x09]
+          vpshldvq  (%rcx), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldvq  -256(%rsp), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x4a,0x71,0x4c,0x24,0xfc]
+          vpshldvq  -256(%rsp), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldvq  256(%rsp), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x4a,0x71,0x4c,0x24,0x04]
+          vpshldvq  256(%rsp), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldvq  268435456(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x4a,0x71,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvq  268435456(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldvq  -536870912(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x4a,0x71,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvq  -536870912(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldvq  -536870910(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x4a,0x71,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvq  -536870910(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdvw  (%rcx), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x4a,0x72,0x09]
+          vpshrdvw  (%rcx), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdvw  -256(%rsp), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x4a,0x72,0x4c,0x24,0xfc]
+          vpshrdvw  -256(%rsp), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdvw  256(%rsp), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x4a,0x72,0x4c,0x24,0x04]
+          vpshrdvw  256(%rsp), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdvw  268435456(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x4a,0x72,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvw  268435456(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdvw  -536870912(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x4a,0x72,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvw  -536870912(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdvw  -536870910(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x4a,0x72,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvw  -536870910(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdvd  (%rcx), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x65,0x4a,0x73,0x09]
+          vpshrdvd  (%rcx), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdvd  -256(%rsp), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x65,0x4a,0x73,0x4c,0x24,0xfc]
+          vpshrdvd  -256(%rsp), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdvd  256(%rsp), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x65,0x4a,0x73,0x4c,0x24,0x04]
+          vpshrdvd  256(%rsp), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdvd  268435456(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x65,0x4a,0x73,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvd  268435456(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdvd  -536870912(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x65,0x4a,0x73,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvd  -536870912(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdvd  -536870910(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x65,0x4a,0x73,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvd  -536870910(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdvq  (%rcx), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x4a,0x73,0x09]
+          vpshrdvq  (%rcx), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdvq  -256(%rsp), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x4a,0x73,0x4c,0x24,0xfc]
+          vpshrdvq  -256(%rsp), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdvq  256(%rsp), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x4a,0x73,0x4c,0x24,0x04]
+          vpshrdvq  256(%rsp), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdvq  268435456(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x4a,0x73,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvq  268435456(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdvq  -536870912(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x4a,0x73,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvq  -536870912(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshrdvq  -536870910(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x4a,0x73,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvq  -536870910(%rcx,%r14,8), %zmm3, %zmm1 {%k2}
+
+// CHECK: vpshldvw  (%rcx), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x42,0x70,0x29]
+          vpshldvw  (%rcx), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldvw  -256(%rsp), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x42,0x70,0x6c,0x24,0xfc]
+          vpshldvw  -256(%rsp), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldvw  256(%rsp), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x42,0x70,0x6c,0x24,0x04]
+          vpshldvw  256(%rsp), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldvw  268435456(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x42,0x70,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvw  268435456(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldvw  -536870912(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x42,0x70,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvw  -536870912(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldvw  -536870910(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x42,0x70,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvw  -536870910(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldvd  (%rcx), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x45,0x42,0x71,0x29]
+          vpshldvd  (%rcx), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldvd  -256(%rsp), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x45,0x42,0x71,0x6c,0x24,0xfc]
+          vpshldvd  -256(%rsp), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldvd  256(%rsp), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x45,0x42,0x71,0x6c,0x24,0x04]
+          vpshldvd  256(%rsp), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldvd  268435456(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x45,0x42,0x71,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvd  268435456(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldvd  -536870912(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x45,0x42,0x71,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvd  -536870912(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldvd  -536870910(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x45,0x42,0x71,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvd  -536870910(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldvq  (%rcx), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x42,0x71,0x29]
+          vpshldvq  (%rcx), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldvq  -256(%rsp), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x42,0x71,0x6c,0x24,0xfc]
+          vpshldvq  -256(%rsp), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldvq  256(%rsp), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x42,0x71,0x6c,0x24,0x04]
+          vpshldvq  256(%rsp), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldvq  268435456(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x42,0x71,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvq  268435456(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldvq  -536870912(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x42,0x71,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvq  -536870912(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshldvq  -536870910(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x42,0x71,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvq  -536870910(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdvw  (%rcx), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x42,0x72,0x29]
+          vpshrdvw  (%rcx), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdvw  -256(%rsp), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x42,0x72,0x6c,0x24,0xfc]
+          vpshrdvw  -256(%rsp), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdvw  256(%rsp), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x42,0x72,0x6c,0x24,0x04]
+          vpshrdvw  256(%rsp), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdvw  268435456(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x42,0x72,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvw  268435456(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdvw  -536870912(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x42,0x72,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvw  -536870912(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdvw  -536870910(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x42,0x72,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvw  -536870910(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdvd  (%rcx), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x45,0x42,0x73,0x29]
+          vpshrdvd  (%rcx), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdvd  -256(%rsp), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x45,0x42,0x73,0x6c,0x24,0xfc]
+          vpshrdvd  -256(%rsp), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdvd  256(%rsp), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x45,0x42,0x73,0x6c,0x24,0x04]
+          vpshrdvd  256(%rsp), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdvd  268435456(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x45,0x42,0x73,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvd  268435456(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdvd  -536870912(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x45,0x42,0x73,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvd  -536870912(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdvd  -536870910(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x45,0x42,0x73,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvd  -536870910(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdvq  (%rcx), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x42,0x73,0x29]
+          vpshrdvq  (%rcx), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdvq  -256(%rsp), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x42,0x73,0x6c,0x24,0xfc]
+          vpshrdvq  -256(%rsp), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdvq  256(%rsp), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x42,0x73,0x6c,0x24,0x04]
+          vpshrdvq  256(%rsp), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdvq  268435456(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x42,0x73,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvq  268435456(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdvq  -536870912(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x42,0x73,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvq  -536870912(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+
+// CHECK: vpshrdvq  -536870910(%rcx,%r14,8), %zmm23, %zmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x42,0x73,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvq  -536870910(%rcx,%r14,8), %zmm23, %zmm21 {%k2}

Added: llvm/trunk/test/MC/X86/avx512vbmi2vl-encoding.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/avx512vbmi2vl-encoding.s?rev=318745&view=auto
==============================================================================
--- llvm/trunk/test/MC/X86/avx512vbmi2vl-encoding.s (added)
+++ llvm/trunk/test/MC/X86/avx512vbmi2vl-encoding.s Tue Nov 21 01:48:44 2017
@@ -0,0 +1,3585 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vbmi2 -mattr=+avx512vl --show-encoding < %s | FileCheck %s
+
+// CHECK: vpexpandb %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x62,0xcb]
+          vpexpandb %xmm3, %xmm1
+
+// CHECK: vpexpandw %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf2,0xfd,0x08,0x62,0xcb]
+          vpexpandw %xmm3, %xmm1
+
+// CHECK: vpexpandb %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x62,0xef]
+          vpexpandb %xmm23, %xmm21
+
+// CHECK: vpexpandw %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa2,0xfd,0x08,0x62,0xef]
+          vpexpandw %xmm23, %xmm21
+
+// CHECK: vpexpandb %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x0a,0x62,0xcb]
+          vpexpandb %xmm3, %xmm1 {%k2}
+
+// CHECK: vpexpandw %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x0a,0x62,0xcb]
+          vpexpandw %xmm3, %xmm1 {%k2}
+
+// CHECK: vpexpandb %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x0a,0x62,0xef]
+          vpexpandb %xmm23, %xmm21 {%k2}
+
+// CHECK: vpexpandw %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x0a,0x62,0xef]
+          vpexpandw %xmm23, %xmm21 {%k2}
+
+// CHECK: vpexpandb  (%rcx), %xmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x62,0x09]
+          vpexpandb  (%rcx), %xmm1
+
+// CHECK: vpexpandb  -4(%rsp), %xmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x62,0x4c,0x24,0xfc]
+          vpexpandb  -4(%rsp), %xmm1
+
+// CHECK: vpexpandb  4(%rsp), %xmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x62,0x4c,0x24,0x04]
+          vpexpandb  4(%rsp), %xmm1
+
+// CHECK: vpexpandb  268435456(%rcx,%r14,8), %xmm1
+// CHECK: encoding: [0x62,0xb2,0x7d,0x08,0x62,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpexpandb  268435456(%rcx,%r14,8), %xmm1
+
+// CHECK: vpexpandb  -536870912(%rcx,%r14,8), %xmm1
+// CHECK: encoding: [0x62,0xb2,0x7d,0x08,0x62,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpexpandb  -536870912(%rcx,%r14,8), %xmm1
+
+// CHECK: vpexpandb  -536870910(%rcx,%r14,8), %xmm1
+// CHECK: encoding: [0x62,0xb2,0x7d,0x08,0x62,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpexpandb  -536870910(%rcx,%r14,8), %xmm1
+
+// CHECK: vpexpandw  (%rcx), %xmm1
+// CHECK: encoding: [0x62,0xf2,0xfd,0x08,0x62,0x09]
+          vpexpandw  (%rcx), %xmm1
+
+// CHECK: vpexpandw  -8(%rsp), %xmm1
+// CHECK: encoding: [0x62,0xf2,0xfd,0x08,0x62,0x4c,0x24,0xfc]
+          vpexpandw  -8(%rsp), %xmm1
+
+// CHECK: vpexpandw  8(%rsp), %xmm1
+// CHECK: encoding: [0x62,0xf2,0xfd,0x08,0x62,0x4c,0x24,0x04]
+          vpexpandw  8(%rsp), %xmm1
+
+// CHECK: vpexpandw  268435456(%rcx,%r14,8), %xmm1
+// CHECK: encoding: [0x62,0xb2,0xfd,0x08,0x62,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpexpandw  268435456(%rcx,%r14,8), %xmm1
+
+// CHECK: vpexpandw  -536870912(%rcx,%r14,8), %xmm1
+// CHECK: encoding: [0x62,0xb2,0xfd,0x08,0x62,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpexpandw  -536870912(%rcx,%r14,8), %xmm1
+
+// CHECK: vpexpandw  -536870910(%rcx,%r14,8), %xmm1
+// CHECK: encoding: [0x62,0xb2,0xfd,0x08,0x62,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpexpandw  -536870910(%rcx,%r14,8), %xmm1
+
+// CHECK: vpexpandb  (%rcx), %xmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x62,0x29]
+          vpexpandb  (%rcx), %xmm21
+
+// CHECK: vpexpandb  -4(%rsp), %xmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x62,0x6c,0x24,0xfc]
+          vpexpandb  -4(%rsp), %xmm21
+
+// CHECK: vpexpandb  4(%rsp), %xmm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x62,0x6c,0x24,0x04]
+          vpexpandb  4(%rsp), %xmm21
+
+// CHECK: vpexpandb  268435456(%rcx,%r14,8), %xmm21
+// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x62,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpexpandb  268435456(%rcx,%r14,8), %xmm21
+
+// CHECK: vpexpandb  -536870912(%rcx,%r14,8), %xmm21
+// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x62,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpexpandb  -536870912(%rcx,%r14,8), %xmm21
+
+// CHECK: vpexpandb  -536870910(%rcx,%r14,8), %xmm21
+// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x62,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpexpandb  -536870910(%rcx,%r14,8), %xmm21
+
+// CHECK: vpexpandw  (%rcx), %xmm21
+// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x62,0x29]
+          vpexpandw  (%rcx), %xmm21
+
+// CHECK: vpexpandw  -8(%rsp), %xmm21
+// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x62,0x6c,0x24,0xfc]
+          vpexpandw  -8(%rsp), %xmm21
+
+// CHECK: vpexpandw  8(%rsp), %xmm21
+// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x62,0x6c,0x24,0x04]
+          vpexpandw  8(%rsp), %xmm21
+
+// CHECK: vpexpandw  268435456(%rcx,%r14,8), %xmm21
+// CHECK: encoding: [0x62,0xa2,0xfd,0x08,0x62,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpexpandw  268435456(%rcx,%r14,8), %xmm21
+
+// CHECK: vpexpandw  -536870912(%rcx,%r14,8), %xmm21
+// CHECK: encoding: [0x62,0xa2,0xfd,0x08,0x62,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpexpandw  -536870912(%rcx,%r14,8), %xmm21
+
+// CHECK: vpexpandw  -536870910(%rcx,%r14,8), %xmm21
+// CHECK: encoding: [0x62,0xa2,0xfd,0x08,0x62,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpexpandw  -536870910(%rcx,%r14,8), %xmm21
+
+// CHECK: vpexpandb  (%rcx), %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x0a,0x62,0x09]
+          vpexpandb  (%rcx), %xmm1 {%k2}
+
+// CHECK: vpexpandb  -4(%rsp), %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x0a,0x62,0x4c,0x24,0xfc]
+          vpexpandb  -4(%rsp), %xmm1 {%k2}
+
+// CHECK: vpexpandb  4(%rsp), %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x0a,0x62,0x4c,0x24,0x04]
+          vpexpandb  4(%rsp), %xmm1 {%k2}
+
+// CHECK: vpexpandb  268435456(%rcx,%r14,8), %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x7d,0x0a,0x62,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpexpandb  268435456(%rcx,%r14,8), %xmm1 {%k2}
+
+// CHECK: vpexpandb  -536870912(%rcx,%r14,8), %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x7d,0x0a,0x62,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpexpandb  -536870912(%rcx,%r14,8), %xmm1 {%k2}
+
+// CHECK: vpexpandb  -536870910(%rcx,%r14,8), %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x7d,0x0a,0x62,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpexpandb  -536870910(%rcx,%r14,8), %xmm1 {%k2}
+
+// CHECK: vpexpandw  (%rcx), %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x0a,0x62,0x09]
+          vpexpandw  (%rcx), %xmm1 {%k2}
+
+// CHECK: vpexpandw  -8(%rsp), %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x0a,0x62,0x4c,0x24,0xfc]
+          vpexpandw  -8(%rsp), %xmm1 {%k2}
+
+// CHECK: vpexpandw  8(%rsp), %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x0a,0x62,0x4c,0x24,0x04]
+          vpexpandw  8(%rsp), %xmm1 {%k2}
+
+// CHECK: vpexpandw  268435456(%rcx,%r14,8), %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xfd,0x0a,0x62,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpexpandw  268435456(%rcx,%r14,8), %xmm1 {%k2}
+
+// CHECK: vpexpandw  -536870912(%rcx,%r14,8), %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xfd,0x0a,0x62,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpexpandw  -536870912(%rcx,%r14,8), %xmm1 {%k2}
+
+// CHECK: vpexpandw  -536870910(%rcx,%r14,8), %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xfd,0x0a,0x62,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpexpandw  -536870910(%rcx,%r14,8), %xmm1 {%k2}
+
+// CHECK: vpexpandb  (%rcx), %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x0a,0x62,0x29]
+          vpexpandb  (%rcx), %xmm21 {%k2}
+
+// CHECK: vpexpandb  -4(%rsp), %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x0a,0x62,0x6c,0x24,0xfc]
+          vpexpandb  -4(%rsp), %xmm21 {%k2}
+
+// CHECK: vpexpandb  4(%rsp), %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x0a,0x62,0x6c,0x24,0x04]
+          vpexpandb  4(%rsp), %xmm21 {%k2}
+
+// CHECK: vpexpandb  268435456(%rcx,%r14,8), %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x0a,0x62,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpexpandb  268435456(%rcx,%r14,8), %xmm21 {%k2}
+
+// CHECK: vpexpandb  -536870912(%rcx,%r14,8), %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x0a,0x62,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpexpandb  -536870912(%rcx,%r14,8), %xmm21 {%k2}
+
+// CHECK: vpexpandb  -536870910(%rcx,%r14,8), %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x0a,0x62,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpexpandb  -536870910(%rcx,%r14,8), %xmm21 {%k2}
+
+// CHECK: vpexpandw  (%rcx), %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x0a,0x62,0x29]
+          vpexpandw  (%rcx), %xmm21 {%k2}
+
+// CHECK: vpexpandw  -8(%rsp), %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x0a,0x62,0x6c,0x24,0xfc]
+          vpexpandw  -8(%rsp), %xmm21 {%k2}
+
+// CHECK: vpexpandw  8(%rsp), %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x0a,0x62,0x6c,0x24,0x04]
+          vpexpandw  8(%rsp), %xmm21 {%k2}
+
+// CHECK: vpexpandw  268435456(%rcx,%r14,8), %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x0a,0x62,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpexpandw  268435456(%rcx,%r14,8), %xmm21 {%k2}
+
+// CHECK: vpexpandw  -536870912(%rcx,%r14,8), %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x0a,0x62,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpexpandw  -536870912(%rcx,%r14,8), %xmm21 {%k2}
+
+// CHECK: vpexpandw  -536870910(%rcx,%r14,8), %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x0a,0x62,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpexpandw  -536870910(%rcx,%r14,8), %xmm21 {%k2}
+
+// CHECK: vpexpandb %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x28,0x62,0xcb]
+          vpexpandb %ymm3, %ymm1
+
+// CHECK: vpexpandw %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf2,0xfd,0x28,0x62,0xcb]
+          vpexpandw %ymm3, %ymm1
+
+// CHECK: vpexpandb %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x62,0xef]
+          vpexpandb %ymm23, %ymm21
+
+// CHECK: vpexpandw %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x62,0xef]
+          vpexpandw %ymm23, %ymm21
+
+// CHECK: vpexpandb %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x2a,0x62,0xcb]
+          vpexpandb %ymm3, %ymm1 {%k2}
+
+// CHECK: vpexpandw %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x2a,0x62,0xcb]
+          vpexpandw %ymm3, %ymm1 {%k2}
+
+// CHECK: vpexpandb %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x2a,0x62,0xef]
+          vpexpandb %ymm23, %ymm21 {%k2}
+
+// CHECK: vpexpandw %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x2a,0x62,0xef]
+          vpexpandw %ymm23, %ymm21 {%k2}
+
+// CHECK: vpexpandb  (%rcx), %ymm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x28,0x62,0x09]
+          vpexpandb  (%rcx), %ymm1
+
+// CHECK: vpexpandb  -4(%rsp), %ymm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x28,0x62,0x4c,0x24,0xfc]
+          vpexpandb  -4(%rsp), %ymm1
+
+// CHECK: vpexpandb  4(%rsp), %ymm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x28,0x62,0x4c,0x24,0x04]
+          vpexpandb  4(%rsp), %ymm1
+
+// CHECK: vpexpandb  268435456(%rcx,%r14,8), %ymm1
+// CHECK: encoding: [0x62,0xb2,0x7d,0x28,0x62,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpexpandb  268435456(%rcx,%r14,8), %ymm1
+
+// CHECK: vpexpandb  -536870912(%rcx,%r14,8), %ymm1
+// CHECK: encoding: [0x62,0xb2,0x7d,0x28,0x62,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpexpandb  -536870912(%rcx,%r14,8), %ymm1
+
+// CHECK: vpexpandb  -536870910(%rcx,%r14,8), %ymm1
+// CHECK: encoding: [0x62,0xb2,0x7d,0x28,0x62,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpexpandb  -536870910(%rcx,%r14,8), %ymm1
+
+// CHECK: vpexpandw  (%rcx), %ymm1
+// CHECK: encoding: [0x62,0xf2,0xfd,0x28,0x62,0x09]
+          vpexpandw  (%rcx), %ymm1
+
+// CHECK: vpexpandw  -8(%rsp), %ymm1
+// CHECK: encoding: [0x62,0xf2,0xfd,0x28,0x62,0x4c,0x24,0xfc]
+          vpexpandw  -8(%rsp), %ymm1
+
+// CHECK: vpexpandw  8(%rsp), %ymm1
+// CHECK: encoding: [0x62,0xf2,0xfd,0x28,0x62,0x4c,0x24,0x04]
+          vpexpandw  8(%rsp), %ymm1
+
+// CHECK: vpexpandw  268435456(%rcx,%r14,8), %ymm1
+// CHECK: encoding: [0x62,0xb2,0xfd,0x28,0x62,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpexpandw  268435456(%rcx,%r14,8), %ymm1
+
+// CHECK: vpexpandw  -536870912(%rcx,%r14,8), %ymm1
+// CHECK: encoding: [0x62,0xb2,0xfd,0x28,0x62,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpexpandw  -536870912(%rcx,%r14,8), %ymm1
+
+// CHECK: vpexpandw  -536870910(%rcx,%r14,8), %ymm1
+// CHECK: encoding: [0x62,0xb2,0xfd,0x28,0x62,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpexpandw  -536870910(%rcx,%r14,8), %ymm1
+
+// CHECK: vpexpandb  (%rcx), %ymm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x62,0x29]
+          vpexpandb  (%rcx), %ymm21
+
+// CHECK: vpexpandb  -4(%rsp), %ymm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x62,0x6c,0x24,0xfc]
+          vpexpandb  -4(%rsp), %ymm21
+
+// CHECK: vpexpandb  4(%rsp), %ymm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x62,0x6c,0x24,0x04]
+          vpexpandb  4(%rsp), %ymm21
+
+// CHECK: vpexpandb  268435456(%rcx,%r14,8), %ymm21
+// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x62,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpexpandb  268435456(%rcx,%r14,8), %ymm21
+
+// CHECK: vpexpandb  -536870912(%rcx,%r14,8), %ymm21
+// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x62,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpexpandb  -536870912(%rcx,%r14,8), %ymm21
+
+// CHECK: vpexpandb  -536870910(%rcx,%r14,8), %ymm21
+// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x62,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpexpandb  -536870910(%rcx,%r14,8), %ymm21
+
+// CHECK: vpexpandw  (%rcx), %ymm21
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x62,0x29]
+          vpexpandw  (%rcx), %ymm21
+
+// CHECK: vpexpandw  -8(%rsp), %ymm21
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x62,0x6c,0x24,0xfc]
+          vpexpandw  -8(%rsp), %ymm21
+
+// CHECK: vpexpandw  8(%rsp), %ymm21
+// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x62,0x6c,0x24,0x04]
+          vpexpandw  8(%rsp), %ymm21
+
+// CHECK: vpexpandw  268435456(%rcx,%r14,8), %ymm21
+// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x62,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpexpandw  268435456(%rcx,%r14,8), %ymm21
+
+// CHECK: vpexpandw  -536870912(%rcx,%r14,8), %ymm21
+// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x62,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpexpandw  -536870912(%rcx,%r14,8), %ymm21
+
+// CHECK: vpexpandw  -536870910(%rcx,%r14,8), %ymm21
+// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x62,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpexpandw  -536870910(%rcx,%r14,8), %ymm21
+
+// CHECK: vpexpandb  (%rcx), %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x2a,0x62,0x09]
+          vpexpandb  (%rcx), %ymm1 {%k2}
+
+// CHECK: vpexpandb  -4(%rsp), %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x2a,0x62,0x4c,0x24,0xfc]
+          vpexpandb  -4(%rsp), %ymm1 {%k2}
+
+// CHECK: vpexpandb  4(%rsp), %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x2a,0x62,0x4c,0x24,0x04]
+          vpexpandb  4(%rsp), %ymm1 {%k2}
+
+// CHECK: vpexpandb  268435456(%rcx,%r14,8), %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x7d,0x2a,0x62,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpexpandb  268435456(%rcx,%r14,8), %ymm1 {%k2}
+
+// CHECK: vpexpandb  -536870912(%rcx,%r14,8), %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x7d,0x2a,0x62,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpexpandb  -536870912(%rcx,%r14,8), %ymm1 {%k2}
+
+// CHECK: vpexpandb  -536870910(%rcx,%r14,8), %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x7d,0x2a,0x62,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpexpandb  -536870910(%rcx,%r14,8), %ymm1 {%k2}
+
+// CHECK: vpexpandw  (%rcx), %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x2a,0x62,0x09]
+          vpexpandw  (%rcx), %ymm1 {%k2}
+
+// CHECK: vpexpandw  -8(%rsp), %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x2a,0x62,0x4c,0x24,0xfc]
+          vpexpandw  -8(%rsp), %ymm1 {%k2}
+
+// CHECK: vpexpandw  8(%rsp), %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x2a,0x62,0x4c,0x24,0x04]
+          vpexpandw  8(%rsp), %ymm1 {%k2}
+
+// CHECK: vpexpandw  268435456(%rcx,%r14,8), %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xfd,0x2a,0x62,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpexpandw  268435456(%rcx,%r14,8), %ymm1 {%k2}
+
+// CHECK: vpexpandw  -536870912(%rcx,%r14,8), %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xfd,0x2a,0x62,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpexpandw  -536870912(%rcx,%r14,8), %ymm1 {%k2}
+
+// CHECK: vpexpandw  -536870910(%rcx,%r14,8), %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xfd,0x2a,0x62,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpexpandw  -536870910(%rcx,%r14,8), %ymm1 {%k2}
+
+// CHECK: vpexpandb  (%rcx), %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x2a,0x62,0x29]
+          vpexpandb  (%rcx), %ymm21 {%k2}
+
+// CHECK: vpexpandb  -4(%rsp), %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x2a,0x62,0x6c,0x24,0xfc]
+          vpexpandb  -4(%rsp), %ymm21 {%k2}
+
+// CHECK: vpexpandb  4(%rsp), %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x2a,0x62,0x6c,0x24,0x04]
+          vpexpandb  4(%rsp), %ymm21 {%k2}
+
+// CHECK: vpexpandb  268435456(%rcx,%r14,8), %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x2a,0x62,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpexpandb  268435456(%rcx,%r14,8), %ymm21 {%k2}
+
+// CHECK: vpexpandb  -536870912(%rcx,%r14,8), %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x2a,0x62,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpexpandb  -536870912(%rcx,%r14,8), %ymm21 {%k2}
+
+// CHECK: vpexpandb  -536870910(%rcx,%r14,8), %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x2a,0x62,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpexpandb  -536870910(%rcx,%r14,8), %ymm21 {%k2}
+
+// CHECK: vpexpandw  (%rcx), %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x2a,0x62,0x29]
+          vpexpandw  (%rcx), %ymm21 {%k2}
+
+// CHECK: vpexpandw  -8(%rsp), %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x2a,0x62,0x6c,0x24,0xfc]
+          vpexpandw  -8(%rsp), %ymm21 {%k2}
+
+// CHECK: vpexpandw  8(%rsp), %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x2a,0x62,0x6c,0x24,0x04]
+          vpexpandw  8(%rsp), %ymm21 {%k2}
+
+// CHECK: vpexpandw  268435456(%rcx,%r14,8), %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x2a,0x62,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpexpandw  268435456(%rcx,%r14,8), %ymm21 {%k2}
+
+// CHECK: vpexpandw  -536870912(%rcx,%r14,8), %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x2a,0x62,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpexpandw  -536870912(%rcx,%r14,8), %ymm21 {%k2}
+
+// CHECK: vpexpandw  -536870910(%rcx,%r14,8), %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x2a,0x62,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpexpandw  -536870910(%rcx,%r14,8), %ymm21 {%k2}
+
+// CHECK: vpcompressb %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x63,0xd9]
+          vpcompressb %xmm3, %xmm1
+
+// CHECK: vpcompressw %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf2,0xfd,0x08,0x63,0xd9]
+          vpcompressw %xmm3, %xmm1
+
+// CHECK: vpcompressb %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x63,0xfd]
+          vpcompressb %xmm23, %xmm21
+
+// CHECK: vpcompressw %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa2,0xfd,0x08,0x63,0xfd]
+          vpcompressw %xmm23, %xmm21
+
+// CHECK: vpcompressb %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x0a,0x63,0xd9]
+          vpcompressb %xmm3, %xmm1 {%k2}
+
+// CHECK: vpcompressw %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x0a,0x63,0xd9]
+          vpcompressw %xmm3, %xmm1 {%k2}
+
+// CHECK: vpcompressb %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x0a,0x63,0xfd]
+          vpcompressb %xmm23, %xmm21 {%k2}
+
+// CHECK: vpcompressw %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x0a,0x63,0xfd]
+          vpcompressw %xmm23, %xmm21 {%k2}
+
+// CHECK: vpcompressb  %xmm1, (%rcx)
+// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x63,0x09]
+          vpcompressb  %xmm1, (%rcx)
+
+// CHECK: vpcompressb  %xmm1, -4(%rsp)
+// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x63,0x4c,0x24,0xfc]
+          vpcompressb  %xmm1, -4(%rsp)
+
+// CHECK: vpcompressb  %xmm1, 4(%rsp)
+// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x63,0x4c,0x24,0x04]
+          vpcompressb  %xmm1, 4(%rsp)
+
+// CHECK: vpcompressb  %xmm1, 268435456(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0x7d,0x08,0x63,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpcompressb  %xmm1, 268435456(%rcx,%r14,8)
+
+// CHECK: vpcompressb  %xmm1, -536870912(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0x7d,0x08,0x63,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpcompressb  %xmm1, -536870912(%rcx,%r14,8)
+
+// CHECK: vpcompressb  %xmm1, -536870910(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0x7d,0x08,0x63,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpcompressb  %xmm1, -536870910(%rcx,%r14,8)
+
+// CHECK: vpcompressw  %xmm1, (%rcx)
+// CHECK: encoding: [0x62,0xf2,0xfd,0x08,0x63,0x09]
+          vpcompressw  %xmm1, (%rcx)
+
+// CHECK: vpcompressw  %xmm1, -8(%rsp)
+// CHECK: encoding: [0x62,0xf2,0xfd,0x08,0x63,0x4c,0x24,0xfc]
+          vpcompressw  %xmm1, -8(%rsp)
+
+// CHECK: vpcompressw  %xmm1, 8(%rsp)
+// CHECK: encoding: [0x62,0xf2,0xfd,0x08,0x63,0x4c,0x24,0x04]
+          vpcompressw  %xmm1, 8(%rsp)
+
+// CHECK: vpcompressw  %xmm1, 268435456(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0xfd,0x08,0x63,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpcompressw  %xmm1, 268435456(%rcx,%r14,8)
+
+// CHECK: vpcompressw  %xmm1, -536870912(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0xfd,0x08,0x63,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpcompressw  %xmm1, -536870912(%rcx,%r14,8)
+
+// CHECK: vpcompressw  %xmm1, -536870910(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0xfd,0x08,0x63,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpcompressw  %xmm1, -536870910(%rcx,%r14,8)
+
+// CHECK: vpcompressb  %xmm21, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x0a,0x63,0x29]
+          vpcompressb  %xmm21, (%rcx) {%k2}
+
+// CHECK: vpcompressb  %xmm21, -4(%rsp) {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x0a,0x63,0x6c,0x24,0xfc]
+          vpcompressb  %xmm21, -4(%rsp) {%k2}
+
+// CHECK: vpcompressb  %xmm21, 4(%rsp) {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x0a,0x63,0x6c,0x24,0x04]
+          vpcompressb  %xmm21, 4(%rsp) {%k2}
+
+// CHECK: vpcompressb  %xmm21, 268435456(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x0a,0x63,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpcompressb  %xmm21, 268435456(%rcx,%r14,8) {%k2}
+
+// CHECK: vpcompressb  %xmm21, -536870912(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x0a,0x63,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpcompressb  %xmm21, -536870912(%rcx,%r14,8) {%k2}
+
+// CHECK: vpcompressb  %xmm21, -536870910(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x0a,0x63,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpcompressb  %xmm21, -536870910(%rcx,%r14,8) {%k2}
+
+// CHECK: vpcompressw  %xmm21, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x0a,0x63,0x29]
+          vpcompressw  %xmm21, (%rcx) {%k2}
+
+// CHECK: vpcompressw  %xmm21, -8(%rsp) {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x0a,0x63,0x6c,0x24,0xfc]
+          vpcompressw  %xmm21, -8(%rsp) {%k2}
+
+// CHECK: vpcompressw  %xmm21, 8(%rsp) {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x0a,0x63,0x6c,0x24,0x04]
+          vpcompressw  %xmm21, 8(%rsp) {%k2}
+
+// CHECK: vpcompressw  %xmm21, 268435456(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x0a,0x63,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpcompressw  %xmm21, 268435456(%rcx,%r14,8) {%k2}
+
+// CHECK: vpcompressw  %xmm21, -536870912(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x0a,0x63,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpcompressw  %xmm21, -536870912(%rcx,%r14,8) {%k2}
+
+// CHECK: vpcompressw  %xmm21, -536870910(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x0a,0x63,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpcompressw  %xmm21, -536870910(%rcx,%r14,8) {%k2}
+
+// CHECK: vpcompressb  %xmm1, (%rcx)
+// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x63,0x09]
+          vpcompressb  %xmm1, (%rcx)
+
+// CHECK: vpcompressb  %xmm1, -4(%rsp)
+// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x63,0x4c,0x24,0xfc]
+          vpcompressb  %xmm1, -4(%rsp)
+
+// CHECK: vpcompressb  %xmm1, 4(%rsp)
+// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x63,0x4c,0x24,0x04]
+          vpcompressb  %xmm1, 4(%rsp)
+
+// CHECK: vpcompressb  %xmm1, 268435456(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0x7d,0x08,0x63,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpcompressb  %xmm1, 268435456(%rcx,%r14,8)
+
+// CHECK: vpcompressb  %xmm1, -536870912(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0x7d,0x08,0x63,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpcompressb  %xmm1, -536870912(%rcx,%r14,8)
+
+// CHECK: vpcompressb  %xmm1, -536870910(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0x7d,0x08,0x63,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpcompressb  %xmm1, -536870910(%rcx,%r14,8)
+
+// CHECK: vpcompressw  %xmm1, (%rcx)
+// CHECK: encoding: [0x62,0xf2,0xfd,0x08,0x63,0x09]
+          vpcompressw  %xmm1, (%rcx)
+
+// CHECK: vpcompressw  %xmm1, -8(%rsp)
+// CHECK: encoding: [0x62,0xf2,0xfd,0x08,0x63,0x4c,0x24,0xfc]
+          vpcompressw  %xmm1, -8(%rsp)
+
+// CHECK: vpcompressw  %xmm1, 8(%rsp)
+// CHECK: encoding: [0x62,0xf2,0xfd,0x08,0x63,0x4c,0x24,0x04]
+          vpcompressw  %xmm1, 8(%rsp)
+
+// CHECK: vpcompressw  %xmm1, 268435456(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0xfd,0x08,0x63,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpcompressw  %xmm1, 268435456(%rcx,%r14,8)
+
+// CHECK: vpcompressw  %xmm1, -536870912(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0xfd,0x08,0x63,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpcompressw  %xmm1, -536870912(%rcx,%r14,8)
+
+// CHECK: vpcompressw  %xmm1, -536870910(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0xfd,0x08,0x63,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpcompressw  %xmm1, -536870910(%rcx,%r14,8)
+
+// CHECK: vpcompressb  %xmm21, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x0a,0x63,0x29]
+          vpcompressb  %xmm21, (%rcx) {%k2}
+
+// CHECK: vpcompressb  %xmm21, -4(%rsp) {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x0a,0x63,0x6c,0x24,0xfc]
+          vpcompressb  %xmm21, -4(%rsp) {%k2}
+
+// CHECK: vpcompressb  %xmm21, 4(%rsp) {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x0a,0x63,0x6c,0x24,0x04]
+          vpcompressb  %xmm21, 4(%rsp) {%k2}
+
+// CHECK: vpcompressb  %xmm21, 268435456(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x0a,0x63,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpcompressb  %xmm21, 268435456(%rcx,%r14,8) {%k2}
+
+// CHECK: vpcompressb  %xmm21, -536870912(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x0a,0x63,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpcompressb  %xmm21, -536870912(%rcx,%r14,8) {%k2}
+
+// CHECK: vpcompressb  %xmm21, -536870910(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x0a,0x63,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpcompressb  %xmm21, -536870910(%rcx,%r14,8) {%k2}
+
+// CHECK: vpcompressw  %xmm21, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x0a,0x63,0x29]
+          vpcompressw  %xmm21, (%rcx) {%k2}
+
+// CHECK: vpcompressw  %xmm21, -8(%rsp) {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x0a,0x63,0x6c,0x24,0xfc]
+          vpcompressw  %xmm21, -8(%rsp) {%k2}
+
+// CHECK: vpcompressw  %xmm21, 8(%rsp) {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x0a,0x63,0x6c,0x24,0x04]
+          vpcompressw  %xmm21, 8(%rsp) {%k2}
+
+// CHECK: vpcompressw  %xmm21, 268435456(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x0a,0x63,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpcompressw  %xmm21, 268435456(%rcx,%r14,8) {%k2}
+
+// CHECK: vpcompressw  %xmm21, -536870912(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x0a,0x63,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpcompressw  %xmm21, -536870912(%rcx,%r14,8) {%k2}
+
+// CHECK: vpcompressw  %xmm21, -536870910(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x0a,0x63,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpcompressw  %xmm21, -536870910(%rcx,%r14,8) {%k2}
+
+// CHECK: vpcompressb %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf2,0x7d,0x28,0x63,0xd9]
+          vpcompressb %ymm3, %ymm1
+
+// CHECK: vpcompressw %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf2,0xfd,0x28,0x63,0xd9]
+          vpcompressw %ymm3, %ymm1
+
+// CHECK: vpcompressb %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x63,0xfd]
+          vpcompressb %ymm23, %ymm21
+
+// CHECK: vpcompressw %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x63,0xfd]
+          vpcompressw %ymm23, %ymm21
+
+// CHECK: vpcompressb %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x7d,0x2a,0x63,0xd9]
+          vpcompressb %ymm3, %ymm1 {%k2}
+
+// CHECK: vpcompressw %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xfd,0x2a,0x63,0xd9]
+          vpcompressw %ymm3, %ymm1 {%k2}
+
+// CHECK: vpcompressb %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x2a,0x63,0xfd]
+          vpcompressb %ymm23, %ymm21 {%k2}
+
+// CHECK: vpcompressw %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x2a,0x63,0xfd]
+          vpcompressw %ymm23, %ymm21 {%k2}
+
+// CHECK: vpcompressb  %ymm1, (%rcx)
+// CHECK: encoding: [0x62,0xf2,0x7d,0x28,0x63,0x09]
+          vpcompressb  %ymm1, (%rcx)
+
+// CHECK: vpcompressb  %ymm1, -4(%rsp)
+// CHECK: encoding: [0x62,0xf2,0x7d,0x28,0x63,0x4c,0x24,0xfc]
+          vpcompressb  %ymm1, -4(%rsp)
+
+// CHECK: vpcompressb  %ymm1, 4(%rsp)
+// CHECK: encoding: [0x62,0xf2,0x7d,0x28,0x63,0x4c,0x24,0x04]
+          vpcompressb  %ymm1, 4(%rsp)
+
+// CHECK: vpcompressb  %ymm1, 268435456(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0x7d,0x28,0x63,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpcompressb  %ymm1, 268435456(%rcx,%r14,8)
+
+// CHECK: vpcompressb  %ymm1, -536870912(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0x7d,0x28,0x63,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpcompressb  %ymm1, -536870912(%rcx,%r14,8)
+
+// CHECK: vpcompressb  %ymm1, -536870910(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0x7d,0x28,0x63,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpcompressb  %ymm1, -536870910(%rcx,%r14,8)
+
+// CHECK: vpcompressw  %ymm1, (%rcx)
+// CHECK: encoding: [0x62,0xf2,0xfd,0x28,0x63,0x09]
+          vpcompressw  %ymm1, (%rcx)
+
+// CHECK: vpcompressw  %ymm1, -8(%rsp)
+// CHECK: encoding: [0x62,0xf2,0xfd,0x28,0x63,0x4c,0x24,0xfc]
+          vpcompressw  %ymm1, -8(%rsp)
+
+// CHECK: vpcompressw  %ymm1, 8(%rsp)
+// CHECK: encoding: [0x62,0xf2,0xfd,0x28,0x63,0x4c,0x24,0x04]
+          vpcompressw  %ymm1, 8(%rsp)
+
+// CHECK: vpcompressw  %ymm1, 268435456(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0xfd,0x28,0x63,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpcompressw  %ymm1, 268435456(%rcx,%r14,8)
+
+// CHECK: vpcompressw  %ymm1, -536870912(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0xfd,0x28,0x63,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpcompressw  %ymm1, -536870912(%rcx,%r14,8)
+
+// CHECK: vpcompressw  %ymm1, -536870910(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0xfd,0x28,0x63,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpcompressw  %ymm1, -536870910(%rcx,%r14,8)
+
+// CHECK: vpcompressb  %ymm21, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x2a,0x63,0x29]
+          vpcompressb  %ymm21, (%rcx) {%k2}
+
+// CHECK: vpcompressb  %ymm21, -4(%rsp) {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x2a,0x63,0x6c,0x24,0xfc]
+          vpcompressb  %ymm21, -4(%rsp) {%k2}
+
+// CHECK: vpcompressb  %ymm21, 4(%rsp) {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x2a,0x63,0x6c,0x24,0x04]
+          vpcompressb  %ymm21, 4(%rsp) {%k2}
+
+// CHECK: vpcompressb  %ymm21, 268435456(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x2a,0x63,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpcompressb  %ymm21, 268435456(%rcx,%r14,8) {%k2}
+
+// CHECK: vpcompressb  %ymm21, -536870912(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x2a,0x63,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpcompressb  %ymm21, -536870912(%rcx,%r14,8) {%k2}
+
+// CHECK: vpcompressb  %ymm21, -536870910(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x2a,0x63,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpcompressb  %ymm21, -536870910(%rcx,%r14,8) {%k2}
+
+// CHECK: vpcompressw  %ymm21, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x2a,0x63,0x29]
+          vpcompressw  %ymm21, (%rcx) {%k2}
+
+// CHECK: vpcompressw  %ymm21, -8(%rsp) {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x2a,0x63,0x6c,0x24,0xfc]
+          vpcompressw  %ymm21, -8(%rsp) {%k2}
+
+// CHECK: vpcompressw  %ymm21, 8(%rsp) {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x2a,0x63,0x6c,0x24,0x04]
+          vpcompressw  %ymm21, 8(%rsp) {%k2}
+
+// CHECK: vpcompressw  %ymm21, 268435456(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x2a,0x63,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpcompressw  %ymm21, 268435456(%rcx,%r14,8) {%k2}
+
+// CHECK: vpcompressw  %ymm21, -536870912(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x2a,0x63,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpcompressw  %ymm21, -536870912(%rcx,%r14,8) {%k2}
+
+// CHECK: vpcompressw  %ymm21, -536870910(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x2a,0x63,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpcompressw  %ymm21, -536870910(%rcx,%r14,8) {%k2}
+
+// CHECK: vpcompressb  %ymm1, (%rcx)
+// CHECK: encoding: [0x62,0xf2,0x7d,0x28,0x63,0x09]
+          vpcompressb  %ymm1, (%rcx)
+
+// CHECK: vpcompressb  %ymm1, -4(%rsp)
+// CHECK: encoding: [0x62,0xf2,0x7d,0x28,0x63,0x4c,0x24,0xfc]
+          vpcompressb  %ymm1, -4(%rsp)
+
+// CHECK: vpcompressb  %ymm1, 4(%rsp)
+// CHECK: encoding: [0x62,0xf2,0x7d,0x28,0x63,0x4c,0x24,0x04]
+          vpcompressb  %ymm1, 4(%rsp)
+
+// CHECK: vpcompressb  %ymm1, 268435456(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0x7d,0x28,0x63,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpcompressb  %ymm1, 268435456(%rcx,%r14,8)
+
+// CHECK: vpcompressb  %ymm1, -536870912(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0x7d,0x28,0x63,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpcompressb  %ymm1, -536870912(%rcx,%r14,8)
+
+// CHECK: vpcompressb  %ymm1, -536870910(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0x7d,0x28,0x63,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpcompressb  %ymm1, -536870910(%rcx,%r14,8)
+
+// CHECK: vpcompressw  %ymm1, (%rcx)
+// CHECK: encoding: [0x62,0xf2,0xfd,0x28,0x63,0x09]
+          vpcompressw  %ymm1, (%rcx)
+
+// CHECK: vpcompressw  %ymm1, -8(%rsp)
+// CHECK: encoding: [0x62,0xf2,0xfd,0x28,0x63,0x4c,0x24,0xfc]
+          vpcompressw  %ymm1, -8(%rsp)
+
+// CHECK: vpcompressw  %ymm1, 8(%rsp)
+// CHECK: encoding: [0x62,0xf2,0xfd,0x28,0x63,0x4c,0x24,0x04]
+          vpcompressw  %ymm1, 8(%rsp)
+
+// CHECK: vpcompressw  %ymm1, 268435456(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0xfd,0x28,0x63,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpcompressw  %ymm1, 268435456(%rcx,%r14,8)
+
+// CHECK: vpcompressw  %ymm1, -536870912(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0xfd,0x28,0x63,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpcompressw  %ymm1, -536870912(%rcx,%r14,8)
+
+// CHECK: vpcompressw  %ymm1, -536870910(%rcx,%r14,8)
+// CHECK: encoding: [0x62,0xb2,0xfd,0x28,0x63,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpcompressw  %ymm1, -536870910(%rcx,%r14,8)
+
+// CHECK: vpcompressb  %ymm21, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x2a,0x63,0x29]
+          vpcompressb  %ymm21, (%rcx) {%k2}
+
+// CHECK: vpcompressb  %ymm21, -4(%rsp) {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x2a,0x63,0x6c,0x24,0xfc]
+          vpcompressb  %ymm21, -4(%rsp) {%k2}
+
+// CHECK: vpcompressb  %ymm21, 4(%rsp) {%k2}
+// CHECK: encoding: [0x62,0xe2,0x7d,0x2a,0x63,0x6c,0x24,0x04]
+          vpcompressb  %ymm21, 4(%rsp) {%k2}
+
+// CHECK: vpcompressb  %ymm21, 268435456(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x2a,0x63,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpcompressb  %ymm21, 268435456(%rcx,%r14,8) {%k2}
+
+// CHECK: vpcompressb  %ymm21, -536870912(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x2a,0x63,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpcompressb  %ymm21, -536870912(%rcx,%r14,8) {%k2}
+
+// CHECK: vpcompressb  %ymm21, -536870910(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x2a,0x63,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpcompressb  %ymm21, -536870910(%rcx,%r14,8) {%k2}
+
+// CHECK: vpcompressw  %ymm21, (%rcx) {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x2a,0x63,0x29]
+          vpcompressw  %ymm21, (%rcx) {%k2}
+
+// CHECK: vpcompressw  %ymm21, -8(%rsp) {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x2a,0x63,0x6c,0x24,0xfc]
+          vpcompressw  %ymm21, -8(%rsp) {%k2}
+
+// CHECK: vpcompressw  %ymm21, 8(%rsp) {%k2}
+// CHECK: encoding: [0x62,0xe2,0xfd,0x2a,0x63,0x6c,0x24,0x04]
+          vpcompressw  %ymm21, 8(%rsp) {%k2}
+
+// CHECK: vpcompressw  %ymm21, 268435456(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x2a,0x63,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpcompressw  %ymm21, 268435456(%rcx,%r14,8) {%k2}
+
+// CHECK: vpcompressw  %ymm21, -536870912(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x2a,0x63,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpcompressw  %ymm21, -536870912(%rcx,%r14,8) {%k2}
+
+// CHECK: vpcompressw  %ymm21, -536870910(%rcx,%r14,8) {%k2}
+// CHECK: encoding: [0x62,0xa2,0xfd,0x2a,0x63,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpcompressw  %ymm21, -536870910(%rcx,%r14,8) {%k2}
+
+// CHECK: vpshldw $7, %xmm3, %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x08,0x70,0xcb,0x07]
+          vpshldw $7, %xmm3, %xmm3, %xmm1
+
+// CHECK: vpshldd $7, %xmm3, %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf3,0x65,0x08,0x71,0xcb,0x07]
+          vpshldd $7, %xmm3, %xmm3, %xmm1
+
+// CHECK: vpshldq $7, %xmm3, %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x08,0x71,0xcb,0x07]
+          vpshldq $7, %xmm3, %xmm3, %xmm1
+
+// CHECK: vpshrdw $7, %xmm3, %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x08,0x72,0xcb,0x07]
+          vpshrdw $7, %xmm3, %xmm3, %xmm1
+
+// CHECK: vpshrdd $7, %xmm3, %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf3,0x65,0x08,0x73,0xcb,0x07]
+          vpshrdd $7, %xmm3, %xmm3, %xmm1
+
+// CHECK: vpshrdq $7, %xmm3, %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x08,0x73,0xcb,0x07]
+          vpshrdq $7, %xmm3, %xmm3, %xmm1
+
+// CHECK: vpshldw $7, %xmm23, %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x00,0x70,0xef,0x07]
+          vpshldw $7, %xmm23, %xmm23, %xmm21
+
+// CHECK: vpshldd $7, %xmm23, %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa3,0x45,0x00,0x71,0xef,0x07]
+          vpshldd $7, %xmm23, %xmm23, %xmm21
+
+// CHECK: vpshldq $7, %xmm23, %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x00,0x71,0xef,0x07]
+          vpshldq $7, %xmm23, %xmm23, %xmm21
+
+// CHECK: vpshrdw $7, %xmm23, %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x00,0x72,0xef,0x07]
+          vpshrdw $7, %xmm23, %xmm23, %xmm21
+
+// CHECK: vpshrdd $7, %xmm23, %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa3,0x45,0x00,0x73,0xef,0x07]
+          vpshrdd $7, %xmm23, %xmm23, %xmm21
+
+// CHECK: vpshrdq $7, %xmm23, %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x00,0x73,0xef,0x07]
+          vpshrdq $7, %xmm23, %xmm23, %xmm21
+
+// CHECK: vpshldw $7, %xmm3, %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x0a,0x70,0xcb,0x07]
+          vpshldw $7, %xmm3, %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldd $7, %xmm3, %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0x65,0x0a,0x71,0xcb,0x07]
+          vpshldd $7, %xmm3, %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldq $7, %xmm3, %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x0a,0x71,0xcb,0x07]
+          vpshldq $7, %xmm3, %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdw $7, %xmm3, %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x0a,0x72,0xcb,0x07]
+          vpshrdw $7, %xmm3, %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdd $7, %xmm3, %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0x65,0x0a,0x73,0xcb,0x07]
+          vpshrdd $7, %xmm3, %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdq $7, %xmm3, %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x0a,0x73,0xcb,0x07]
+          vpshrdq $7, %xmm3, %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldw $7, %xmm23, %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x02,0x70,0xef,0x07]
+          vpshldw $7, %xmm23, %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldd $7, %xmm23, %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0x45,0x02,0x71,0xef,0x07]
+          vpshldd $7, %xmm23, %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldq $7, %xmm23, %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x02,0x71,0xef,0x07]
+          vpshldq $7, %xmm23, %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdw $7, %xmm23, %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x02,0x72,0xef,0x07]
+          vpshrdw $7, %xmm23, %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdd $7, %xmm23, %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0x45,0x02,0x73,0xef,0x07]
+          vpshrdd $7, %xmm23, %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdq $7, %xmm23, %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x02,0x73,0xef,0x07]
+          vpshrdq $7, %xmm23, %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldw  $7, (%rcx), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x08,0x70,0x09,0x07]
+          vpshldw  $7, (%rcx), %xmm3, %xmm1
+
+// CHECK: vpshldw  $7, -64(%rsp), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x08,0x70,0x4c,0x24,0xfc,0x07]
+          vpshldw  $7, -64(%rsp), %xmm3, %xmm1
+
+// CHECK: vpshldw  $7, 64(%rsp), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x08,0x70,0x4c,0x24,0x04,0x07]
+          vpshldw  $7, 64(%rsp), %xmm3, %xmm1
+
+// CHECK: vpshldw  $7, 268435456(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x08,0x70,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldw  $7, 268435456(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshldw  $7, -536870912(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x08,0x70,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldw  $7, -536870912(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshldw  $7, -536870910(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x08,0x70,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldw  $7, -536870910(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshldd  $7, (%rcx), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf3,0x65,0x08,0x71,0x09,0x07]
+          vpshldd  $7, (%rcx), %xmm3, %xmm1
+
+// CHECK: vpshldd  $7, -64(%rsp), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf3,0x65,0x08,0x71,0x4c,0x24,0xfc,0x07]
+          vpshldd  $7, -64(%rsp), %xmm3, %xmm1
+
+// CHECK: vpshldd  $7, 64(%rsp), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf3,0x65,0x08,0x71,0x4c,0x24,0x04,0x07]
+          vpshldd  $7, 64(%rsp), %xmm3, %xmm1
+
+// CHECK: vpshldd  $7, 268435456(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb3,0x65,0x08,0x71,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldd  $7, 268435456(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshldd  $7, -536870912(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb3,0x65,0x08,0x71,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldd  $7, -536870912(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshldd  $7, -536870910(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb3,0x65,0x08,0x71,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldd  $7, -536870910(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshldq  $7, (%rcx), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x08,0x71,0x09,0x07]
+          vpshldq  $7, (%rcx), %xmm3, %xmm1
+
+// CHECK: vpshldq  $7, -64(%rsp), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x08,0x71,0x4c,0x24,0xfc,0x07]
+          vpshldq  $7, -64(%rsp), %xmm3, %xmm1
+
+// CHECK: vpshldq  $7, 64(%rsp), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x08,0x71,0x4c,0x24,0x04,0x07]
+          vpshldq  $7, 64(%rsp), %xmm3, %xmm1
+
+// CHECK: vpshldq  $7, 268435456(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x08,0x71,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldq  $7, 268435456(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshldq  $7, -536870912(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x08,0x71,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldq  $7, -536870912(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshldq  $7, -536870910(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x08,0x71,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldq  $7, -536870910(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshrdw  $7, (%rcx), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x08,0x72,0x09,0x07]
+          vpshrdw  $7, (%rcx), %xmm3, %xmm1
+
+// CHECK: vpshrdw  $7, -64(%rsp), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x08,0x72,0x4c,0x24,0xfc,0x07]
+          vpshrdw  $7, -64(%rsp), %xmm3, %xmm1
+
+// CHECK: vpshrdw  $7, 64(%rsp), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x08,0x72,0x4c,0x24,0x04,0x07]
+          vpshrdw  $7, 64(%rsp), %xmm3, %xmm1
+
+// CHECK: vpshrdw  $7, 268435456(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x08,0x72,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdw  $7, 268435456(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshrdw  $7, -536870912(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x08,0x72,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdw  $7, -536870912(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshrdw  $7, -536870910(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x08,0x72,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdw  $7, -536870910(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshrdd  $7, (%rcx), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf3,0x65,0x08,0x73,0x09,0x07]
+          vpshrdd  $7, (%rcx), %xmm3, %xmm1
+
+// CHECK: vpshrdd  $7, -64(%rsp), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf3,0x65,0x08,0x73,0x4c,0x24,0xfc,0x07]
+          vpshrdd  $7, -64(%rsp), %xmm3, %xmm1
+
+// CHECK: vpshrdd  $7, 64(%rsp), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf3,0x65,0x08,0x73,0x4c,0x24,0x04,0x07]
+          vpshrdd  $7, 64(%rsp), %xmm3, %xmm1
+
+// CHECK: vpshrdd  $7, 268435456(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb3,0x65,0x08,0x73,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdd  $7, 268435456(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshrdd  $7, -536870912(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb3,0x65,0x08,0x73,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdd  $7, -536870912(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshrdd  $7, -536870910(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb3,0x65,0x08,0x73,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdd  $7, -536870910(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshrdq  $7, (%rcx), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x08,0x73,0x09,0x07]
+          vpshrdq  $7, (%rcx), %xmm3, %xmm1
+
+// CHECK: vpshrdq  $7, -64(%rsp), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x08,0x73,0x4c,0x24,0xfc,0x07]
+          vpshrdq  $7, -64(%rsp), %xmm3, %xmm1
+
+// CHECK: vpshrdq  $7, 64(%rsp), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x08,0x73,0x4c,0x24,0x04,0x07]
+          vpshrdq  $7, 64(%rsp), %xmm3, %xmm1
+
+// CHECK: vpshrdq  $7, 268435456(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x08,0x73,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdq  $7, 268435456(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshrdq  $7, -536870912(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x08,0x73,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdq  $7, -536870912(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshrdq  $7, -536870910(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x08,0x73,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdq  $7, -536870910(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshldw  $7, (%rcx), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x00,0x70,0x29,0x07]
+          vpshldw  $7, (%rcx), %xmm23, %xmm21
+
+// CHECK: vpshldw  $7, -64(%rsp), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x00,0x70,0x6c,0x24,0xfc,0x07]
+          vpshldw  $7, -64(%rsp), %xmm23, %xmm21
+
+// CHECK: vpshldw  $7, 64(%rsp), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x00,0x70,0x6c,0x24,0x04,0x07]
+          vpshldw  $7, 64(%rsp), %xmm23, %xmm21
+
+// CHECK: vpshldw  $7, 268435456(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x00,0x70,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldw  $7, 268435456(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshldw  $7, -536870912(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x00,0x70,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldw  $7, -536870912(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshldw  $7, -536870910(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x00,0x70,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldw  $7, -536870910(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshldd  $7, (%rcx), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe3,0x45,0x00,0x71,0x29,0x07]
+          vpshldd  $7, (%rcx), %xmm23, %xmm21
+
+// CHECK: vpshldd  $7, -64(%rsp), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe3,0x45,0x00,0x71,0x6c,0x24,0xfc,0x07]
+          vpshldd  $7, -64(%rsp), %xmm23, %xmm21
+
+// CHECK: vpshldd  $7, 64(%rsp), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe3,0x45,0x00,0x71,0x6c,0x24,0x04,0x07]
+          vpshldd  $7, 64(%rsp), %xmm23, %xmm21
+
+// CHECK: vpshldd  $7, 268435456(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa3,0x45,0x00,0x71,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldd  $7, 268435456(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshldd  $7, -536870912(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa3,0x45,0x00,0x71,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldd  $7, -536870912(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshldd  $7, -536870910(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa3,0x45,0x00,0x71,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldd  $7, -536870910(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshldq  $7, (%rcx), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x00,0x71,0x29,0x07]
+          vpshldq  $7, (%rcx), %xmm23, %xmm21
+
+// CHECK: vpshldq  $7, -64(%rsp), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x00,0x71,0x6c,0x24,0xfc,0x07]
+          vpshldq  $7, -64(%rsp), %xmm23, %xmm21
+
+// CHECK: vpshldq  $7, 64(%rsp), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x00,0x71,0x6c,0x24,0x04,0x07]
+          vpshldq  $7, 64(%rsp), %xmm23, %xmm21
+
+// CHECK: vpshldq  $7, 268435456(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x00,0x71,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldq  $7, 268435456(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshldq  $7, -536870912(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x00,0x71,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldq  $7, -536870912(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshldq  $7, -536870910(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x00,0x71,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldq  $7, -536870910(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshrdw  $7, (%rcx), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x00,0x72,0x29,0x07]
+          vpshrdw  $7, (%rcx), %xmm23, %xmm21
+
+// CHECK: vpshrdw  $7, -64(%rsp), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x00,0x72,0x6c,0x24,0xfc,0x07]
+          vpshrdw  $7, -64(%rsp), %xmm23, %xmm21
+
+// CHECK: vpshrdw  $7, 64(%rsp), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x00,0x72,0x6c,0x24,0x04,0x07]
+          vpshrdw  $7, 64(%rsp), %xmm23, %xmm21
+
+// CHECK: vpshrdw  $7, 268435456(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x00,0x72,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdw  $7, 268435456(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshrdw  $7, -536870912(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x00,0x72,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdw  $7, -536870912(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshrdw  $7, -536870910(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x00,0x72,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdw  $7, -536870910(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshrdd  $7, (%rcx), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe3,0x45,0x00,0x73,0x29,0x07]
+          vpshrdd  $7, (%rcx), %xmm23, %xmm21
+
+// CHECK: vpshrdd  $7, -64(%rsp), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe3,0x45,0x00,0x73,0x6c,0x24,0xfc,0x07]
+          vpshrdd  $7, -64(%rsp), %xmm23, %xmm21
+
+// CHECK: vpshrdd  $7, 64(%rsp), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe3,0x45,0x00,0x73,0x6c,0x24,0x04,0x07]
+          vpshrdd  $7, 64(%rsp), %xmm23, %xmm21
+
+// CHECK: vpshrdd  $7, 268435456(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa3,0x45,0x00,0x73,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdd  $7, 268435456(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshrdd  $7, -536870912(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa3,0x45,0x00,0x73,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdd  $7, -536870912(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshrdd  $7, -536870910(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa3,0x45,0x00,0x73,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdd  $7, -536870910(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshrdq  $7, (%rcx), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x00,0x73,0x29,0x07]
+          vpshrdq  $7, (%rcx), %xmm23, %xmm21
+
+// CHECK: vpshrdq  $7, -64(%rsp), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x00,0x73,0x6c,0x24,0xfc,0x07]
+          vpshrdq  $7, -64(%rsp), %xmm23, %xmm21
+
+// CHECK: vpshrdq  $7, 64(%rsp), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x00,0x73,0x6c,0x24,0x04,0x07]
+          vpshrdq  $7, 64(%rsp), %xmm23, %xmm21
+
+// CHECK: vpshrdq  $7, 268435456(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x00,0x73,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdq  $7, 268435456(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshrdq  $7, -536870912(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x00,0x73,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdq  $7, -536870912(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshrdq  $7, -536870910(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x00,0x73,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdq  $7, -536870910(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshldw  $7, (%rcx), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x0a,0x70,0x09,0x07]
+          vpshldw  $7, (%rcx), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldw  $7, -64(%rsp), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x0a,0x70,0x4c,0x24,0xfc,0x07]
+          vpshldw  $7, -64(%rsp), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldw  $7, 64(%rsp), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x0a,0x70,0x4c,0x24,0x04,0x07]
+          vpshldw  $7, 64(%rsp), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldw  $7, 268435456(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x0a,0x70,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldw  $7, 268435456(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldw  $7, -536870912(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x0a,0x70,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldw  $7, -536870912(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldw  $7, -536870910(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x0a,0x70,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldw  $7, -536870910(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldd  $7, (%rcx), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0x65,0x0a,0x71,0x09,0x07]
+          vpshldd  $7, (%rcx), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldd  $7, -64(%rsp), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0x65,0x0a,0x71,0x4c,0x24,0xfc,0x07]
+          vpshldd  $7, -64(%rsp), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldd  $7, 64(%rsp), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0x65,0x0a,0x71,0x4c,0x24,0x04,0x07]
+          vpshldd  $7, 64(%rsp), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldd  $7, 268435456(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0x65,0x0a,0x71,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldd  $7, 268435456(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldd  $7, -536870912(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0x65,0x0a,0x71,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldd  $7, -536870912(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldd  $7, -536870910(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0x65,0x0a,0x71,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldd  $7, -536870910(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldq  $7, (%rcx), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x0a,0x71,0x09,0x07]
+          vpshldq  $7, (%rcx), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldq  $7, -64(%rsp), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x0a,0x71,0x4c,0x24,0xfc,0x07]
+          vpshldq  $7, -64(%rsp), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldq  $7, 64(%rsp), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x0a,0x71,0x4c,0x24,0x04,0x07]
+          vpshldq  $7, 64(%rsp), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldq  $7, 268435456(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x0a,0x71,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldq  $7, 268435456(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldq  $7, -536870912(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x0a,0x71,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldq  $7, -536870912(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldq  $7, -536870910(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x0a,0x71,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldq  $7, -536870910(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdw  $7, (%rcx), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x0a,0x72,0x09,0x07]
+          vpshrdw  $7, (%rcx), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdw  $7, -64(%rsp), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x0a,0x72,0x4c,0x24,0xfc,0x07]
+          vpshrdw  $7, -64(%rsp), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdw  $7, 64(%rsp), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x0a,0x72,0x4c,0x24,0x04,0x07]
+          vpshrdw  $7, 64(%rsp), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdw  $7, 268435456(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x0a,0x72,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdw  $7, 268435456(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdw  $7, -536870912(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x0a,0x72,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdw  $7, -536870912(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdw  $7, -536870910(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x0a,0x72,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdw  $7, -536870910(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdd  $7, (%rcx), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0x65,0x0a,0x73,0x09,0x07]
+          vpshrdd  $7, (%rcx), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdd  $7, -64(%rsp), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0x65,0x0a,0x73,0x4c,0x24,0xfc,0x07]
+          vpshrdd  $7, -64(%rsp), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdd  $7, 64(%rsp), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0x65,0x0a,0x73,0x4c,0x24,0x04,0x07]
+          vpshrdd  $7, 64(%rsp), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdd  $7, 268435456(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0x65,0x0a,0x73,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdd  $7, 268435456(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdd  $7, -536870912(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0x65,0x0a,0x73,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdd  $7, -536870912(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdd  $7, -536870910(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0x65,0x0a,0x73,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdd  $7, -536870910(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdq  $7, (%rcx), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x0a,0x73,0x09,0x07]
+          vpshrdq  $7, (%rcx), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdq  $7, -64(%rsp), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x0a,0x73,0x4c,0x24,0xfc,0x07]
+          vpshrdq  $7, -64(%rsp), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdq  $7, 64(%rsp), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x0a,0x73,0x4c,0x24,0x04,0x07]
+          vpshrdq  $7, 64(%rsp), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdq  $7, 268435456(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x0a,0x73,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdq  $7, 268435456(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdq  $7, -536870912(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x0a,0x73,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdq  $7, -536870912(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdq  $7, -536870910(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x0a,0x73,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdq  $7, -536870910(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldw  $7, (%rcx), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x02,0x70,0x29,0x07]
+          vpshldw  $7, (%rcx), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldw  $7, -64(%rsp), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x02,0x70,0x6c,0x24,0xfc,0x07]
+          vpshldw  $7, -64(%rsp), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldw  $7, 64(%rsp), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x02,0x70,0x6c,0x24,0x04,0x07]
+          vpshldw  $7, 64(%rsp), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldw  $7, 268435456(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x02,0x70,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldw  $7, 268435456(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldw  $7, -536870912(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x02,0x70,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldw  $7, -536870912(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldw  $7, -536870910(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x02,0x70,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldw  $7, -536870910(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldd  $7, (%rcx), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0x45,0x02,0x71,0x29,0x07]
+          vpshldd  $7, (%rcx), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldd  $7, -64(%rsp), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0x45,0x02,0x71,0x6c,0x24,0xfc,0x07]
+          vpshldd  $7, -64(%rsp), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldd  $7, 64(%rsp), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0x45,0x02,0x71,0x6c,0x24,0x04,0x07]
+          vpshldd  $7, 64(%rsp), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldd  $7, 268435456(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0x45,0x02,0x71,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldd  $7, 268435456(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldd  $7, -536870912(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0x45,0x02,0x71,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldd  $7, -536870912(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldd  $7, -536870910(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0x45,0x02,0x71,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldd  $7, -536870910(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldq  $7, (%rcx), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x02,0x71,0x29,0x07]
+          vpshldq  $7, (%rcx), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldq  $7, -64(%rsp), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x02,0x71,0x6c,0x24,0xfc,0x07]
+          vpshldq  $7, -64(%rsp), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldq  $7, 64(%rsp), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x02,0x71,0x6c,0x24,0x04,0x07]
+          vpshldq  $7, 64(%rsp), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldq  $7, 268435456(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x02,0x71,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldq  $7, 268435456(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldq  $7, -536870912(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x02,0x71,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldq  $7, -536870912(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldq  $7, -536870910(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x02,0x71,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldq  $7, -536870910(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdw  $7, (%rcx), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x02,0x72,0x29,0x07]
+          vpshrdw  $7, (%rcx), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdw  $7, -64(%rsp), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x02,0x72,0x6c,0x24,0xfc,0x07]
+          vpshrdw  $7, -64(%rsp), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdw  $7, 64(%rsp), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x02,0x72,0x6c,0x24,0x04,0x07]
+          vpshrdw  $7, 64(%rsp), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdw  $7, 268435456(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x02,0x72,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdw  $7, 268435456(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdw  $7, -536870912(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x02,0x72,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdw  $7, -536870912(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdw  $7, -536870910(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x02,0x72,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdw  $7, -536870910(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdd  $7, (%rcx), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0x45,0x02,0x73,0x29,0x07]
+          vpshrdd  $7, (%rcx), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdd  $7, -64(%rsp), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0x45,0x02,0x73,0x6c,0x24,0xfc,0x07]
+          vpshrdd  $7, -64(%rsp), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdd  $7, 64(%rsp), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0x45,0x02,0x73,0x6c,0x24,0x04,0x07]
+          vpshrdd  $7, 64(%rsp), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdd  $7, 268435456(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0x45,0x02,0x73,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdd  $7, 268435456(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdd  $7, -536870912(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0x45,0x02,0x73,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdd  $7, -536870912(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdd  $7, -536870910(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0x45,0x02,0x73,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdd  $7, -536870910(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdq  $7, (%rcx), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x02,0x73,0x29,0x07]
+          vpshrdq  $7, (%rcx), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdq  $7, -64(%rsp), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x02,0x73,0x6c,0x24,0xfc,0x07]
+          vpshrdq  $7, -64(%rsp), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdq  $7, 64(%rsp), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x02,0x73,0x6c,0x24,0x04,0x07]
+          vpshrdq  $7, 64(%rsp), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdq  $7, 268435456(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x02,0x73,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdq  $7, 268435456(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdq  $7, -536870912(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x02,0x73,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdq  $7, -536870912(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdq  $7, -536870910(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x02,0x73,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdq  $7, -536870910(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldw $7, %ymm3, %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x28,0x70,0xcb,0x07]
+          vpshldw $7, %ymm3, %ymm3, %ymm1
+
+// CHECK: vpshldd $7, %ymm3, %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf3,0x65,0x28,0x71,0xcb,0x07]
+          vpshldd $7, %ymm3, %ymm3, %ymm1
+
+// CHECK: vpshldq $7, %ymm3, %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x28,0x71,0xcb,0x07]
+          vpshldq $7, %ymm3, %ymm3, %ymm1
+
+// CHECK: vpshrdw $7, %ymm3, %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x28,0x72,0xcb,0x07]
+          vpshrdw $7, %ymm3, %ymm3, %ymm1
+
+// CHECK: vpshrdd $7, %ymm3, %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf3,0x65,0x28,0x73,0xcb,0x07]
+          vpshrdd $7, %ymm3, %ymm3, %ymm1
+
+// CHECK: vpshrdq $7, %ymm3, %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x28,0x73,0xcb,0x07]
+          vpshrdq $7, %ymm3, %ymm3, %ymm1
+
+// CHECK: vpshldw $7, %ymm23, %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x20,0x70,0xef,0x07]
+          vpshldw $7, %ymm23, %ymm23, %ymm21
+
+// CHECK: vpshldd $7, %ymm23, %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa3,0x45,0x20,0x71,0xef,0x07]
+          vpshldd $7, %ymm23, %ymm23, %ymm21
+
+// CHECK: vpshldq $7, %ymm23, %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x20,0x71,0xef,0x07]
+          vpshldq $7, %ymm23, %ymm23, %ymm21
+
+// CHECK: vpshrdw $7, %ymm23, %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x20,0x72,0xef,0x07]
+          vpshrdw $7, %ymm23, %ymm23, %ymm21
+
+// CHECK: vpshrdd $7, %ymm23, %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa3,0x45,0x20,0x73,0xef,0x07]
+          vpshrdd $7, %ymm23, %ymm23, %ymm21
+
+// CHECK: vpshrdq $7, %ymm23, %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x20,0x73,0xef,0x07]
+          vpshrdq $7, %ymm23, %ymm23, %ymm21
+
+// CHECK: vpshldw $7, %ymm3, %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x2a,0x70,0xcb,0x07]
+          vpshldw $7, %ymm3, %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldd $7, %ymm3, %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0x65,0x2a,0x71,0xcb,0x07]
+          vpshldd $7, %ymm3, %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldq $7, %ymm3, %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x2a,0x71,0xcb,0x07]
+          vpshldq $7, %ymm3, %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdw $7, %ymm3, %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x2a,0x72,0xcb,0x07]
+          vpshrdw $7, %ymm3, %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdd $7, %ymm3, %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0x65,0x2a,0x73,0xcb,0x07]
+          vpshrdd $7, %ymm3, %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdq $7, %ymm3, %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x2a,0x73,0xcb,0x07]
+          vpshrdq $7, %ymm3, %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldw $7, %ymm23, %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x22,0x70,0xef,0x07]
+          vpshldw $7, %ymm23, %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldd $7, %ymm23, %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0x45,0x22,0x71,0xef,0x07]
+          vpshldd $7, %ymm23, %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldq $7, %ymm23, %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x22,0x71,0xef,0x07]
+          vpshldq $7, %ymm23, %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdw $7, %ymm23, %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x22,0x72,0xef,0x07]
+          vpshrdw $7, %ymm23, %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdd $7, %ymm23, %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0x45,0x22,0x73,0xef,0x07]
+          vpshrdd $7, %ymm23, %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdq $7, %ymm23, %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x22,0x73,0xef,0x07]
+          vpshrdq $7, %ymm23, %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldw  $7, (%rcx), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x28,0x70,0x09,0x07]
+          vpshldw  $7, (%rcx), %ymm3, %ymm1
+
+// CHECK: vpshldw  $7, -128(%rsp), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x28,0x70,0x4c,0x24,0xfc,0x07]
+          vpshldw  $7, -128(%rsp), %ymm3, %ymm1
+
+// CHECK: vpshldw  $7, 128(%rsp), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x28,0x70,0x4c,0x24,0x04,0x07]
+          vpshldw  $7, 128(%rsp), %ymm3, %ymm1
+
+// CHECK: vpshldw  $7, 268435456(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x28,0x70,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldw  $7, 268435456(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshldw  $7, -536870912(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x28,0x70,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldw  $7, -536870912(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshldw  $7, -536870910(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x28,0x70,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldw  $7, -536870910(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshldd  $7, (%rcx), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf3,0x65,0x28,0x71,0x09,0x07]
+          vpshldd  $7, (%rcx), %ymm3, %ymm1
+
+// CHECK: vpshldd  $7, -128(%rsp), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf3,0x65,0x28,0x71,0x4c,0x24,0xfc,0x07]
+          vpshldd  $7, -128(%rsp), %ymm3, %ymm1
+
+// CHECK: vpshldd  $7, 128(%rsp), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf3,0x65,0x28,0x71,0x4c,0x24,0x04,0x07]
+          vpshldd  $7, 128(%rsp), %ymm3, %ymm1
+
+// CHECK: vpshldd  $7, 268435456(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb3,0x65,0x28,0x71,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldd  $7, 268435456(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshldd  $7, -536870912(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb3,0x65,0x28,0x71,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldd  $7, -536870912(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshldd  $7, -536870910(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb3,0x65,0x28,0x71,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldd  $7, -536870910(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshldq  $7, (%rcx), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x28,0x71,0x09,0x07]
+          vpshldq  $7, (%rcx), %ymm3, %ymm1
+
+// CHECK: vpshldq  $7, -128(%rsp), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x28,0x71,0x4c,0x24,0xfc,0x07]
+          vpshldq  $7, -128(%rsp), %ymm3, %ymm1
+
+// CHECK: vpshldq  $7, 128(%rsp), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x28,0x71,0x4c,0x24,0x04,0x07]
+          vpshldq  $7, 128(%rsp), %ymm3, %ymm1
+
+// CHECK: vpshldq  $7, 268435456(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x28,0x71,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldq  $7, 268435456(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshldq  $7, -536870912(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x28,0x71,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldq  $7, -536870912(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshldq  $7, -536870910(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x28,0x71,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldq  $7, -536870910(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshrdw  $7, (%rcx), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x28,0x72,0x09,0x07]
+          vpshrdw  $7, (%rcx), %ymm3, %ymm1
+
+// CHECK: vpshrdw  $7, -128(%rsp), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x28,0x72,0x4c,0x24,0xfc,0x07]
+          vpshrdw  $7, -128(%rsp), %ymm3, %ymm1
+
+// CHECK: vpshrdw  $7, 128(%rsp), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x28,0x72,0x4c,0x24,0x04,0x07]
+          vpshrdw  $7, 128(%rsp), %ymm3, %ymm1
+
+// CHECK: vpshrdw  $7, 268435456(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x28,0x72,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdw  $7, 268435456(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshrdw  $7, -536870912(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x28,0x72,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdw  $7, -536870912(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshrdw  $7, -536870910(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x28,0x72,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdw  $7, -536870910(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshrdd  $7, (%rcx), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf3,0x65,0x28,0x73,0x09,0x07]
+          vpshrdd  $7, (%rcx), %ymm3, %ymm1
+
+// CHECK: vpshrdd  $7, -128(%rsp), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf3,0x65,0x28,0x73,0x4c,0x24,0xfc,0x07]
+          vpshrdd  $7, -128(%rsp), %ymm3, %ymm1
+
+// CHECK: vpshrdd  $7, 128(%rsp), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf3,0x65,0x28,0x73,0x4c,0x24,0x04,0x07]
+          vpshrdd  $7, 128(%rsp), %ymm3, %ymm1
+
+// CHECK: vpshrdd  $7, 268435456(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb3,0x65,0x28,0x73,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdd  $7, 268435456(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshrdd  $7, -536870912(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb3,0x65,0x28,0x73,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdd  $7, -536870912(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshrdd  $7, -536870910(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb3,0x65,0x28,0x73,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdd  $7, -536870910(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshrdq  $7, (%rcx), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x28,0x73,0x09,0x07]
+          vpshrdq  $7, (%rcx), %ymm3, %ymm1
+
+// CHECK: vpshrdq  $7, -128(%rsp), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x28,0x73,0x4c,0x24,0xfc,0x07]
+          vpshrdq  $7, -128(%rsp), %ymm3, %ymm1
+
+// CHECK: vpshrdq  $7, 128(%rsp), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf3,0xe5,0x28,0x73,0x4c,0x24,0x04,0x07]
+          vpshrdq  $7, 128(%rsp), %ymm3, %ymm1
+
+// CHECK: vpshrdq  $7, 268435456(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x28,0x73,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdq  $7, 268435456(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshrdq  $7, -536870912(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x28,0x73,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdq  $7, -536870912(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshrdq  $7, -536870910(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb3,0xe5,0x28,0x73,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdq  $7, -536870910(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshldw  $7, (%rcx), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x20,0x70,0x29,0x07]
+          vpshldw  $7, (%rcx), %ymm23, %ymm21
+
+// CHECK: vpshldw  $7, -128(%rsp), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x20,0x70,0x6c,0x24,0xfc,0x07]
+          vpshldw  $7, -128(%rsp), %ymm23, %ymm21
+
+// CHECK: vpshldw  $7, 128(%rsp), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x20,0x70,0x6c,0x24,0x04,0x07]
+          vpshldw  $7, 128(%rsp), %ymm23, %ymm21
+
+// CHECK: vpshldw  $7, 268435456(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x20,0x70,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldw  $7, 268435456(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshldw  $7, -536870912(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x20,0x70,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldw  $7, -536870912(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshldw  $7, -536870910(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x20,0x70,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldw  $7, -536870910(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshldd  $7, (%rcx), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe3,0x45,0x20,0x71,0x29,0x07]
+          vpshldd  $7, (%rcx), %ymm23, %ymm21
+
+// CHECK: vpshldd  $7, -128(%rsp), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe3,0x45,0x20,0x71,0x6c,0x24,0xfc,0x07]
+          vpshldd  $7, -128(%rsp), %ymm23, %ymm21
+
+// CHECK: vpshldd  $7, 128(%rsp), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe3,0x45,0x20,0x71,0x6c,0x24,0x04,0x07]
+          vpshldd  $7, 128(%rsp), %ymm23, %ymm21
+
+// CHECK: vpshldd  $7, 268435456(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa3,0x45,0x20,0x71,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldd  $7, 268435456(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshldd  $7, -536870912(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa3,0x45,0x20,0x71,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldd  $7, -536870912(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshldd  $7, -536870910(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa3,0x45,0x20,0x71,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldd  $7, -536870910(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshldq  $7, (%rcx), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x20,0x71,0x29,0x07]
+          vpshldq  $7, (%rcx), %ymm23, %ymm21
+
+// CHECK: vpshldq  $7, -128(%rsp), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x20,0x71,0x6c,0x24,0xfc,0x07]
+          vpshldq  $7, -128(%rsp), %ymm23, %ymm21
+
+// CHECK: vpshldq  $7, 128(%rsp), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x20,0x71,0x6c,0x24,0x04,0x07]
+          vpshldq  $7, 128(%rsp), %ymm23, %ymm21
+
+// CHECK: vpshldq  $7, 268435456(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x20,0x71,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldq  $7, 268435456(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshldq  $7, -536870912(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x20,0x71,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldq  $7, -536870912(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshldq  $7, -536870910(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x20,0x71,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldq  $7, -536870910(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshrdw  $7, (%rcx), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x20,0x72,0x29,0x07]
+          vpshrdw  $7, (%rcx), %ymm23, %ymm21
+
+// CHECK: vpshrdw  $7, -128(%rsp), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x20,0x72,0x6c,0x24,0xfc,0x07]
+          vpshrdw  $7, -128(%rsp), %ymm23, %ymm21
+
+// CHECK: vpshrdw  $7, 128(%rsp), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x20,0x72,0x6c,0x24,0x04,0x07]
+          vpshrdw  $7, 128(%rsp), %ymm23, %ymm21
+
+// CHECK: vpshrdw  $7, 268435456(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x20,0x72,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdw  $7, 268435456(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshrdw  $7, -536870912(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x20,0x72,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdw  $7, -536870912(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshrdw  $7, -536870910(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x20,0x72,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdw  $7, -536870910(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshrdd  $7, (%rcx), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe3,0x45,0x20,0x73,0x29,0x07]
+          vpshrdd  $7, (%rcx), %ymm23, %ymm21
+
+// CHECK: vpshrdd  $7, -128(%rsp), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe3,0x45,0x20,0x73,0x6c,0x24,0xfc,0x07]
+          vpshrdd  $7, -128(%rsp), %ymm23, %ymm21
+
+// CHECK: vpshrdd  $7, 128(%rsp), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe3,0x45,0x20,0x73,0x6c,0x24,0x04,0x07]
+          vpshrdd  $7, 128(%rsp), %ymm23, %ymm21
+
+// CHECK: vpshrdd  $7, 268435456(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa3,0x45,0x20,0x73,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdd  $7, 268435456(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshrdd  $7, -536870912(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa3,0x45,0x20,0x73,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdd  $7, -536870912(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshrdd  $7, -536870910(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa3,0x45,0x20,0x73,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdd  $7, -536870910(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshrdq  $7, (%rcx), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x20,0x73,0x29,0x07]
+          vpshrdq  $7, (%rcx), %ymm23, %ymm21
+
+// CHECK: vpshrdq  $7, -128(%rsp), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x20,0x73,0x6c,0x24,0xfc,0x07]
+          vpshrdq  $7, -128(%rsp), %ymm23, %ymm21
+
+// CHECK: vpshrdq  $7, 128(%rsp), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe3,0xc5,0x20,0x73,0x6c,0x24,0x04,0x07]
+          vpshrdq  $7, 128(%rsp), %ymm23, %ymm21
+
+// CHECK: vpshrdq  $7, 268435456(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x20,0x73,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdq  $7, 268435456(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshrdq  $7, -536870912(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x20,0x73,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdq  $7, -536870912(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshrdq  $7, -536870910(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa3,0xc5,0x20,0x73,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdq  $7, -536870910(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshldw  $7, (%rcx), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x2a,0x70,0x09,0x07]
+          vpshldw  $7, (%rcx), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldw  $7, -128(%rsp), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x2a,0x70,0x4c,0x24,0xfc,0x07]
+          vpshldw  $7, -128(%rsp), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldw  $7, 128(%rsp), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x2a,0x70,0x4c,0x24,0x04,0x07]
+          vpshldw  $7, 128(%rsp), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldw  $7, 268435456(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x2a,0x70,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldw  $7, 268435456(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldw  $7, -536870912(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x2a,0x70,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldw  $7, -536870912(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldw  $7, -536870910(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x2a,0x70,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldw  $7, -536870910(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldd  $7, (%rcx), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0x65,0x2a,0x71,0x09,0x07]
+          vpshldd  $7, (%rcx), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldd  $7, -128(%rsp), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0x65,0x2a,0x71,0x4c,0x24,0xfc,0x07]
+          vpshldd  $7, -128(%rsp), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldd  $7, 128(%rsp), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0x65,0x2a,0x71,0x4c,0x24,0x04,0x07]
+          vpshldd  $7, 128(%rsp), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldd  $7, 268435456(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0x65,0x2a,0x71,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldd  $7, 268435456(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldd  $7, -536870912(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0x65,0x2a,0x71,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldd  $7, -536870912(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldd  $7, -536870910(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0x65,0x2a,0x71,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldd  $7, -536870910(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldq  $7, (%rcx), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x2a,0x71,0x09,0x07]
+          vpshldq  $7, (%rcx), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldq  $7, -128(%rsp), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x2a,0x71,0x4c,0x24,0xfc,0x07]
+          vpshldq  $7, -128(%rsp), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldq  $7, 128(%rsp), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x2a,0x71,0x4c,0x24,0x04,0x07]
+          vpshldq  $7, 128(%rsp), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldq  $7, 268435456(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x2a,0x71,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldq  $7, 268435456(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldq  $7, -536870912(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x2a,0x71,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldq  $7, -536870912(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldq  $7, -536870910(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x2a,0x71,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldq  $7, -536870910(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdw  $7, (%rcx), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x2a,0x72,0x09,0x07]
+          vpshrdw  $7, (%rcx), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdw  $7, -128(%rsp), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x2a,0x72,0x4c,0x24,0xfc,0x07]
+          vpshrdw  $7, -128(%rsp), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdw  $7, 128(%rsp), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x2a,0x72,0x4c,0x24,0x04,0x07]
+          vpshrdw  $7, 128(%rsp), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdw  $7, 268435456(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x2a,0x72,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdw  $7, 268435456(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdw  $7, -536870912(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x2a,0x72,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdw  $7, -536870912(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdw  $7, -536870910(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x2a,0x72,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdw  $7, -536870910(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdd  $7, (%rcx), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0x65,0x2a,0x73,0x09,0x07]
+          vpshrdd  $7, (%rcx), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdd  $7, -128(%rsp), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0x65,0x2a,0x73,0x4c,0x24,0xfc,0x07]
+          vpshrdd  $7, -128(%rsp), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdd  $7, 128(%rsp), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0x65,0x2a,0x73,0x4c,0x24,0x04,0x07]
+          vpshrdd  $7, 128(%rsp), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdd  $7, 268435456(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0x65,0x2a,0x73,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdd  $7, 268435456(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdd  $7, -536870912(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0x65,0x2a,0x73,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdd  $7, -536870912(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdd  $7, -536870910(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0x65,0x2a,0x73,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdd  $7, -536870910(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdq  $7, (%rcx), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x2a,0x73,0x09,0x07]
+          vpshrdq  $7, (%rcx), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdq  $7, -128(%rsp), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x2a,0x73,0x4c,0x24,0xfc,0x07]
+          vpshrdq  $7, -128(%rsp), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdq  $7, 128(%rsp), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf3,0xe5,0x2a,0x73,0x4c,0x24,0x04,0x07]
+          vpshrdq  $7, 128(%rsp), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdq  $7, 268435456(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x2a,0x73,0x8c,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdq  $7, 268435456(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdq  $7, -536870912(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x2a,0x73,0x8c,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdq  $7, -536870912(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdq  $7, -536870910(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb3,0xe5,0x2a,0x73,0x8c,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdq  $7, -536870910(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldw  $7, (%rcx), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x22,0x70,0x29,0x07]
+          vpshldw  $7, (%rcx), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldw  $7, -128(%rsp), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x22,0x70,0x6c,0x24,0xfc,0x07]
+          vpshldw  $7, -128(%rsp), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldw  $7, 128(%rsp), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x22,0x70,0x6c,0x24,0x04,0x07]
+          vpshldw  $7, 128(%rsp), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldw  $7, 268435456(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x22,0x70,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldw  $7, 268435456(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldw  $7, -536870912(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x22,0x70,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldw  $7, -536870912(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldw  $7, -536870910(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x22,0x70,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldw  $7, -536870910(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldd  $7, (%rcx), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0x45,0x22,0x71,0x29,0x07]
+          vpshldd  $7, (%rcx), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldd  $7, -128(%rsp), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0x45,0x22,0x71,0x6c,0x24,0xfc,0x07]
+          vpshldd  $7, -128(%rsp), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldd  $7, 128(%rsp), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0x45,0x22,0x71,0x6c,0x24,0x04,0x07]
+          vpshldd  $7, 128(%rsp), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldd  $7, 268435456(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0x45,0x22,0x71,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldd  $7, 268435456(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldd  $7, -536870912(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0x45,0x22,0x71,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldd  $7, -536870912(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldd  $7, -536870910(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0x45,0x22,0x71,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldd  $7, -536870910(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldq  $7, (%rcx), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x22,0x71,0x29,0x07]
+          vpshldq  $7, (%rcx), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldq  $7, -128(%rsp), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x22,0x71,0x6c,0x24,0xfc,0x07]
+          vpshldq  $7, -128(%rsp), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldq  $7, 128(%rsp), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x22,0x71,0x6c,0x24,0x04,0x07]
+          vpshldq  $7, 128(%rsp), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldq  $7, 268435456(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x22,0x71,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshldq  $7, 268435456(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldq  $7, -536870912(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x22,0x71,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshldq  $7, -536870912(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldq  $7, -536870910(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x22,0x71,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshldq  $7, -536870910(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdw  $7, (%rcx), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x22,0x72,0x29,0x07]
+          vpshrdw  $7, (%rcx), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdw  $7, -128(%rsp), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x22,0x72,0x6c,0x24,0xfc,0x07]
+          vpshrdw  $7, -128(%rsp), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdw  $7, 128(%rsp), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x22,0x72,0x6c,0x24,0x04,0x07]
+          vpshrdw  $7, 128(%rsp), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdw  $7, 268435456(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x22,0x72,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdw  $7, 268435456(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdw  $7, -536870912(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x22,0x72,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdw  $7, -536870912(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdw  $7, -536870910(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x22,0x72,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdw  $7, -536870910(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdd  $7, (%rcx), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0x45,0x22,0x73,0x29,0x07]
+          vpshrdd  $7, (%rcx), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdd  $7, -128(%rsp), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0x45,0x22,0x73,0x6c,0x24,0xfc,0x07]
+          vpshrdd  $7, -128(%rsp), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdd  $7, 128(%rsp), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0x45,0x22,0x73,0x6c,0x24,0x04,0x07]
+          vpshrdd  $7, 128(%rsp), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdd  $7, 268435456(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0x45,0x22,0x73,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdd  $7, 268435456(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdd  $7, -536870912(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0x45,0x22,0x73,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdd  $7, -536870912(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdd  $7, -536870910(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0x45,0x22,0x73,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdd  $7, -536870910(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdq  $7, (%rcx), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x22,0x73,0x29,0x07]
+          vpshrdq  $7, (%rcx), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdq  $7, -128(%rsp), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x22,0x73,0x6c,0x24,0xfc,0x07]
+          vpshrdq  $7, -128(%rsp), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdq  $7, 128(%rsp), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe3,0xc5,0x22,0x73,0x6c,0x24,0x04,0x07]
+          vpshrdq  $7, 128(%rsp), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdq  $7, 268435456(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x22,0x73,0xac,0xf1,0x00,0x00,0x00,0x10,0x07]
+          vpshrdq  $7, 268435456(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdq  $7, -536870912(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x22,0x73,0xac,0xf1,0x00,0x00,0x00,0xe0,0x07]
+          vpshrdq  $7, -536870912(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdq  $7, -536870910(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa3,0xc5,0x22,0x73,0xac,0xf1,0x02,0x00,0x00,0xe0,0x07]
+          vpshrdq  $7, -536870910(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldvw %xmm3, %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x08,0x70,0xcb]
+          vpshldvw %xmm3, %xmm3, %xmm1
+
+// CHECK: vpshldvd %xmm3, %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf2,0x65,0x08,0x71,0xcb]
+          vpshldvd %xmm3, %xmm3, %xmm1
+
+// CHECK: vpshldvq %xmm3, %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x08,0x71,0xcb]
+          vpshldvq %xmm3, %xmm3, %xmm1
+
+// CHECK: vpshrdvw %xmm3, %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x08,0x72,0xcb]
+          vpshrdvw %xmm3, %xmm3, %xmm1
+
+// CHECK: vpshrdvd %xmm3, %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf2,0x65,0x08,0x73,0xcb]
+          vpshrdvd %xmm3, %xmm3, %xmm1
+
+// CHECK: vpshrdvq %xmm3, %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x08,0x73,0xcb]
+          vpshrdvq %xmm3, %xmm3, %xmm1
+
+// CHECK: vpshldvw %xmm23, %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x00,0x70,0xef]
+          vpshldvw %xmm23, %xmm23, %xmm21
+
+// CHECK: vpshldvd %xmm23, %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x45,0x00,0x71,0xef]
+          vpshldvd %xmm23, %xmm23, %xmm21
+
+// CHECK: vpshldvq %xmm23, %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x00,0x71,0xef]
+          vpshldvq %xmm23, %xmm23, %xmm21
+
+// CHECK: vpshrdvw %xmm23, %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x00,0x72,0xef]
+          vpshrdvw %xmm23, %xmm23, %xmm21
+
+// CHECK: vpshrdvd %xmm23, %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x45,0x00,0x73,0xef]
+          vpshrdvd %xmm23, %xmm23, %xmm21
+
+// CHECK: vpshrdvq %xmm23, %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x00,0x73,0xef]
+          vpshrdvq %xmm23, %xmm23, %xmm21
+
+// CHECK: vpshldvw %xmm3, %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x0a,0x70,0xcb]
+          vpshldvw %xmm3, %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldvd %xmm3, %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x65,0x0a,0x71,0xcb]
+          vpshldvd %xmm3, %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldvq %xmm3, %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x0a,0x71,0xcb]
+          vpshldvq %xmm3, %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdvw %xmm3, %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x0a,0x72,0xcb]
+          vpshrdvw %xmm3, %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdvd %xmm3, %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x65,0x0a,0x73,0xcb]
+          vpshrdvd %xmm3, %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdvq %xmm3, %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x0a,0x73,0xcb]
+          vpshrdvq %xmm3, %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldvw %xmm23, %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x02,0x70,0xef]
+          vpshldvw %xmm23, %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldvd %xmm23, %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x45,0x02,0x71,0xef]
+          vpshldvd %xmm23, %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldvq %xmm23, %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x02,0x71,0xef]
+          vpshldvq %xmm23, %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdvw %xmm23, %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x02,0x72,0xef]
+          vpshrdvw %xmm23, %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdvd %xmm23, %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x45,0x02,0x73,0xef]
+          vpshrdvd %xmm23, %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdvq %xmm23, %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x02,0x73,0xef]
+          vpshrdvq %xmm23, %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldvw  (%rcx), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x08,0x70,0x09]
+          vpshldvw  (%rcx), %xmm3, %xmm1
+
+// CHECK: vpshldvw  -64(%rsp), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x08,0x70,0x4c,0x24,0xfc]
+          vpshldvw  -64(%rsp), %xmm3, %xmm1
+
+// CHECK: vpshldvw  64(%rsp), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x08,0x70,0x4c,0x24,0x04]
+          vpshldvw  64(%rsp), %xmm3, %xmm1
+
+// CHECK: vpshldvw  268435456(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x08,0x70,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvw  268435456(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshldvw  -536870912(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x08,0x70,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvw  -536870912(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshldvw  -536870910(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x08,0x70,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvw  -536870910(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshldvd  (%rcx), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf2,0x65,0x08,0x71,0x09]
+          vpshldvd  (%rcx), %xmm3, %xmm1
+
+// CHECK: vpshldvd  -64(%rsp), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf2,0x65,0x08,0x71,0x4c,0x24,0xfc]
+          vpshldvd  -64(%rsp), %xmm3, %xmm1
+
+// CHECK: vpshldvd  64(%rsp), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf2,0x65,0x08,0x71,0x4c,0x24,0x04]
+          vpshldvd  64(%rsp), %xmm3, %xmm1
+
+// CHECK: vpshldvd  268435456(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb2,0x65,0x08,0x71,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvd  268435456(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshldvd  -536870912(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb2,0x65,0x08,0x71,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvd  -536870912(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshldvd  -536870910(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb2,0x65,0x08,0x71,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvd  -536870910(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshldvq  (%rcx), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x08,0x71,0x09]
+          vpshldvq  (%rcx), %xmm3, %xmm1
+
+// CHECK: vpshldvq  -64(%rsp), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x08,0x71,0x4c,0x24,0xfc]
+          vpshldvq  -64(%rsp), %xmm3, %xmm1
+
+// CHECK: vpshldvq  64(%rsp), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x08,0x71,0x4c,0x24,0x04]
+          vpshldvq  64(%rsp), %xmm3, %xmm1
+
+// CHECK: vpshldvq  268435456(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x08,0x71,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvq  268435456(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshldvq  -536870912(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x08,0x71,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvq  -536870912(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshldvq  -536870910(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x08,0x71,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvq  -536870910(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshrdvw  (%rcx), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x08,0x72,0x09]
+          vpshrdvw  (%rcx), %xmm3, %xmm1
+
+// CHECK: vpshrdvw  -64(%rsp), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x08,0x72,0x4c,0x24,0xfc]
+          vpshrdvw  -64(%rsp), %xmm3, %xmm1
+
+// CHECK: vpshrdvw  64(%rsp), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x08,0x72,0x4c,0x24,0x04]
+          vpshrdvw  64(%rsp), %xmm3, %xmm1
+
+// CHECK: vpshrdvw  268435456(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x08,0x72,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvw  268435456(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshrdvw  -536870912(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x08,0x72,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvw  -536870912(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshrdvw  -536870910(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x08,0x72,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvw  -536870910(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshrdvd  (%rcx), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf2,0x65,0x08,0x73,0x09]
+          vpshrdvd  (%rcx), %xmm3, %xmm1
+
+// CHECK: vpshrdvd  -64(%rsp), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf2,0x65,0x08,0x73,0x4c,0x24,0xfc]
+          vpshrdvd  -64(%rsp), %xmm3, %xmm1
+
+// CHECK: vpshrdvd  64(%rsp), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf2,0x65,0x08,0x73,0x4c,0x24,0x04]
+          vpshrdvd  64(%rsp), %xmm3, %xmm1
+
+// CHECK: vpshrdvd  268435456(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb2,0x65,0x08,0x73,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvd  268435456(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshrdvd  -536870912(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb2,0x65,0x08,0x73,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvd  -536870912(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshrdvd  -536870910(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb2,0x65,0x08,0x73,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvd  -536870910(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshrdvq  (%rcx), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x08,0x73,0x09]
+          vpshrdvq  (%rcx), %xmm3, %xmm1
+
+// CHECK: vpshrdvq  -64(%rsp), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x08,0x73,0x4c,0x24,0xfc]
+          vpshrdvq  -64(%rsp), %xmm3, %xmm1
+
+// CHECK: vpshrdvq  64(%rsp), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x08,0x73,0x4c,0x24,0x04]
+          vpshrdvq  64(%rsp), %xmm3, %xmm1
+
+// CHECK: vpshrdvq  268435456(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x08,0x73,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvq  268435456(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshrdvq  -536870912(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x08,0x73,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvq  -536870912(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshrdvq  -536870910(%rcx,%r14,8), %xmm3, %xmm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x08,0x73,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvq  -536870910(%rcx,%r14,8), %xmm3, %xmm1
+
+// CHECK: vpshldvw  (%rcx), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x00,0x70,0x29]
+          vpshldvw  (%rcx), %xmm23, %xmm21
+
+// CHECK: vpshldvw  -64(%rsp), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x00,0x70,0x6c,0x24,0xfc]
+          vpshldvw  -64(%rsp), %xmm23, %xmm21
+
+// CHECK: vpshldvw  64(%rsp), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x00,0x70,0x6c,0x24,0x04]
+          vpshldvw  64(%rsp), %xmm23, %xmm21
+
+// CHECK: vpshldvw  268435456(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x00,0x70,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvw  268435456(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshldvw  -536870912(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x00,0x70,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvw  -536870912(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshldvw  -536870910(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x00,0x70,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvw  -536870910(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshldvd  (%rcx), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x45,0x00,0x71,0x29]
+          vpshldvd  (%rcx), %xmm23, %xmm21
+
+// CHECK: vpshldvd  -64(%rsp), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x45,0x00,0x71,0x6c,0x24,0xfc]
+          vpshldvd  -64(%rsp), %xmm23, %xmm21
+
+// CHECK: vpshldvd  64(%rsp), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x45,0x00,0x71,0x6c,0x24,0x04]
+          vpshldvd  64(%rsp), %xmm23, %xmm21
+
+// CHECK: vpshldvd  268435456(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x45,0x00,0x71,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvd  268435456(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshldvd  -536870912(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x45,0x00,0x71,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvd  -536870912(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshldvd  -536870910(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x45,0x00,0x71,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvd  -536870910(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshldvq  (%rcx), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x00,0x71,0x29]
+          vpshldvq  (%rcx), %xmm23, %xmm21
+
+// CHECK: vpshldvq  -64(%rsp), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x00,0x71,0x6c,0x24,0xfc]
+          vpshldvq  -64(%rsp), %xmm23, %xmm21
+
+// CHECK: vpshldvq  64(%rsp), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x00,0x71,0x6c,0x24,0x04]
+          vpshldvq  64(%rsp), %xmm23, %xmm21
+
+// CHECK: vpshldvq  268435456(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x00,0x71,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvq  268435456(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshldvq  -536870912(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x00,0x71,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvq  -536870912(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshldvq  -536870910(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x00,0x71,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvq  -536870910(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshrdvw  (%rcx), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x00,0x72,0x29]
+          vpshrdvw  (%rcx), %xmm23, %xmm21
+
+// CHECK: vpshrdvw  -64(%rsp), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x00,0x72,0x6c,0x24,0xfc]
+          vpshrdvw  -64(%rsp), %xmm23, %xmm21
+
+// CHECK: vpshrdvw  64(%rsp), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x00,0x72,0x6c,0x24,0x04]
+          vpshrdvw  64(%rsp), %xmm23, %xmm21
+
+// CHECK: vpshrdvw  268435456(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x00,0x72,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvw  268435456(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshrdvw  -536870912(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x00,0x72,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvw  -536870912(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshrdvw  -536870910(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x00,0x72,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvw  -536870910(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshrdvd  (%rcx), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x45,0x00,0x73,0x29]
+          vpshrdvd  (%rcx), %xmm23, %xmm21
+
+// CHECK: vpshrdvd  -64(%rsp), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x45,0x00,0x73,0x6c,0x24,0xfc]
+          vpshrdvd  -64(%rsp), %xmm23, %xmm21
+
+// CHECK: vpshrdvd  64(%rsp), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe2,0x45,0x00,0x73,0x6c,0x24,0x04]
+          vpshrdvd  64(%rsp), %xmm23, %xmm21
+
+// CHECK: vpshrdvd  268435456(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x45,0x00,0x73,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvd  268435456(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshrdvd  -536870912(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x45,0x00,0x73,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvd  -536870912(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshrdvd  -536870910(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa2,0x45,0x00,0x73,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvd  -536870910(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshrdvq  (%rcx), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x00,0x73,0x29]
+          vpshrdvq  (%rcx), %xmm23, %xmm21
+
+// CHECK: vpshrdvq  -64(%rsp), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x00,0x73,0x6c,0x24,0xfc]
+          vpshrdvq  -64(%rsp), %xmm23, %xmm21
+
+// CHECK: vpshrdvq  64(%rsp), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x00,0x73,0x6c,0x24,0x04]
+          vpshrdvq  64(%rsp), %xmm23, %xmm21
+
+// CHECK: vpshrdvq  268435456(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x00,0x73,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvq  268435456(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshrdvq  -536870912(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x00,0x73,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvq  -536870912(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshrdvq  -536870910(%rcx,%r14,8), %xmm23, %xmm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x00,0x73,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvq  -536870910(%rcx,%r14,8), %xmm23, %xmm21
+
+// CHECK: vpshldvw  (%rcx), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x0a,0x70,0x09]
+          vpshldvw  (%rcx), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldvw  -64(%rsp), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x0a,0x70,0x4c,0x24,0xfc]
+          vpshldvw  -64(%rsp), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldvw  64(%rsp), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x0a,0x70,0x4c,0x24,0x04]
+          vpshldvw  64(%rsp), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldvw  268435456(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x0a,0x70,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvw  268435456(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldvw  -536870912(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x0a,0x70,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvw  -536870912(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldvw  -536870910(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x0a,0x70,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvw  -536870910(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldvd  (%rcx), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x65,0x0a,0x71,0x09]
+          vpshldvd  (%rcx), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldvd  -64(%rsp), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x65,0x0a,0x71,0x4c,0x24,0xfc]
+          vpshldvd  -64(%rsp), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldvd  64(%rsp), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x65,0x0a,0x71,0x4c,0x24,0x04]
+          vpshldvd  64(%rsp), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldvd  268435456(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x65,0x0a,0x71,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvd  268435456(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldvd  -536870912(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x65,0x0a,0x71,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvd  -536870912(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldvd  -536870910(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x65,0x0a,0x71,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvd  -536870910(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldvq  (%rcx), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x0a,0x71,0x09]
+          vpshldvq  (%rcx), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldvq  -64(%rsp), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x0a,0x71,0x4c,0x24,0xfc]
+          vpshldvq  -64(%rsp), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldvq  64(%rsp), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x0a,0x71,0x4c,0x24,0x04]
+          vpshldvq  64(%rsp), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldvq  268435456(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x0a,0x71,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvq  268435456(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldvq  -536870912(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x0a,0x71,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvq  -536870912(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldvq  -536870910(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x0a,0x71,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvq  -536870910(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdvw  (%rcx), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x0a,0x72,0x09]
+          vpshrdvw  (%rcx), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdvw  -64(%rsp), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x0a,0x72,0x4c,0x24,0xfc]
+          vpshrdvw  -64(%rsp), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdvw  64(%rsp), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x0a,0x72,0x4c,0x24,0x04]
+          vpshrdvw  64(%rsp), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdvw  268435456(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x0a,0x72,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvw  268435456(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdvw  -536870912(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x0a,0x72,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvw  -536870912(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdvw  -536870910(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x0a,0x72,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvw  -536870910(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdvd  (%rcx), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x65,0x0a,0x73,0x09]
+          vpshrdvd  (%rcx), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdvd  -64(%rsp), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x65,0x0a,0x73,0x4c,0x24,0xfc]
+          vpshrdvd  -64(%rsp), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdvd  64(%rsp), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x65,0x0a,0x73,0x4c,0x24,0x04]
+          vpshrdvd  64(%rsp), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdvd  268435456(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x65,0x0a,0x73,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvd  268435456(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdvd  -536870912(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x65,0x0a,0x73,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvd  -536870912(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdvd  -536870910(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x65,0x0a,0x73,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvd  -536870910(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdvq  (%rcx), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x0a,0x73,0x09]
+          vpshrdvq  (%rcx), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdvq  -64(%rsp), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x0a,0x73,0x4c,0x24,0xfc]
+          vpshrdvq  -64(%rsp), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdvq  64(%rsp), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x0a,0x73,0x4c,0x24,0x04]
+          vpshrdvq  64(%rsp), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdvq  268435456(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x0a,0x73,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvq  268435456(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdvq  -536870912(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x0a,0x73,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvq  -536870912(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshrdvq  -536870910(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x0a,0x73,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvq  -536870910(%rcx,%r14,8), %xmm3, %xmm1 {%k2}
+
+// CHECK: vpshldvw  (%rcx), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x02,0x70,0x29]
+          vpshldvw  (%rcx), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldvw  -64(%rsp), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x02,0x70,0x6c,0x24,0xfc]
+          vpshldvw  -64(%rsp), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldvw  64(%rsp), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x02,0x70,0x6c,0x24,0x04]
+          vpshldvw  64(%rsp), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldvw  268435456(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x02,0x70,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvw  268435456(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldvw  -536870912(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x02,0x70,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvw  -536870912(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldvw  -536870910(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x02,0x70,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvw  -536870910(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldvd  (%rcx), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x45,0x02,0x71,0x29]
+          vpshldvd  (%rcx), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldvd  -64(%rsp), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x45,0x02,0x71,0x6c,0x24,0xfc]
+          vpshldvd  -64(%rsp), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldvd  64(%rsp), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x45,0x02,0x71,0x6c,0x24,0x04]
+          vpshldvd  64(%rsp), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldvd  268435456(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x45,0x02,0x71,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvd  268435456(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldvd  -536870912(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x45,0x02,0x71,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvd  -536870912(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldvd  -536870910(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x45,0x02,0x71,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvd  -536870910(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldvq  (%rcx), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x02,0x71,0x29]
+          vpshldvq  (%rcx), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldvq  -64(%rsp), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x02,0x71,0x6c,0x24,0xfc]
+          vpshldvq  -64(%rsp), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldvq  64(%rsp), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x02,0x71,0x6c,0x24,0x04]
+          vpshldvq  64(%rsp), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldvq  268435456(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x02,0x71,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvq  268435456(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldvq  -536870912(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x02,0x71,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvq  -536870912(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldvq  -536870910(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x02,0x71,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvq  -536870910(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdvw  (%rcx), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x02,0x72,0x29]
+          vpshrdvw  (%rcx), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdvw  -64(%rsp), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x02,0x72,0x6c,0x24,0xfc]
+          vpshrdvw  -64(%rsp), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdvw  64(%rsp), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x02,0x72,0x6c,0x24,0x04]
+          vpshrdvw  64(%rsp), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdvw  268435456(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x02,0x72,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvw  268435456(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdvw  -536870912(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x02,0x72,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvw  -536870912(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdvw  -536870910(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x02,0x72,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvw  -536870910(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdvd  (%rcx), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x45,0x02,0x73,0x29]
+          vpshrdvd  (%rcx), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdvd  -64(%rsp), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x45,0x02,0x73,0x6c,0x24,0xfc]
+          vpshrdvd  -64(%rsp), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdvd  64(%rsp), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x45,0x02,0x73,0x6c,0x24,0x04]
+          vpshrdvd  64(%rsp), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdvd  268435456(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x45,0x02,0x73,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvd  268435456(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdvd  -536870912(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x45,0x02,0x73,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvd  -536870912(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdvd  -536870910(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x45,0x02,0x73,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvd  -536870910(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdvq  (%rcx), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x02,0x73,0x29]
+          vpshrdvq  (%rcx), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdvq  -64(%rsp), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x02,0x73,0x6c,0x24,0xfc]
+          vpshrdvq  -64(%rsp), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdvq  64(%rsp), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x02,0x73,0x6c,0x24,0x04]
+          vpshrdvq  64(%rsp), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdvq  268435456(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x02,0x73,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvq  268435456(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdvq  -536870912(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x02,0x73,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvq  -536870912(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshrdvq  -536870910(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x02,0x73,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvq  -536870910(%rcx,%r14,8), %xmm23, %xmm21 {%k2}
+
+// CHECK: vpshldvw %ymm3, %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x28,0x70,0xcb]
+          vpshldvw %ymm3, %ymm3, %ymm1
+
+// CHECK: vpshldvd %ymm3, %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf2,0x65,0x28,0x71,0xcb]
+          vpshldvd %ymm3, %ymm3, %ymm1
+
+// CHECK: vpshldvq %ymm3, %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x28,0x71,0xcb]
+          vpshldvq %ymm3, %ymm3, %ymm1
+
+// CHECK: vpshrdvw %ymm3, %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x28,0x72,0xcb]
+          vpshrdvw %ymm3, %ymm3, %ymm1
+
+// CHECK: vpshrdvd %ymm3, %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf2,0x65,0x28,0x73,0xcb]
+          vpshrdvd %ymm3, %ymm3, %ymm1
+
+// CHECK: vpshrdvq %ymm3, %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x28,0x73,0xcb]
+          vpshrdvq %ymm3, %ymm3, %ymm1
+
+// CHECK: vpshldvw %ymm23, %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x20,0x70,0xef]
+          vpshldvw %ymm23, %ymm23, %ymm21
+
+// CHECK: vpshldvd %ymm23, %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa2,0x45,0x20,0x71,0xef]
+          vpshldvd %ymm23, %ymm23, %ymm21
+
+// CHECK: vpshldvq %ymm23, %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x20,0x71,0xef]
+          vpshldvq %ymm23, %ymm23, %ymm21
+
+// CHECK: vpshrdvw %ymm23, %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x20,0x72,0xef]
+          vpshrdvw %ymm23, %ymm23, %ymm21
+
+// CHECK: vpshrdvd %ymm23, %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa2,0x45,0x20,0x73,0xef]
+          vpshrdvd %ymm23, %ymm23, %ymm21
+
+// CHECK: vpshrdvq %ymm23, %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x20,0x73,0xef]
+          vpshrdvq %ymm23, %ymm23, %ymm21
+
+// CHECK: vpshldvw %ymm3, %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x2a,0x70,0xcb]
+          vpshldvw %ymm3, %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldvd %ymm3, %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x65,0x2a,0x71,0xcb]
+          vpshldvd %ymm3, %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldvq %ymm3, %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x2a,0x71,0xcb]
+          vpshldvq %ymm3, %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdvw %ymm3, %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x2a,0x72,0xcb]
+          vpshrdvw %ymm3, %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdvd %ymm3, %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x65,0x2a,0x73,0xcb]
+          vpshrdvd %ymm3, %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdvq %ymm3, %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x2a,0x73,0xcb]
+          vpshrdvq %ymm3, %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldvw %ymm23, %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x22,0x70,0xef]
+          vpshldvw %ymm23, %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldvd %ymm23, %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x45,0x22,0x71,0xef]
+          vpshldvd %ymm23, %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldvq %ymm23, %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x22,0x71,0xef]
+          vpshldvq %ymm23, %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdvw %ymm23, %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x22,0x72,0xef]
+          vpshrdvw %ymm23, %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdvd %ymm23, %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x45,0x22,0x73,0xef]
+          vpshrdvd %ymm23, %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdvq %ymm23, %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x22,0x73,0xef]
+          vpshrdvq %ymm23, %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldvw  (%rcx), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x28,0x70,0x09]
+          vpshldvw  (%rcx), %ymm3, %ymm1
+
+// CHECK: vpshldvw  -128(%rsp), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x28,0x70,0x4c,0x24,0xfc]
+          vpshldvw  -128(%rsp), %ymm3, %ymm1
+
+// CHECK: vpshldvw  128(%rsp), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x28,0x70,0x4c,0x24,0x04]
+          vpshldvw  128(%rsp), %ymm3, %ymm1
+
+// CHECK: vpshldvw  268435456(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x28,0x70,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvw  268435456(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshldvw  -536870912(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x28,0x70,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvw  -536870912(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshldvw  -536870910(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x28,0x70,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvw  -536870910(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshldvd  (%rcx), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf2,0x65,0x28,0x71,0x09]
+          vpshldvd  (%rcx), %ymm3, %ymm1
+
+// CHECK: vpshldvd  -128(%rsp), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf2,0x65,0x28,0x71,0x4c,0x24,0xfc]
+          vpshldvd  -128(%rsp), %ymm3, %ymm1
+
+// CHECK: vpshldvd  128(%rsp), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf2,0x65,0x28,0x71,0x4c,0x24,0x04]
+          vpshldvd  128(%rsp), %ymm3, %ymm1
+
+// CHECK: vpshldvd  268435456(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb2,0x65,0x28,0x71,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvd  268435456(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshldvd  -536870912(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb2,0x65,0x28,0x71,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvd  -536870912(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshldvd  -536870910(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb2,0x65,0x28,0x71,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvd  -536870910(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshldvq  (%rcx), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x28,0x71,0x09]
+          vpshldvq  (%rcx), %ymm3, %ymm1
+
+// CHECK: vpshldvq  -128(%rsp), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x28,0x71,0x4c,0x24,0xfc]
+          vpshldvq  -128(%rsp), %ymm3, %ymm1
+
+// CHECK: vpshldvq  128(%rsp), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x28,0x71,0x4c,0x24,0x04]
+          vpshldvq  128(%rsp), %ymm3, %ymm1
+
+// CHECK: vpshldvq  268435456(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x28,0x71,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvq  268435456(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshldvq  -536870912(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x28,0x71,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvq  -536870912(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshldvq  -536870910(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x28,0x71,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvq  -536870910(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshrdvw  (%rcx), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x28,0x72,0x09]
+          vpshrdvw  (%rcx), %ymm3, %ymm1
+
+// CHECK: vpshrdvw  -128(%rsp), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x28,0x72,0x4c,0x24,0xfc]
+          vpshrdvw  -128(%rsp), %ymm3, %ymm1
+
+// CHECK: vpshrdvw  128(%rsp), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x28,0x72,0x4c,0x24,0x04]
+          vpshrdvw  128(%rsp), %ymm3, %ymm1
+
+// CHECK: vpshrdvw  268435456(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x28,0x72,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvw  268435456(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshrdvw  -536870912(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x28,0x72,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvw  -536870912(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshrdvw  -536870910(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x28,0x72,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvw  -536870910(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshrdvd  (%rcx), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf2,0x65,0x28,0x73,0x09]
+          vpshrdvd  (%rcx), %ymm3, %ymm1
+
+// CHECK: vpshrdvd  -128(%rsp), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf2,0x65,0x28,0x73,0x4c,0x24,0xfc]
+          vpshrdvd  -128(%rsp), %ymm3, %ymm1
+
+// CHECK: vpshrdvd  128(%rsp), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf2,0x65,0x28,0x73,0x4c,0x24,0x04]
+          vpshrdvd  128(%rsp), %ymm3, %ymm1
+
+// CHECK: vpshrdvd  268435456(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb2,0x65,0x28,0x73,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvd  268435456(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshrdvd  -536870912(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb2,0x65,0x28,0x73,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvd  -536870912(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshrdvd  -536870910(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb2,0x65,0x28,0x73,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvd  -536870910(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshrdvq  (%rcx), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x28,0x73,0x09]
+          vpshrdvq  (%rcx), %ymm3, %ymm1
+
+// CHECK: vpshrdvq  -128(%rsp), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x28,0x73,0x4c,0x24,0xfc]
+          vpshrdvq  -128(%rsp), %ymm3, %ymm1
+
+// CHECK: vpshrdvq  128(%rsp), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xf2,0xe5,0x28,0x73,0x4c,0x24,0x04]
+          vpshrdvq  128(%rsp), %ymm3, %ymm1
+
+// CHECK: vpshrdvq  268435456(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x28,0x73,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvq  268435456(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshrdvq  -536870912(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x28,0x73,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvq  -536870912(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshrdvq  -536870910(%rcx,%r14,8), %ymm3, %ymm1
+// CHECK: encoding: [0x62,0xb2,0xe5,0x28,0x73,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvq  -536870910(%rcx,%r14,8), %ymm3, %ymm1
+
+// CHECK: vpshldvw  (%rcx), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x20,0x70,0x29]
+          vpshldvw  (%rcx), %ymm23, %ymm21
+
+// CHECK: vpshldvw  -128(%rsp), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x20,0x70,0x6c,0x24,0xfc]
+          vpshldvw  -128(%rsp), %ymm23, %ymm21
+
+// CHECK: vpshldvw  128(%rsp), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x20,0x70,0x6c,0x24,0x04]
+          vpshldvw  128(%rsp), %ymm23, %ymm21
+
+// CHECK: vpshldvw  268435456(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x20,0x70,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvw  268435456(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshldvw  -536870912(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x20,0x70,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvw  -536870912(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshldvw  -536870910(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x20,0x70,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvw  -536870910(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshldvd  (%rcx), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x45,0x20,0x71,0x29]
+          vpshldvd  (%rcx), %ymm23, %ymm21
+
+// CHECK: vpshldvd  -128(%rsp), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x45,0x20,0x71,0x6c,0x24,0xfc]
+          vpshldvd  -128(%rsp), %ymm23, %ymm21
+
+// CHECK: vpshldvd  128(%rsp), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x45,0x20,0x71,0x6c,0x24,0x04]
+          vpshldvd  128(%rsp), %ymm23, %ymm21
+
+// CHECK: vpshldvd  268435456(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa2,0x45,0x20,0x71,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvd  268435456(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshldvd  -536870912(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa2,0x45,0x20,0x71,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvd  -536870912(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshldvd  -536870910(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa2,0x45,0x20,0x71,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvd  -536870910(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshldvq  (%rcx), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x20,0x71,0x29]
+          vpshldvq  (%rcx), %ymm23, %ymm21
+
+// CHECK: vpshldvq  -128(%rsp), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x20,0x71,0x6c,0x24,0xfc]
+          vpshldvq  -128(%rsp), %ymm23, %ymm21
+
+// CHECK: vpshldvq  128(%rsp), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x20,0x71,0x6c,0x24,0x04]
+          vpshldvq  128(%rsp), %ymm23, %ymm21
+
+// CHECK: vpshldvq  268435456(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x20,0x71,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvq  268435456(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshldvq  -536870912(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x20,0x71,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvq  -536870912(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshldvq  -536870910(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x20,0x71,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvq  -536870910(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshrdvw  (%rcx), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x20,0x72,0x29]
+          vpshrdvw  (%rcx), %ymm23, %ymm21
+
+// CHECK: vpshrdvw  -128(%rsp), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x20,0x72,0x6c,0x24,0xfc]
+          vpshrdvw  -128(%rsp), %ymm23, %ymm21
+
+// CHECK: vpshrdvw  128(%rsp), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x20,0x72,0x6c,0x24,0x04]
+          vpshrdvw  128(%rsp), %ymm23, %ymm21
+
+// CHECK: vpshrdvw  268435456(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x20,0x72,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvw  268435456(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshrdvw  -536870912(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x20,0x72,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvw  -536870912(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshrdvw  -536870910(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x20,0x72,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvw  -536870910(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshrdvd  (%rcx), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x45,0x20,0x73,0x29]
+          vpshrdvd  (%rcx), %ymm23, %ymm21
+
+// CHECK: vpshrdvd  -128(%rsp), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x45,0x20,0x73,0x6c,0x24,0xfc]
+          vpshrdvd  -128(%rsp), %ymm23, %ymm21
+
+// CHECK: vpshrdvd  128(%rsp), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe2,0x45,0x20,0x73,0x6c,0x24,0x04]
+          vpshrdvd  128(%rsp), %ymm23, %ymm21
+
+// CHECK: vpshrdvd  268435456(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa2,0x45,0x20,0x73,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvd  268435456(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshrdvd  -536870912(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa2,0x45,0x20,0x73,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvd  -536870912(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshrdvd  -536870910(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa2,0x45,0x20,0x73,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvd  -536870910(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshrdvq  (%rcx), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x20,0x73,0x29]
+          vpshrdvq  (%rcx), %ymm23, %ymm21
+
+// CHECK: vpshrdvq  -128(%rsp), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x20,0x73,0x6c,0x24,0xfc]
+          vpshrdvq  -128(%rsp), %ymm23, %ymm21
+
+// CHECK: vpshrdvq  128(%rsp), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xe2,0xc5,0x20,0x73,0x6c,0x24,0x04]
+          vpshrdvq  128(%rsp), %ymm23, %ymm21
+
+// CHECK: vpshrdvq  268435456(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x20,0x73,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvq  268435456(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshrdvq  -536870912(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x20,0x73,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvq  -536870912(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshrdvq  -536870910(%rcx,%r14,8), %ymm23, %ymm21
+// CHECK: encoding: [0x62,0xa2,0xc5,0x20,0x73,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvq  -536870910(%rcx,%r14,8), %ymm23, %ymm21
+
+// CHECK: vpshldvw  (%rcx), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x2a,0x70,0x09]
+          vpshldvw  (%rcx), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldvw  -128(%rsp), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x2a,0x70,0x4c,0x24,0xfc]
+          vpshldvw  -128(%rsp), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldvw  128(%rsp), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x2a,0x70,0x4c,0x24,0x04]
+          vpshldvw  128(%rsp), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldvw  268435456(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x2a,0x70,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvw  268435456(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldvw  -536870912(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x2a,0x70,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvw  -536870912(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldvw  -536870910(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x2a,0x70,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvw  -536870910(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldvd  (%rcx), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x65,0x2a,0x71,0x09]
+          vpshldvd  (%rcx), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldvd  -128(%rsp), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x65,0x2a,0x71,0x4c,0x24,0xfc]
+          vpshldvd  -128(%rsp), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldvd  128(%rsp), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x65,0x2a,0x71,0x4c,0x24,0x04]
+          vpshldvd  128(%rsp), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldvd  268435456(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x65,0x2a,0x71,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvd  268435456(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldvd  -536870912(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x65,0x2a,0x71,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvd  -536870912(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldvd  -536870910(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x65,0x2a,0x71,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvd  -536870910(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldvq  (%rcx), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x2a,0x71,0x09]
+          vpshldvq  (%rcx), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldvq  -128(%rsp), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x2a,0x71,0x4c,0x24,0xfc]
+          vpshldvq  -128(%rsp), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldvq  128(%rsp), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x2a,0x71,0x4c,0x24,0x04]
+          vpshldvq  128(%rsp), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldvq  268435456(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x2a,0x71,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvq  268435456(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldvq  -536870912(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x2a,0x71,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvq  -536870912(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldvq  -536870910(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x2a,0x71,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvq  -536870910(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdvw  (%rcx), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x2a,0x72,0x09]
+          vpshrdvw  (%rcx), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdvw  -128(%rsp), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x2a,0x72,0x4c,0x24,0xfc]
+          vpshrdvw  -128(%rsp), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdvw  128(%rsp), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x2a,0x72,0x4c,0x24,0x04]
+          vpshrdvw  128(%rsp), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdvw  268435456(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x2a,0x72,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvw  268435456(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdvw  -536870912(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x2a,0x72,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvw  -536870912(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdvw  -536870910(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x2a,0x72,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvw  -536870910(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdvd  (%rcx), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x65,0x2a,0x73,0x09]
+          vpshrdvd  (%rcx), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdvd  -128(%rsp), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x65,0x2a,0x73,0x4c,0x24,0xfc]
+          vpshrdvd  -128(%rsp), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdvd  128(%rsp), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0x65,0x2a,0x73,0x4c,0x24,0x04]
+          vpshrdvd  128(%rsp), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdvd  268435456(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x65,0x2a,0x73,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvd  268435456(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdvd  -536870912(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x65,0x2a,0x73,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvd  -536870912(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdvd  -536870910(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0x65,0x2a,0x73,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvd  -536870910(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdvq  (%rcx), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x2a,0x73,0x09]
+          vpshrdvq  (%rcx), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdvq  -128(%rsp), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x2a,0x73,0x4c,0x24,0xfc]
+          vpshrdvq  -128(%rsp), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdvq  128(%rsp), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xf2,0xe5,0x2a,0x73,0x4c,0x24,0x04]
+          vpshrdvq  128(%rsp), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdvq  268435456(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x2a,0x73,0x8c,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvq  268435456(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdvq  -536870912(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x2a,0x73,0x8c,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvq  -536870912(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshrdvq  -536870910(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+// CHECK: encoding: [0x62,0xb2,0xe5,0x2a,0x73,0x8c,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvq  -536870910(%rcx,%r14,8), %ymm3, %ymm1 {%k2}
+
+// CHECK: vpshldvw  (%rcx), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x22,0x70,0x29]
+          vpshldvw  (%rcx), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldvw  -128(%rsp), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x22,0x70,0x6c,0x24,0xfc]
+          vpshldvw  -128(%rsp), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldvw  128(%rsp), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x22,0x70,0x6c,0x24,0x04]
+          vpshldvw  128(%rsp), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldvw  268435456(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x22,0x70,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvw  268435456(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldvw  -536870912(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x22,0x70,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvw  -536870912(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldvw  -536870910(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x22,0x70,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvw  -536870910(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldvd  (%rcx), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x45,0x22,0x71,0x29]
+          vpshldvd  (%rcx), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldvd  -128(%rsp), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x45,0x22,0x71,0x6c,0x24,0xfc]
+          vpshldvd  -128(%rsp), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldvd  128(%rsp), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x45,0x22,0x71,0x6c,0x24,0x04]
+          vpshldvd  128(%rsp), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldvd  268435456(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x45,0x22,0x71,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvd  268435456(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldvd  -536870912(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x45,0x22,0x71,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvd  -536870912(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldvd  -536870910(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x45,0x22,0x71,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvd  -536870910(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldvq  (%rcx), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x22,0x71,0x29]
+          vpshldvq  (%rcx), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldvq  -128(%rsp), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x22,0x71,0x6c,0x24,0xfc]
+          vpshldvq  -128(%rsp), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldvq  128(%rsp), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x22,0x71,0x6c,0x24,0x04]
+          vpshldvq  128(%rsp), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldvq  268435456(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x22,0x71,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshldvq  268435456(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldvq  -536870912(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x22,0x71,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshldvq  -536870912(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshldvq  -536870910(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x22,0x71,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshldvq  -536870910(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdvw  (%rcx), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x22,0x72,0x29]
+          vpshrdvw  (%rcx), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdvw  -128(%rsp), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x22,0x72,0x6c,0x24,0xfc]
+          vpshrdvw  -128(%rsp), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdvw  128(%rsp), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x22,0x72,0x6c,0x24,0x04]
+          vpshrdvw  128(%rsp), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdvw  268435456(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x22,0x72,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvw  268435456(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdvw  -536870912(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x22,0x72,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvw  -536870912(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdvw  -536870910(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x22,0x72,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvw  -536870910(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdvd  (%rcx), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x45,0x22,0x73,0x29]
+          vpshrdvd  (%rcx), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdvd  -128(%rsp), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x45,0x22,0x73,0x6c,0x24,0xfc]
+          vpshrdvd  -128(%rsp), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdvd  128(%rsp), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0x45,0x22,0x73,0x6c,0x24,0x04]
+          vpshrdvd  128(%rsp), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdvd  268435456(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x45,0x22,0x73,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvd  268435456(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdvd  -536870912(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x45,0x22,0x73,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvd  -536870912(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdvd  -536870910(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0x45,0x22,0x73,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvd  -536870910(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdvq  (%rcx), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x22,0x73,0x29]
+          vpshrdvq  (%rcx), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdvq  -128(%rsp), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x22,0x73,0x6c,0x24,0xfc]
+          vpshrdvq  -128(%rsp), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdvq  128(%rsp), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xe2,0xc5,0x22,0x73,0x6c,0x24,0x04]
+          vpshrdvq  128(%rsp), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdvq  268435456(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x22,0x73,0xac,0xf1,0x00,0x00,0x00,0x10]
+          vpshrdvq  268435456(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdvq  -536870912(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x22,0x73,0xac,0xf1,0x00,0x00,0x00,0xe0]
+          vpshrdvq  -536870912(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+
+// CHECK: vpshrdvq  -536870910(%rcx,%r14,8), %ymm23, %ymm21 {%k2}
+// CHECK: encoding: [0x62,0xa2,0xc5,0x22,0x73,0xac,0xf1,0x02,0x00,0x00,0xe0]
+          vpshrdvq  -536870910(%rcx,%r14,8), %ymm23, %ymm21 {%k2}




More information about the llvm-commits mailing list