[llvm] r238301 - AVX-512: Implemented all forms of sign-extend and zero-extend instructions for KNL and SKX

Elena Demikhovsky elena.demikhovsky at intel.com
Wed May 27 01:15:19 PDT 2015


Author: delena
Date: Wed May 27 03:15:19 2015
New Revision: 238301

URL: http://llvm.org/viewvc/llvm-project?rev=238301&view=rev
Log:
AVX-512: Implemented all forms of sign-extend and zero-extend instructions for KNL and SKX
Implemented DAG lowering for all these forms.
Added tests for DAG lowering and encoding.

By Igor Breger (igor.breger at intel.com)


Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/lib/Target/X86/X86InstrSSE.td
    llvm/trunk/test/CodeGen/X86/avx512-trunc-ext.ll
    llvm/trunk/test/MC/X86/x86-64-avx512bw.s
    llvm/trunk/test/MC/X86/x86-64-avx512bw_vl.s
    llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=238301&r1=238300&r2=238301&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed May 27 03:15:19 2015
@@ -1261,6 +1261,19 @@ X86TargetLowering::X86TargetLowering(con
     for (MVT VT : MVT::fp_vector_valuetypes())
       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
 
+    setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i32, MVT::v16i8, Legal);
+    setLoadExtAction(ISD::SEXTLOAD, MVT::v16i32, MVT::v16i8, Legal);
+    setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i32, MVT::v16i16, Legal);
+    setLoadExtAction(ISD::SEXTLOAD, MVT::v16i32, MVT::v16i16, Legal);
+    setLoadExtAction(ISD::ZEXTLOAD, MVT::v32i16, MVT::v32i8, Legal);
+    setLoadExtAction(ISD::SEXTLOAD, MVT::v32i16, MVT::v32i8, Legal);
+    setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64,  MVT::v8i8,  Legal);
+    setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64,  MVT::v8i8,  Legal);
+    setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64,  MVT::v8i16,  Legal);
+    setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64,  MVT::v8i16,  Legal);
+    setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64,  MVT::v8i32,  Legal);
+    setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64,  MVT::v8i32,  Legal);
+    
     setOperationAction(ISD::BR_CC,              MVT::i1,    Expand);
     setOperationAction(ISD::SETCC,              MVT::i1,    Custom);
     setOperationAction(ISD::XOR,                MVT::i1,    Legal);
@@ -1479,7 +1492,11 @@ X86TargetLowering::X86TargetLowering(con
     setOperationAction(ISD::SELECT,             MVT::v32i1, Custom);
     setOperationAction(ISD::SELECT,             MVT::v64i1, Custom);
     setOperationAction(ISD::SIGN_EXTEND,        MVT::v32i8, Custom);
+    setOperationAction(ISD::ZERO_EXTEND,        MVT::v32i8, Custom);
+    setOperationAction(ISD::SIGN_EXTEND,        MVT::v32i16, Custom);
+    setOperationAction(ISD::ZERO_EXTEND,        MVT::v32i16, Custom);
     setOperationAction(ISD::SIGN_EXTEND,        MVT::v64i8, Custom);
+    setOperationAction(ISD::ZERO_EXTEND,        MVT::v64i8, Custom);
     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v32i1, Custom);
     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v64i1, Custom);
     setOperationAction(ISD::VSELECT,            MVT::v32i16, Legal);
@@ -12093,13 +12110,13 @@ static SDValue LowerAVXExtend(SDValue Op
 }
 
 static  SDValue LowerZERO_EXTEND_AVX512(SDValue Op,
-                                        SelectionDAG &DAG) {
+                  const X86Subtarget *Subtarget, SelectionDAG &DAG) {
   MVT VT = Op->getSimpleValueType(0);
   SDValue In = Op->getOperand(0);
   MVT InVT = In.getSimpleValueType();
   SDLoc DL(Op);
   unsigned int NumElts = VT.getVectorNumElements();
-  if (NumElts != 8 && NumElts != 16)
+  if (NumElts != 8 && NumElts != 16 && !Subtarget->hasBWI())
     return SDValue();
 
   if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1)
@@ -12137,7 +12154,7 @@ static SDValue LowerZERO_EXTEND(SDValue
   MVT SVT = In.getSimpleValueType();
 
   if (VT.is512BitVector() || SVT.getVectorElementType() == MVT::i1)
-    return LowerZERO_EXTEND_AVX512(Op, DAG);
+    return LowerZERO_EXTEND_AVX512(Op, Subtarget, DAG);
 
   if (Subtarget->hasFp256()) {
     SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
@@ -13876,7 +13893,8 @@ SDValue X86TargetLowering::LowerSELECT(S
   return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops);
 }
 
-static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op,
+                                       const X86Subtarget *Subtarget,
                                        SelectionDAG &DAG) {
   MVT VT = Op->getSimpleValueType(0);
   SDValue In = Op->getOperand(0);
@@ -13902,7 +13920,7 @@ static SDValue LowerSIGN_EXTEND_AVX512(S
 
   unsigned int NumElts = VT.getVectorNumElements();
 
-  if (NumElts != 8 && NumElts != 16)
+  if (NumElts != 8 && NumElts != 16 && !Subtarget->hasBWI())
     return SDValue();
 
   if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1) {

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=238301&r1=238300&r2=238301&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Wed May 27 03:15:19 2015
@@ -5261,79 +5261,146 @@ def : Pat<(v8i32  (X86vtruncm VK8WM:$mas
                   (VPMOVQDrrkz  VK8WM:$mask, VR512:$src)>;
 
 
-multiclass avx512_extend<bits<8> opc, string OpcodeStr, RegisterClass KRC,
-                      RegisterClass DstRC, RegisterClass SrcRC, SDNode OpNode,
-                      PatFrag mem_frag, X86MemOperand x86memop,
-                      ValueType OpVT, ValueType InVT> {
-
-  def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
-              (ins SrcRC:$src),
-              !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
-              [(set DstRC:$dst, (OpVT (OpNode (InVT SrcRC:$src))))]>, EVEX;
-
-  def rrk : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
-              (ins KRC:$mask, SrcRC:$src),
-              !strconcat(OpcodeStr, "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}"),
-              []>, EVEX, EVEX_K;
-
-  def rrkz : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
-              (ins KRC:$mask, SrcRC:$src),
-              !strconcat(OpcodeStr, "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
-              []>, EVEX, EVEX_KZ;
+multiclass avx512_extend_common<bits<8> opc, string OpcodeStr,
+                  X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
+                  X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
+
+  defm rr   : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
+                    (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
+                    (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
+                  EVEX;
 
   let mayLoad = 1 in {
-    def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
-              (ins x86memop:$src),
-              !strconcat(OpcodeStr,"\t{$src, $dst|$dst, $src}"),
-              [(set DstRC:$dst,
-                (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))]>,
-              EVEX;
-
-    def rmk : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
-              (ins KRC:$mask, x86memop:$src),
-              !strconcat(OpcodeStr,"\t{$src, $dst {${mask}} |$dst {${mask}}, $src}"),
-              []>,
-              EVEX, EVEX_K;
-
-    def rmkz : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
-              (ins KRC:$mask, x86memop:$src),
-              !strconcat(OpcodeStr,"\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
-              []>,
-              EVEX, EVEX_KZ;
+    defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
+                    (ins x86memop:$src), OpcodeStr ,"$src", "$src",
+                    (DestInfo.VT (LdFrag addr:$src))>,
+                  EVEX;
   }
 }
 
-defm VPMOVZXBDZ: avx512_extend<0x31, "vpmovzxbd", VK16WM, VR512, VR128X, X86vzext,
-                             loadv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
-                             EVEX_CD8<8, CD8VQ>;
-defm VPMOVZXBQZ: avx512_extend<0x32, "vpmovzxbq", VK8WM, VR512, VR128X, X86vzext,
-                             loadv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
-                             EVEX_CD8<8, CD8VO>;
-defm VPMOVZXWDZ: avx512_extend<0x33, "vpmovzxwd", VK16WM, VR512, VR256X, X86vzext,
-                             loadv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
-                             EVEX_CD8<16, CD8VH>;
-defm VPMOVZXWQZ: avx512_extend<0x34, "vpmovzxwq", VK8WM, VR512, VR128X, X86vzext,
-                             loadv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
-                             EVEX_CD8<16, CD8VQ>;
-defm VPMOVZXDQZ: avx512_extend<0x35, "vpmovzxdq", VK8WM, VR512, VR256X, X86vzext,
-                             loadv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
-                             EVEX_CD8<32, CD8VH>;
-
-defm VPMOVSXBDZ: avx512_extend<0x21, "vpmovsxbd", VK16WM, VR512, VR128X, X86vsext,
-                             loadv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
-                             EVEX_CD8<8, CD8VQ>;
-defm VPMOVSXBQZ: avx512_extend<0x22, "vpmovsxbq", VK8WM, VR512, VR128X, X86vsext,
-                             loadv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
-                             EVEX_CD8<8, CD8VO>;
-defm VPMOVSXWDZ: avx512_extend<0x23, "vpmovsxwd", VK16WM, VR512, VR256X, X86vsext,
-                             loadv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
-                             EVEX_CD8<16, CD8VH>;
-defm VPMOVSXWQZ: avx512_extend<0x24, "vpmovsxwq", VK8WM, VR512, VR128X, X86vsext,
-                             loadv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
-                             EVEX_CD8<16, CD8VQ>;
-defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VK8WM, VR512, VR256X, X86vsext,
-                             loadv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
-                             EVEX_CD8<32, CD8VH>;
+multiclass avx512_extend_BW<bits<8> opc, string OpcodeStr, SDNode OpNode,
+          string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
+  let Predicates = [HasVLX, HasBWI] in {
+    defm Z128:  avx512_extend_common<opc, OpcodeStr, v8i16x_info,
+                    v16i8x_info, i64mem, LdFrag, OpNode>,
+                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128;
+
+    defm Z256:  avx512_extend_common<opc, OpcodeStr, v16i16x_info,
+                    v16i8x_info, i128mem, LdFrag, OpNode>,
+                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256;
+  }
+  let Predicates = [HasBWI] in {
+    defm Z   :  avx512_extend_common<opc, OpcodeStr, v32i16_info,
+                    v32i8x_info, i256mem, LdFrag, OpNode>,
+                     EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512;
+  }
+}
+
+multiclass avx512_extend_BD<bits<8> opc, string OpcodeStr, SDNode OpNode,
+          string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
+  let Predicates = [HasVLX, HasAVX512] in {
+    defm Z128:  avx512_extend_common<opc, OpcodeStr, v4i32x_info,
+                   v16i8x_info, i32mem, LdFrag, OpNode>,
+                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128;
+
+    defm Z256:  avx512_extend_common<opc, OpcodeStr, v8i32x_info,
+                   v16i8x_info, i64mem, LdFrag, OpNode>,
+                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256;
+  }
+  let Predicates = [HasAVX512] in {
+    defm Z   :  avx512_extend_common<opc, OpcodeStr, v16i32_info,
+                   v16i8x_info, i128mem, LdFrag, OpNode>,
+                         EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512;
+  }
+}
+
+multiclass avx512_extend_BQ<bits<8> opc, string OpcodeStr, SDNode OpNode,
+          string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
+  let Predicates = [HasVLX, HasAVX512] in {
+    defm Z128:  avx512_extend_common<opc, OpcodeStr, v2i64x_info,
+                   v16i8x_info, i16mem, LdFrag, OpNode>,
+                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128;
+
+    defm Z256:  avx512_extend_common<opc, OpcodeStr, v4i64x_info,
+                   v16i8x_info, i32mem, LdFrag, OpNode>,
+                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256;
+  }
+  let Predicates = [HasAVX512] in {
+    defm Z   :  avx512_extend_common<opc, OpcodeStr, v8i64_info,
+                   v16i8x_info, i64mem, LdFrag, OpNode>,
+                     EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512;
+  }
+}
+
+multiclass avx512_extend_WD<bits<8> opc, string OpcodeStr, SDNode OpNode,
+         string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
+  let Predicates = [HasVLX, HasAVX512] in {
+    defm Z128:  avx512_extend_common<opc, OpcodeStr, v4i32x_info,
+                   v8i16x_info, i64mem, LdFrag, OpNode>,
+                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128;
+
+    defm Z256:  avx512_extend_common<opc, OpcodeStr, v8i32x_info,
+                   v8i16x_info, i128mem, LdFrag, OpNode>,
+                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256;
+  }
+  let Predicates = [HasAVX512] in {
+    defm Z   :  avx512_extend_common<opc, OpcodeStr, v16i32_info,
+                   v16i16x_info, i256mem, LdFrag, OpNode>,
+                     EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512;
+  }
+}
+
+multiclass avx512_extend_WQ<bits<8> opc, string OpcodeStr, SDNode OpNode,
+         string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
+  let Predicates = [HasVLX, HasAVX512] in {
+    defm Z128:  avx512_extend_common<opc, OpcodeStr, v2i64x_info,
+                   v8i16x_info, i32mem, LdFrag, OpNode>,
+                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128;
+
+    defm Z256:  avx512_extend_common<opc, OpcodeStr, v4i64x_info,
+                   v8i16x_info, i64mem, LdFrag, OpNode>,
+                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256;
+  }
+  let Predicates = [HasAVX512] in {
+    defm Z   :  avx512_extend_common<opc, OpcodeStr, v8i64_info,
+                   v8i16x_info, i128mem, LdFrag, OpNode>,
+                     EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512;
+  }
+}
+
+multiclass avx512_extend_DQ<bits<8> opc, string OpcodeStr, SDNode OpNode,
+         string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
+
+  let Predicates = [HasVLX, HasAVX512] in {
+    defm Z128:  avx512_extend_common<opc, OpcodeStr, v2i64x_info,
+                   v4i32x_info, i64mem, LdFrag, OpNode>,
+                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
+
+    defm Z256:  avx512_extend_common<opc, OpcodeStr, v4i64x_info,
+                   v4i32x_info, i128mem, LdFrag, OpNode>,
+                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
+  }
+  let Predicates = [HasAVX512] in {
+    defm Z   :  avx512_extend_common<opc, OpcodeStr, v8i64_info,
+                   v8i32x_info, i256mem, LdFrag, OpNode>,
+                     EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
+  }
+}
+
+defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, "z">;
+defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, "z">;
+defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, "z">;
+defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, "z">;
+defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, "z">;
+defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, "z">;
+
+
+defm VPMOVSXBW: avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, "s">;
+defm VPMOVSXBD: avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, "s">;
+defm VPMOVSXBQ: avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, "s">;
+defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, "s">;
+defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, "s">;
+defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, "s">;
 
 //===----------------------------------------------------------------------===//
 // GATHER - SCATTER Operations

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=238301&r1=238300&r2=238301&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Wed May 27 03:15:19 2015
@@ -5850,10 +5850,10 @@ multiclass SS41I_pmovx_rm_all<bits<8> op
                           OpndItins SSEItins, OpndItins AVXItins,
                           OpndItins AVX2Itins> {
   defm NAME : SS41I_pmovx_rrrm<opc, OpcodeStr, MemOp, VR128, VR128, SSEItins>;
-  let Predicates = [HasAVX] in
+  let Predicates = [HasAVX, NoVLX] in
     defm V#NAME   : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemOp,
                                      VR128, VR128, AVXItins>, VEX;
-  let Predicates = [HasAVX2] in
+  let Predicates = [HasAVX2, NoVLX] in
     defm V#NAME#Y : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemYOp,
                                      VR256, VR128, AVX2Itins>, VEX, VEX_L;
 }
@@ -5988,7 +5988,7 @@ multiclass SS41I_pmovx_avx2_patterns<str
             (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
 }
 
-let Predicates = [HasAVX2] in {
+let Predicates = [HasAVX2, NoVLX] in {
   defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", "s", X86vsext>;
   defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", "z", X86vzext>;
 }
@@ -6087,7 +6087,7 @@ multiclass SS41I_pmovx_patterns<string O
             (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
 }
 
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX] in {
   defm : SS41I_pmovx_patterns<"VPMOVSX", "s", X86vsext, extloadi32i16>;
   defm : SS41I_pmovx_patterns<"VPMOVZX", "z", X86vzext, loadi16_anyext>;
 }

Modified: llvm/trunk/test/CodeGen/X86/avx512-trunc-ext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-trunc-ext.ll?rev=238301&r1=238300&r2=238301&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-trunc-ext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-trunc-ext.ll Wed May 27 03:15:19 2015
@@ -1,95 +1,843 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX %s
-
-; CHECK-LABEL: trunc_16x32_to_16x8
-; CHECK: vpmovdb
-; CHECK: ret
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX 
+ 
+ 
+; KNL-LABEL: trunc_16x32_to_16x8
+; KNL: vpmovdb
+; KNL: ret
 define <16 x i8> @trunc_16x32_to_16x8(<16 x i32> %i) nounwind readnone {
   %x = trunc <16 x i32> %i to <16 x i8>
   ret <16 x i8> %x
 }
 
-; CHECK-LABEL: trunc_8x64_to_8x16
-; CHECK: vpmovqw
-; CHECK: ret
+; KNL-LABEL: trunc_8x64_to_8x16
+; KNL: vpmovqw
+; KNL: ret
 define <8 x i16> @trunc_8x64_to_8x16(<8 x i64> %i) nounwind readnone {
   %x = trunc <8 x i64> %i to <8 x i16>
   ret <8 x i16> %x
 }
 
-
-; CHECK-LABEL: zext_16x8_to_16x32
-; CHECK: vpmovzxbd {{.*}}%zmm
-; CHECK: ret
+;SKX-LABEL: zext_8x8mem_to_8x16:                  
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovw2m  %xmm0, %k1     
+;SKX-NEXT:  vpmovzxbw (%rdi), %xmm0 {%k1} {z} 
+;SKX-NEXT:  retq                            
+define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
+  %a   = load <8 x i8>,<8 x i8> *%i,align 1
+  %x   = zext <8 x i8> %a to <8 x i16>  
+  %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer  
+  ret <8 x i16> %ret
+}
+
+;SKX-LABEL: sext_8x8mem_to_8x16:                  
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovw2m  %xmm0, %k1     
+;SKX-NEXT:  vpmovsxbw (%rdi), %xmm0 {%k1} {z} 
+;SKX-NEXT:  retq                       
+define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
+  %a   = load <8 x i8>,<8 x i8> *%i,align 1
+  %x   = sext <8 x i8> %a to <8 x i16>  
+  %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer  
+  ret <8 x i16> %ret
+}
+
+;SKX-LABEL: zext_16x8mem_to_16x16:                
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovb2m  %xmm0, %k1     
+;SKX-NEXT:  vpmovzxbw (%rdi), %ymm0 {%k1} {z} 
+;SKX-NEXT:  retq            
+define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
+  %a   = load <16 x i8>,<16 x i8> *%i,align 1
+  %x   = zext <16 x i8> %a to <16 x i16>  
+  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer  
+  ret <16 x i16> %ret
+}
+
+;SKX-LABEL: sext_16x8mem_to_16x16:                
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovb2m  %xmm0, %k1     
+;SKX-NEXT:  vpmovsxbw (%rdi), %ymm0 {%k1} {z} 
+;SKX-NEXT:  retq  
+define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
+  %a   = load <16 x i8>,<16 x i8> *%i,align 1
+  %x   = sext <16 x i8> %a to <16 x i16>  
+  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer  
+  ret <16 x i16> %ret
+}
+
+;SKX-LABEL: zext_16x8_to_16x16:                   
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovzxbw %xmm0, %ymm0    
+;SKX-NEXT:  retq  
+define <16 x i16> @zext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {  
+  %x   = zext <16 x i8> %a to <16 x i16>  
+  ret <16 x i16> %x
+}
+
+;SKX-LABEL: zext_16x8_to_16x16_mask:              
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovb2m  %xmm1, %k1     
+;SKX-NEXT:  vpmovzxbw %xmm0, %ymm0 {%k1} {z} 
+;SKX-NEXT:  retq 
+define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {  
+  %x   = zext <16 x i8> %a to <16 x i16> 
+  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer   
+  ret <16 x i16> %ret
+}
+
+;SKX-LABEL: sext_16x8_to_16x16:                   
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovsxbw %xmm0, %ymm0    
+;SKX-NEXT:  retq
+define <16 x i16> @sext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {  
+  %x   = sext <16 x i8> %a to <16 x i16>  
+  ret <16 x i16> %x
+}
+
+;SKX-LABEL: sext_16x8_to_16x16_mask:              
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovb2m  %xmm1, %k1     
+;SKX-NEXT:  vpmovsxbw %xmm0, %ymm0 {%k1} {z} 
+;SKX-NEXT:  retq 
+define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {  
+  %x   = sext <16 x i8> %a to <16 x i16> 
+  %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer   
+  ret <16 x i16> %ret
+}
+
+;SKX-LABEL: zext_32x8mem_to_32x16:                
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovb2m  %ymm0, %k1     
+;SKX-NEXT:  vpmovzxbw (%rdi), %zmm0 {%k1} {z} 
+;SKX-NEXT:  retq                       
+define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
+  %a   = load <32 x i8>,<32 x i8> *%i,align 1
+  %x   = zext <32 x i8> %a to <32 x i16>  
+  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer  
+  ret <32 x i16> %ret
+}
+
+;SKX-LABEL: sext_32x8mem_to_32x16:                
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovb2m  %ymm0, %k1     
+;SKX-NEXT:  vpmovsxbw (%rdi), %zmm0 {%k1} {z} 
+;SKX-NEXT:  retq
+define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
+  %a   = load <32 x i8>,<32 x i8> *%i,align 1
+  %x   = sext <32 x i8> %a to <32 x i16>  
+  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer  
+  ret <32 x i16> %ret
+}
+
+;SKX-LABEL: zext_32x8_to_32x16:                   
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovzxbw %ymm0, %zmm0    
+;SKX-NEXT:  retq 
+define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {  
+  %x   = zext <32 x i8> %a to <32 x i16>  
+  ret <32 x i16> %x
+}
+
+;SKX-LABEL: zext_32x8_to_32x16_mask:              
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovb2m  %ymm1, %k1    
+;SKX-NEXT:  vpmovzxbw %ymm0, %zmm0 {%k1} {z} 
+;SKX-NEXT:  retq
+define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {  
+  %x   = zext <32 x i8> %a to <32 x i16>
+  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer  
+  ret <32 x i16> %ret
+}
+
+;SKX-LABEL: sext_32x8_to_32x16:                   
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovsxbw %ymm0, %zmm0    
+;SKX-NEXT:  retq
+define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {  
+  %x   = sext <32 x i8> %a to <32 x i16>  
+  ret <32 x i16> %x
+}
+
+;SKX-LABEL: sext_32x8_to_32x16_mask:              
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovb2m  %ymm1, %k1     
+;SKX-NEXT:  vpmovsxbw %ymm0, %zmm0 {%k1} {z} 
+;SKX-NEXT:  retq
+define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {  
+  %x   = sext <32 x i8> %a to <32 x i16>
+  %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer  
+  ret <32 x i16> %ret
+}
+
+;SKX-LABEL: zext_4x8mem_to_4x32:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovd2m    %xmm0, %k1      
+;SKX-NEXT:  vpmovzxbd    (%rdi), %xmm0 {%k1} {z} 
+;SKX-NEXT:  retq                            
+define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
+  %a   = load <4 x i8>,<4 x i8> *%i,align 1
+  %x   = zext <4 x i8> %a to <4 x i32>  
+  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer  
+  ret <4 x i32> %ret
+}
+
+;SKX-LABEL: sext_4x8mem_to_4x32:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovd2m    %xmm0, %k1      
+;SKX-NEXT:  vpmovsxbd    (%rdi), %xmm0 {%k1} {z} 
+;SKX-NEXT:  retq       
+define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
+  %a   = load <4 x i8>,<4 x i8> *%i,align 1
+  %x   = sext <4 x i8> %a to <4 x i32>  
+  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer  
+  ret <4 x i32> %ret
+}
+
+;SKX-LABEL: zext_8x8mem_to_8x32:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovw2m    %xmm0, %k1      
+;SKX-NEXT:  vpmovzxbd    (%rdi), %ymm0 {%k1} {z} 
+;SKX-NEXT:  retq    
+define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
+  %a   = load <8 x i8>,<8 x i8> *%i,align 1
+  %x   = zext <8 x i8> %a to <8 x i32>  
+  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer  
+  ret <8 x i32> %ret
+}
+
+;SKX-LABEL: sext_8x8mem_to_8x32:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovw2m    %xmm0, %k1      
+;SKX-NEXT:  vpmovsxbd    (%rdi), %ymm0 {%k1} {z} 
+;SKX-NEXT:  retq         
+define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
+  %a   = load <8 x i8>,<8 x i8> *%i,align 1
+  %x   = sext <8 x i8> %a to <8 x i32>  
+  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer  
+  ret <8 x i32> %ret
+}
+
+;KNL-LABEL: zext_16x8mem_to_16x32:   
+;KNL:       vpmovzxbd    (%rdi), %zmm0 {%k1} {z} 
+;KNL-NEXT:  retq 
+define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
+  %a   = load <16 x i8>,<16 x i8> *%i,align 1
+  %x   = zext <16 x i8> %a to <16 x i32>
+  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
+  ret <16 x i32> %ret
+}
+
+;KNL-LABEL: sext_16x8mem_to_16x32:   
+;KNL:       vpmovsxbd    (%rdi), %zmm0 {%k1} {z} 
+;KNL-NEXT:  retq  
+define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
+  %a   = load <16 x i8>,<16 x i8> *%i,align 1
+  %x   = sext <16 x i8> %a to <16 x i32>
+  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
+  ret <16 x i32> %ret
+}
+
+;KNL-LABEL: zext_16x8_to_16x32_mask:                    
+;KNL:       vpmovzxbd %xmm0, %zmm0 {%k1} {z} 
+;KNL-NEXT:  retq                
+define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
+  %x   = zext <16 x i8> %a to <16 x i32>
+  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
+  ret <16 x i32> %ret
+}
+
+;KNL-LABEL: sext_16x8_to_16x32_mask:                    
+;KNL:       vpmovsxbd %xmm0, %zmm0 {%k1} {z} 
+;KNL-NEXT:  retq
+define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
+  %x   = sext <16 x i8> %a to <16 x i32>
+  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
+  ret <16 x i32> %ret
+}
+
+; KNL-LABEL: zext_16x8_to_16x32
+; KNL: vpmovzxbd {{.*}}%zmm
+; KNL: ret
 define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
   %x = zext <16 x i8> %i to <16 x i32>
   ret <16 x i32> %x
 }
 
-; CHECK-LABEL: sext_16x8_to_16x32
-; CHECK: vpmovsxbd {{.*}}%zmm
-; CHECK: ret
+; KNL-LABEL: sext_16x8_to_16x32
+; KNL: vpmovsxbd {{.*}}%zmm
+; KNL: ret
 define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
   %x = sext <16 x i8> %i to <16 x i32>
   ret <16 x i32> %x
 }
 
+;SKX-LABEL: zext_2x8mem_to_2x64:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovq2m  %xmm0, %k1
+;SKX-NEXT:  vpmovzxbq (%rdi), %xmm0 {%k1} {z} 
+;SKX-NEXT:  retq
+define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone {
+  %a   = load <2 x i8>,<2 x i8> *%i,align 1
+  %x   = zext <2 x i8> %a to <2 x i64>
+  %ret = select <2 x  i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
+  ret <2 x i64> %ret
+}
+;SKX-LABEL: sext_2x8mem_to_2x64mask:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovq2m  %xmm0, %k1
+;SKX-NEXT:  vpmovsxbq (%rdi), %xmm0 {%k1} {z} 
+;SKX-NEXT:  retq
+define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone {
+  %a   = load <2 x i8>,<2 x i8> *%i,align 1
+  %x   = sext <2 x i8> %a to <2 x i64>
+  %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
+  ret <2 x i64> %ret
+}
+;SKX-LABEL: sext_2x8mem_to_2x64:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovsxbq (%rdi), %xmm0   
+;SKX-NEXT:  retq
+define <2 x i64> @sext_2x8mem_to_2x64(<2 x i8> *%i) nounwind readnone {
+  %a   = load <2 x i8>,<2 x i8> *%i,align 1
+  %x   = sext <2 x i8> %a to <2 x i64>
+  ret <2 x i64> %x
+}
+
+;SKX-LABEL: zext_4x8mem_to_4x64:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovd2m  %xmm0, %k1
+;SKX-NEXT:  vpmovzxbq (%rdi), %ymm0 {%k1} {z} 
+;SKX-NEXT:  retq
+define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
+  %a   = load <4 x i8>,<4 x i8> *%i,align 1
+  %x   = zext <4 x i8> %a to <4 x i64>
+  %ret = select <4 x  i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
+  ret <4 x i64> %ret
+}
+
+;SKX-LABEL: sext_4x8mem_to_4x64mask:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovd2m  %xmm0, %k1
+;SKX-NEXT:  vpmovsxbq (%rdi), %ymm0 {%k1} {z} 
+;SKX-NEXT:  retq
+define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
+  %a   = load <4 x i8>,<4 x i8> *%i,align 1
+  %x   = sext <4 x i8> %a to <4 x i64>
+  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
+  ret <4 x i64> %ret
+}
+
+;SKX-LABEL: sext_4x8mem_to_4x64:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovsxbq (%rdi), %ymm0   
+;SKX-NEXT:  retq
+define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readnone {
+  %a   = load <4 x i8>,<4 x i8> *%i,align 1
+  %x   = sext <4 x i8> %a to <4 x i64>
+  ret <4 x i64> %x
+}
+
+;KNL-LABEL: zext_8x8mem_to_8x64:
+;KNL:       vpmovzxbq (%rdi), %zmm0 {%k1} {z} 
+;KNL-NEXT:  retq
+define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
+  %a   = load <8 x i8>,<8 x i8> *%i,align 1
+  %x   = zext <8 x i8> %a to <8 x i64>
+  %ret = select <8 x  i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
+  ret <8 x i64> %ret
+}
+
+;KNL-LABEL: sext_8x8mem_to_8x64mask:
+;KNL:       vpmovsxbq (%rdi), %zmm0 {%k1} {z} 
+;KNL-NEXT:  retq
+define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
+  %a   = load <8 x i8>,<8 x i8> *%i,align 1
+  %x   = sext <8 x i8> %a to <8 x i64>
+  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
+  ret <8 x i64> %ret
+}
+
+;KNL-LABEL: sext_8x8mem_to_8x64:
+;KNL:       vpmovsxbq (%rdi), %zmm0   
+;KNL-NEXT:  retq
+define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readnone {
+  %a   = load <8 x i8>,<8 x i8> *%i,align 1
+  %x   = sext <8 x i8> %a to <8 x i64>
+  ret <8 x i64> %x
+}
 
-; CHECK-LABEL: zext_16x16_to_16x32
-; CHECK: vpmovzxwd {{.*}}%zmm
-; CHECK: ret
-define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %i) nounwind readnone {
-  %x = zext <16 x i16> %i to <16 x i32>
+;SKX-LABEL: zext_4x16mem_to_4x32:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovd2m  %xmm0, %k1
+;SKX-NEXT:  vpmovzxwd (%rdi), %xmm0 {%k1} {z} 
+;SKX-NEXT:  retq
+define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
+  %a   = load <4 x i16>,<4 x i16> *%i,align 1
+  %x   = zext <4 x i16> %a to <4 x i32>
+  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
+  ret <4 x i32> %ret
+}
+
+;SKX-LABEL: sext_4x16mem_to_4x32mask:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovd2m  %xmm0, %k1
+;SKX-NEXT:  vpmovsxwd (%rdi), %xmm0 {%k1} {z} 
+;SKX-NEXT:  retq
+define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
+  %a   = load <4 x i16>,<4 x i16> *%i,align 1
+  %x   = sext <4 x i16> %a to <4 x i32>
+  %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
+  ret <4 x i32> %ret
+}
+
+;SKX-LABEL: sext_4x16mem_to_4x32:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovsxwd (%rdi), %xmm0   
+;SKX-NEXT:  retq
+define <4 x i32> @sext_4x16mem_to_4x32(<4 x i16> *%i) nounwind readnone {
+  %a   = load <4 x i16>,<4 x i16> *%i,align 1
+  %x   = sext <4 x i16> %a to <4 x i32>
+  ret <4 x i32> %x
+}
+
+
+;SKX-LABEL: zext_8x16mem_to_8x32:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovw2m  %xmm0, %k1
+;SKX-NEXT:  vpmovzxwd (%rdi), %ymm0 {%k1} {z} 
+;SKX-NEXT:  retq
+define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
+  %a   = load <8 x i16>,<8 x i16> *%i,align 1
+  %x   = zext <8 x i16> %a to <8 x i32>
+  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+  ret <8 x i32> %ret
+}
+
+;SKX-LABEL: sext_8x16mem_to_8x32mask:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovw2m  %xmm0, %k1
+;SKX-NEXT:  vpmovsxwd (%rdi), %ymm0 {%k1} {z} 
+;SKX-NEXT:  retq
+define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
+  %a   = load <8 x i16>,<8 x i16> *%i,align 1
+  %x   = sext <8 x i16> %a to <8 x i32>
+  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+  ret <8 x i32> %ret
+}
+
+;SKX-LABEL: sext_8x16mem_to_8x32:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovsxwd (%rdi), %ymm0   
+;SKX-NEXT:  retq
+define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16> *%i) nounwind readnone {
+  %a   = load <8 x i16>,<8 x i16> *%i,align 1
+  %x   = sext <8 x i16> %a to <8 x i32>
+  ret <8 x i32> %x
+}
+
+;SKX-LABEL: zext_8x16_to_8x32mask:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovw2m  %xmm1, %k1
+;SKX-NEXT:  vpmovzxwd %xmm0, %ymm0 {%k1} {z} 
+;SKX-NEXT:  retq
+define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
+  %x   = zext <8 x i16> %a to <8 x i32>
+  %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+  ret <8 x i32> %ret
+}
+
+;SKX-LABEL: zext_8x16_to_8x32:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovzxwd %xmm0, %ymm0    
+;SKX-NEXT:  retq
+define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a ) nounwind readnone {
+  %x   = zext <8 x i16> %a to <8 x i32>
+  ret <8 x i32> %x
+}
+
+;SKX-LABEL: zext_16x16mem_to_16x32:
+;KNL-LABEL: zext_16x16mem_to_16x32:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovb2m  %xmm0, %k1
+;SKX-NEXT:  vpmovzxwd (%rdi), %zmm0 {%k1} {z} 
+;KNL:       vpmovzxwd (%rdi), %zmm0 {%k1} {z} 
+;SKX-NEXT:  retq
+define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone {
+  %a   = load <16 x i16>,<16 x i16> *%i,align 1
+  %x   = zext <16 x i16> %a to <16 x i32>
+  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
+  ret <16 x i32> %ret
+}
+
+;SKX-LABEL: sext_16x16mem_to_16x32mask:
+;KNL-LABEL: sext_16x16mem_to_16x32mask:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovb2m  %xmm0, %k1
+;SKX-NEXT:  vpmovsxwd (%rdi), %zmm0 {%k1} {z} 
+;KNL:       vpmovsxwd (%rdi), %zmm0 {%k1} {z} 
+;SKX-NEXT:  retq
+define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone {
+  %a   = load <16 x i16>,<16 x i16> *%i,align 1
+  %x   = sext <16 x i16> %a to <16 x i32>
+  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
+  ret <16 x i32> %ret
+}
+
+;SKX-LABEL: sext_16x16mem_to_16x32:
+;KNL-LABEL: sext_16x16mem_to_16x32:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovsxwd (%rdi), %zmm0   
+;KNL:       vpmovsxwd (%rdi), %zmm0   
+;SKX-NEXT:  retq
+define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readnone {
+  %a   = load <16 x i16>,<16 x i16> *%i,align 1
+  %x   = sext <16 x i16> %a to <16 x i32>
+  ret <16 x i32> %x
+}
+;SKX-LABEL: zext_16x16_to_16x32mask:
+;KNL-LABEL: zext_16x16_to_16x32mask:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovb2m  %xmm1, %k1
+;SKX-NEXT:  vpmovzxwd %ymm0, %zmm0 {%k1} {z} 
+;KNL:       vpmovzxwd %ymm0, %zmm0 {%k1} {z} 
+;SKX-NEXT:  retq
+define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) nounwind readnone {
+  %x   = zext <16 x i16> %a to <16 x i32>
+  %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
+  ret <16 x i32> %ret
+}
+
+;SKX-LABEL: zext_16x16_to_16x32:
+;KNL-LABEL: zext_16x16_to_16x32:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovzxwd %ymm0, %zmm0    
+;KNL:       vpmovzxwd %ymm0, %zmm0    
+;SKX-NEXT:  retq
+define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a ) nounwind readnone {
+  %x   = zext <16 x i16> %a to <16 x i32>
   ret <16 x i32> %x
 }
 
-; CHECK-LABEL: zext_8x16_to_8x64
-; CHECK: vpmovzxwq
-; CHECK: ret
-define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %i) nounwind readnone {
-  %x = zext <8 x i16> %i to <8 x i64>
+;SKX-LABEL: zext_2x16mem_to_2x64:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovq2m  %xmm0, %k1
+;SKX-NEXT:  vpmovzxwq (%rdi), %xmm0 {%k1} {z} 
+;SKX-NEXT:  retq
+define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone {
+  %a   = load <2 x i16>,<2 x i16> *%i,align 1
+  %x   = zext <2 x i16> %a to <2 x i64>
+  %ret = select <2 x  i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
+  ret <2 x i64> %ret
+}
+
+;SKX-LABEL: sext_2x16mem_to_2x64mask:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovq2m  %xmm0, %k1
+;SKX-NEXT:  vpmovsxwq (%rdi), %xmm0 {%k1} {z} 
+;SKX-NEXT:  retq
+define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone {
+  %a   = load <2 x i16>,<2 x i16> *%i,align 1
+  %x   = sext <2 x i16> %a to <2 x i64>
+  %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
+  ret <2 x i64> %ret
+}
+
+;SKX-LABEL: sext_2x16mem_to_2x64:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovsxwq (%rdi), %xmm0   
+;SKX-NEXT:  retq
+define <2 x i64> @sext_2x16mem_to_2x64(<2 x i16> *%i) nounwind readnone {
+  %a   = load <2 x i16>,<2 x i16> *%i,align 1
+  %x   = sext <2 x i16> %a to <2 x i64>
+  ret <2 x i64> %x
+}
+
+;SKX-LABEL: zext_4x16mem_to_4x64:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovd2m  %xmm0, %k1
+;SKX-NEXT:  vpmovzxwq (%rdi), %ymm0 {%k1} {z} 
+;SKX-NEXT:  retq
+define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
+  %a   = load <4 x i16>,<4 x i16> *%i,align 1
+  %x   = zext <4 x i16> %a to <4 x i64>
+  %ret = select <4 x  i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
+  ret <4 x i64> %ret
+}
+
+;SKX-LABEL: sext_4x16mem_to_4x64mask:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovd2m  %xmm0, %k1
+;SKX-NEXT:  vpmovsxwq (%rdi), %ymm0 {%k1} {z} 
+;SKX-NEXT:  retq
+define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
+  %a   = load <4 x i16>,<4 x i16> *%i,align 1
+  %x   = sext <4 x i16> %a to <4 x i64>
+  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
+  ret <4 x i64> %ret
+}
+
+;SKX-LABEL: sext_4x16mem_to_4x64:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovsxwq (%rdi), %ymm0   
+;SKX-NEXT:  retq
+define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16> *%i) nounwind readnone {
+  %a   = load <4 x i16>,<4 x i16> *%i,align 1
+  %x   = sext <4 x i16> %a to <4 x i64>
+  ret <4 x i64> %x
+}
+
+;SKX-LABEL: zext_8x16mem_to_8x64:
+;KNL-LABEL: zext_8x16mem_to_8x64:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovw2m  %xmm0, %k1
+;SKX-NEXT:  vpmovzxwq (%rdi), %zmm0 {%k1} {z} 
+;KNL:       vpmovzxwq (%rdi), %zmm0 {%k1} {z} 
+;SKX-NEXT:  retq
+define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
+  %a   = load <8 x i16>,<8 x i16> *%i,align 1
+  %x   = zext <8 x i16> %a to <8 x i64>
+  %ret = select <8 x  i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
+  ret <8 x i64> %ret
+}
+
+;SKX-LABEL: sext_8x16mem_to_8x64mask:
+;KNL-LABEL: sext_8x16mem_to_8x64mask:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovw2m  %xmm0, %k1
+;SKX-NEXT:  vpmovsxwq (%rdi), %zmm0 {%k1} {z} 
+;KNL:       vpmovsxwq (%rdi), %zmm0 {%k1} {z} 
+;SKX-NEXT:  retq
+define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
+  %a   = load <8 x i16>,<8 x i16> *%i,align 1
+  %x   = sext <8 x i16> %a to <8 x i64>
+  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
+  ret <8 x i64> %ret
+}
+
+;SKX-LABEL: sext_8x16mem_to_8x64:
+;KNL-LABEL: sext_8x16mem_to_8x64:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovsxwq (%rdi), %zmm0   
+;KNL:       vpmovsxwq (%rdi), %zmm0   
+;SKX-NEXT:  retq
+define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readnone {
+  %a   = load <8 x i16>,<8 x i16> *%i,align 1
+  %x   = sext <8 x i16> %a to <8 x i64>
+  ret <8 x i64> %x
+}
+
+;SKX-LABEL: zext_8x16_to_8x64mask:
+;KNL-LABEL: zext_8x16_to_8x64mask:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovw2m  %xmm1, %k1
+;SKX-NEXT:  vpmovzxwq %xmm0, %zmm0 {%k1} {z} 
+;KNL:       vpmovzxwq %xmm0, %zmm0 {%k1} {z} 
+;SKX-NEXT:  retq
+define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
+  %x   = zext <8 x i16> %a to <8 x i64>
+  %ret = select <8 x  i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
+  ret <8 x i64> %ret
+}
+
+;SKX-LABEL: zext_8x16_to_8x64:
+;KNL-LABEL: zext_8x16_to_8x64:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovzxwq %xmm0, %zmm0    
+;KNL:       vpmovzxwq %xmm0, %zmm0    
+;SKX-NEXT:  retq
+; KNL: ret
+define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone {
+  %ret   = zext <8 x i16> %a to <8 x i64>
+  ret <8 x i64> %ret
+}
+
+;SKX-LABEL: zext_2x32mem_to_2x64:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovq2m  %xmm0, %k1
+;SKX-NEXT:  vpmovzxdq (%rdi), %xmm0 {%k1} {z} 
+;SKX-NEXT:  retq
+define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone {
+  %a   = load <2 x i32>,<2 x i32> *%i,align 1
+  %x   = zext <2 x i32> %a to <2 x i64>
+  %ret = select <2 x  i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
+  ret <2 x i64> %ret
+}
+
+;SKX-LABEL: sext_2x32mem_to_2x64mask:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovq2m  %xmm0, %k1
+;SKX-NEXT:  vpmovsxdq (%rdi), %xmm0 {%k1} {z} 
+;SKX-NEXT:  retq
+define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone {
+  %a   = load <2 x i32>,<2 x i32> *%i,align 1
+  %x   = sext <2 x i32> %a to <2 x i64>
+  %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
+  ret <2 x i64> %ret
+}
+
+;SKX-LABEL: sext_2x32mem_to_2x64:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovsxdq (%rdi), %xmm0   
+;SKX-NEXT:  retq
+define <2 x i64> @sext_2x32mem_to_2x64(<2 x i32> *%i) nounwind readnone {
+  %a   = load <2 x i32>,<2 x i32> *%i,align 1
+  %x   = sext <2 x i32> %a to <2 x i64>
+  ret <2 x i64> %x
+}
+
+;SKX-LABEL: zext_4x32mem_to_4x64:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovd2m  %xmm0, %k1
+;SKX-NEXT:  vpmovzxdq (%rdi), %ymm0 {%k1} {z} 
+;SKX-NEXT:  retq
+define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone {
+  %a   = load <4 x i32>,<4 x i32> *%i,align 1
+  %x   = zext <4 x i32> %a to <4 x i64>
+  %ret = select <4 x  i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
+  ret <4 x i64> %ret
+}
+
+;SKX-LABEL: sext_4x32mem_to_4x64mask:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovd2m  %xmm0, %k1
+;SKX-NEXT:  vpmovsxdq (%rdi), %ymm0 {%k1} {z} 
+;SKX-NEXT:  retq
+define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone {
+  %a   = load <4 x i32>,<4 x i32> *%i,align 1
+  %x   = sext <4 x i32> %a to <4 x i64>
+  %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
+  ret <4 x i64> %ret
+}
+
+;SKX-LABEL: sext_4x32mem_to_4x64:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovsxdq (%rdi), %ymm0   
+;SKX-NEXT:  retq
+define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32> *%i) nounwind readnone {
+  %a   = load <4 x i32>,<4 x i32> *%i,align 1
+  %x   = sext <4 x i32> %a to <4 x i64>
+  ret <4 x i64> %x
+}
+
+;SKX-LABEL: sext_4x32_to_4x64:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovsxdq %xmm0, %ymm0    
+;SKX-NEXT:  retq
+define <4 x i64> @sext_4x32_to_4x64(<4 x i32> %a) nounwind readnone {
+  %x   = sext <4 x i32> %a to <4 x i64>
+  ret <4 x i64> %x
+}
+
+;SKX-LABEL: zext_4x32_to_4x64mask:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovd2m  %xmm1, %k1
+;SKX-NEXT:  vpmovzxdq %xmm0, %ymm0 {%k1} {z} 
+;SKX-NEXT:  retq
+define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind readnone {
+  %x   = zext <4 x i32> %a to <4 x i64>
+  %ret = select <4 x  i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
+  ret <4 x i64> %ret
+}
+
+;SKX-LABEL: zext_8x32mem_to_8x64:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovw2m  %xmm0, %k1
+;SKX-NEXT:  vpmovzxdq (%rdi), %zmm0 {%k1} {z} 
+;SKX-NEXT:  retq
+define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
+  %a   = load <8 x i32>,<8 x i32> *%i,align 1
+  %x   = zext <8 x i32> %a to <8 x i64>
+  %ret = select <8 x  i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
+  ret <8 x i64> %ret
+}
+
+;SKX-LABEL: sext_8x32mem_to_8x64mask:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovw2m  %xmm0, %k1
+;SKX-NEXT:  vpmovsxdq (%rdi), %zmm0 {%k1} {z} 
+;SKX-NEXT:  retq
+define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
+  %a   = load <8 x i32>,<8 x i32> *%i,align 1
+  %x   = sext <8 x i32> %a to <8 x i64>
+  %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
+  ret <8 x i64> %ret
+}
+
+;SKX-LABEL: sext_8x32mem_to_8x64:
+;KNL-LABEL: sext_8x32mem_to_8x64:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovsxdq (%rdi), %zmm0   
+;KNL:       vpmovsxdq (%rdi), %zmm0   
+;SKX-NEXT:  retq
+define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone {
+  %a   = load <8 x i32>,<8 x i32> *%i,align 1
+  %x   = sext <8 x i32> %a to <8 x i64>
+  ret <8 x i64> %x
+}
+
+;SKX-LABEL: sext_8x32_to_8x64:
+;KNL-LABEL: sext_8x32_to_8x64:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovsxdq %ymm0, %zmm0    
+;KNL:       vpmovsxdq %ymm0, %zmm0    
+;SKX-NEXT:  retq
+define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone {
+  %x   = sext <8 x i32> %a to <8 x i64>
   ret <8 x i64> %x
 }
 
-;CHECK-LABEL: fptrunc_test
-;CHECK: vcvtpd2ps {{.*}}%zmm
-;CHECK: ret
+;SKX-LABEL: zext_8x32_to_8x64mask:
+;KNL-LABEL: zext_8x32_to_8x64mask:
+;SKX:       ## BB#0:
+;SKX-NEXT:  vpmovw2m  %xmm1, %k1
+;SKX-NEXT:  vpmovzxdq %ymm0, %zmm0 {%k1} {z} 
+;KNL:       vpmovzxdq %ymm0, %zmm0 {%k1} {z} 
+;SKX-NEXT:  retq
+define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind readnone {
+  %x   = zext <8 x i32> %a to <8 x i64>
+  %ret = select <8 x  i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
+  ret <8 x i64> %ret
+}
+;KNL-LABEL: fptrunc_test
+;KNL: vcvtpd2ps {{.*}}%zmm
+;KNL: ret
 define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone {
   %b = fptrunc <8 x double> %a to <8 x float>
   ret <8 x float> %b
 }
 
-;CHECK-LABEL: fpext_test
-;CHECK: vcvtps2pd {{.*}}%zmm
-;CHECK: ret
+;KNL-LABEL: fpext_test
+;KNL: vcvtps2pd {{.*}}%zmm
+;KNL: ret
 define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone {
   %b = fpext <8 x float> %a to <8 x double>
   ret <8 x double> %b
 }
 
-; CHECK-LABEL: zext_16i1_to_16xi32
-; CHECK: vpbroadcastd LCP{{.*}}(%rip), %zmm0 {%k1} {z}
-; CHECK: ret
+; KNL-LABEL: zext_16i1_to_16xi32
+; KNL: vpbroadcastd LCP{{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL: ret
 define   <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
   %a = bitcast i16 %b to <16 x i1>
   %c = zext <16 x i1> %a to <16 x i32>
   ret <16 x i32> %c
 }
 
-; CHECK-LABEL: zext_8i1_to_8xi64
-; CHECK: vpbroadcastq LCP{{.*}}(%rip), %zmm0 {%k1} {z}
-; CHECK: ret
+; KNL-LABEL: zext_8i1_to_8xi64
+; KNL: vpbroadcastq LCP{{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL: ret
 define   <8 x i64> @zext_8i1_to_8xi64(i8 %b) {
   %a = bitcast i8 %b to <8 x i1>
   %c = zext <8 x i1> %a to <8 x i64>
   ret <8 x i64> %c
 }
 
-; CHECK-LABEL: trunc_16i8_to_16i1
-; CHECK: vpmovsxbd
-; CHECK: vpandd
-; CHECK: vptestmd
-; CHECK: ret
+; KNL-LABEL: trunc_16i8_to_16i1
+; KNL: vpmovsxbd
+; KNL: vpandd
+; KNL: vptestmd
+; KNL: ret
 ; SKX-LABEL: trunc_16i8_to_16i1
 ; SKX: vpmovb2m %xmm
 define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
@@ -98,10 +846,10 @@ define i16 @trunc_16i8_to_16i1(<16 x i8>
   ret i16 %mask
 }
 
-; CHECK-LABEL: trunc_16i32_to_16i1
-; CHECK: vpandd
-; CHECK: vptestmd
-; CHECK: ret
+; KNL-LABEL: trunc_16i32_to_16i1
+; KNL: vpandd
+; KNL: vptestmd
+; KNL: ret
 ; SKX-LABEL: trunc_16i32_to_16i1
 ; SKX: vpmovd2m %zmm
 define i16 @trunc_16i32_to_16i1(<16 x i32> %a) {
@@ -122,11 +870,11 @@ define <4 x i32> @trunc_4i32_to_4i1(<4 x
   ret <4 x i32>%res
 }
 
-; CHECK-LABEL: trunc_8i16_to_8i1
-; CHECK: vpmovsxwq
-; CHECK: vpandq LCP{{.*}}(%rip){1to8}
-; CHECK: vptestmq
-; CHECK: ret
+; KNL-LABEL: trunc_8i16_to_8i1
+; KNL: vpmovsxwq
+; KNL: vpandq LCP{{.*}}(%rip){1to8}
+; KNL: vptestmq
+; KNL: ret
 
 ; SKX-LABEL: trunc_8i16_to_8i1
 ; SKX: vpmovw2m %xmm
@@ -136,10 +884,10 @@ define i8 @trunc_8i16_to_8i1(<8 x i16> %
   ret i8 %mask
 }
 
-; CHECK-LABEL: sext_8i1_8i32
-; CHECK: vpbroadcastq  LCP{{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-LABEL: sext_8i1_8i32
+; KNL: vpbroadcastq  LCP{{.*}}(%rip), %zmm0 {%k1} {z}
 ; SKX: vpmovm2d
-; CHECK: ret
+; KNL: ret
 define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
   %x = icmp slt <8 x i32> %a1, %a2
   %x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
@@ -147,18 +895,18 @@ define <8 x i32> @sext_8i1_8i32(<8 x i32
   ret <8 x i32> %y
 }
 
-; CHECK-LABEL: trunc_v16i32_to_v16i16
-; CHECK: vpmovdw
-; CHECK: ret
+; KNL-LABEL: trunc_v16i32_to_v16i16
+; KNL: vpmovdw
+; KNL: ret
 define <16 x i16> @trunc_v16i32_to_v16i16(<16 x i32> %x) {
   %1 = trunc <16 x i32> %x to <16 x i16>
   ret <16 x i16> %1
 }
 
-; CHECK-LABEL: trunc_i32_to_i1
-; CHECK: movw    $-4, %ax
-; CHECK: kmovw   %eax, %k1
-; CKECK: korw
+; KNL-LABEL: trunc_i32_to_i1
+; KNL: movw    $-4, %ax
+; KNL: kmovw   %eax, %k1
+; KNL: korw
 define i16 @trunc_i32_to_i1(i32 %a) {
   %a_i = trunc i32 %a to i1
   %maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0
@@ -166,35 +914,35 @@ define i16 @trunc_i32_to_i1(i32 %a) {
   ret i16 %res
 }
 
-; CHECK-LABEL: sext_8i1_8i16
+; KNL-LABEL: sext_8i1_8i16
 ; SKX: vpmovm2w
-; CHECK: ret
+; KNL: ret
 define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind {
   %x = icmp slt <8 x i32> %a1, %a2
   %y = sext <8 x i1> %x to <8 x i16>
   ret <8 x i16> %y
 }
 
-; CHECK-LABEL: sext_16i1_16i32
+; KNL-LABEL: sext_16i1_16i32
 ; SKX: vpmovm2d
-; CHECK: ret
+; KNL: ret
 define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind {
   %x = icmp slt <16 x i32> %a1, %a2
   %y = sext <16 x i1> %x to <16 x i32>
   ret <16 x i32> %y
 }
 
-; CHECK-LABEL: sext_8i1_8i64
+; KNL-LABEL: sext_8i1_8i64
 ; SKX: vpmovm2q
-; CHECK: ret
+; KNL: ret
 define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind {
   %x = icmp slt <8 x i32> %a1, %a2
   %y = sext <8 x i1> %x to <8 x i64>
   ret <8 x i64> %y
 }
 
-; CHECK-LABEL: @extload_v8i64
-; CHECK: vpmovsxbq
+; KNL-LABEL: @extload_v8i64
+; KNL: vpmovsxbq
 define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) {
   %sign_load = load <8 x i8>, <8 x i8>* %a
   %c = sext <8 x i8> %sign_load to <8 x i64>

Modified: llvm/trunk/test/MC/X86/x86-64-avx512bw.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/x86-64-avx512bw.s?rev=238301&r1=238300&r2=238301&view=diff
==============================================================================
--- llvm/trunk/test/MC/X86/x86-64-avx512bw.s (original)
+++ llvm/trunk/test/MC/X86/x86-64-avx512bw.s Wed May 27 03:15:19 2015
@@ -511,6 +511,78 @@
 // CHECK:  encoding: [0x62,0xe2,0x15,0x40,0x3a,0x9a,0xc0,0xdf,0xff,0xff]
           vpminuw -8256(%rdx), %zmm29, %zmm19
 
+// CHECK: vpmovsxbw %ymm18, %zmm22
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x48,0x20,0xf2]
+          vpmovsxbw %ymm18, %zmm22
+
+// CHECK: vpmovsxbw %ymm18, %zmm22 {%k5}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x4d,0x20,0xf2]
+          vpmovsxbw %ymm18, %zmm22 {%k5}
+
+// CHECK: vpmovsxbw %ymm18, %zmm22 {%k5} {z}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0xcd,0x20,0xf2]
+          vpmovsxbw %ymm18, %zmm22 {%k5} {z}
+
+// CHECK: vpmovsxbw (%rcx), %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x48,0x20,0x31]
+          vpmovsxbw (%rcx), %zmm22
+
+// CHECK: vpmovsxbw 291(%rax,%r14,8), %zmm22
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x48,0x20,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vpmovsxbw 291(%rax,%r14,8), %zmm22
+
+// CHECK: vpmovsxbw 4064(%rdx), %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x48,0x20,0x72,0x7f]
+          vpmovsxbw 4064(%rdx), %zmm22
+
+// CHECK: vpmovsxbw 4096(%rdx), %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x48,0x20,0xb2,0x00,0x10,0x00,0x00]
+          vpmovsxbw 4096(%rdx), %zmm22
+
+// CHECK: vpmovsxbw -4096(%rdx), %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x48,0x20,0x72,0x80]
+          vpmovsxbw -4096(%rdx), %zmm22
+
+// CHECK: vpmovsxbw -4128(%rdx), %zmm22
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x48,0x20,0xb2,0xe0,0xef,0xff,0xff]
+          vpmovsxbw -4128(%rdx), %zmm22
+
+// CHECK: vpmovzxbw %ymm26, %zmm24
+// CHECK:  encoding: [0x62,0x02,0x7d,0x48,0x30,0xc2]
+          vpmovzxbw %ymm26, %zmm24
+
+// CHECK: vpmovzxbw %ymm26, %zmm24 {%k4}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x4c,0x30,0xc2]
+          vpmovzxbw %ymm26, %zmm24 {%k4}
+
+// CHECK: vpmovzxbw %ymm26, %zmm24 {%k4} {z}
+// CHECK:  encoding: [0x62,0x02,0x7d,0xcc,0x30,0xc2]
+          vpmovzxbw %ymm26, %zmm24 {%k4} {z}
+
+// CHECK: vpmovzxbw (%rcx), %zmm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x30,0x01]
+          vpmovzxbw (%rcx), %zmm24
+
+// CHECK: vpmovzxbw 291(%rax,%r14,8), %zmm24
+// CHECK:  encoding: [0x62,0x22,0x7d,0x48,0x30,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vpmovzxbw 291(%rax,%r14,8), %zmm24
+
+// CHECK: vpmovzxbw 4064(%rdx), %zmm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x30,0x42,0x7f]
+          vpmovzxbw 4064(%rdx), %zmm24
+
+// CHECK: vpmovzxbw 4096(%rdx), %zmm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x30,0x82,0x00,0x10,0x00,0x00]
+          vpmovzxbw 4096(%rdx), %zmm24
+
+// CHECK: vpmovzxbw -4096(%rdx), %zmm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x30,0x42,0x80]
+          vpmovzxbw -4096(%rdx), %zmm24
+
+// CHECK: vpmovzxbw -4128(%rdx), %zmm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x48,0x30,0x82,0xe0,0xef,0xff,0xff]
+          vpmovzxbw -4128(%rdx), %zmm24
+
 // CHECK: vpmullw %zmm19, %zmm28, %zmm19
 // CHECK:  encoding: [0x62,0xa1,0x1d,0x40,0xd5,0xdb]
           vpmullw %zmm19, %zmm28, %zmm19

Modified: llvm/trunk/test/MC/X86/x86-64-avx512bw_vl.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/x86-64-avx512bw_vl.s?rev=238301&r1=238300&r2=238301&view=diff
==============================================================================
--- llvm/trunk/test/MC/X86/x86-64-avx512bw_vl.s (original)
+++ llvm/trunk/test/MC/X86/x86-64-avx512bw_vl.s Wed May 27 03:15:19 2015
@@ -1312,6 +1312,150 @@
 // CHECK:  encoding: [0x62,0xe2,0x25,0x20,0x3a,0xa2,0xe0,0xef,0xff,0xff]
           vpminuw -4128(%rdx), %ymm27, %ymm20
 
+// CHECK: vpmovsxbw %xmm23, %xmm27
+// CHECK:  encoding: [0x62,0x22,0x7d,0x08,0x20,0xdf]
+          vpmovsxbw %xmm23, %xmm27
+
+// CHECK: vpmovsxbw %xmm23, %xmm27 {%k7}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x0f,0x20,0xdf]
+          vpmovsxbw %xmm23, %xmm27 {%k7}
+
+// CHECK: vpmovsxbw %xmm23, %xmm27 {%k7} {z}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x8f,0x20,0xdf]
+          vpmovsxbw %xmm23, %xmm27 {%k7} {z}
+
+// CHECK: vpmovsxbw (%rcx), %xmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x20,0x19]
+          vpmovsxbw (%rcx), %xmm27
+
+// CHECK: vpmovsxbw 291(%rax,%r14,8), %xmm27
+// CHECK:  encoding: [0x62,0x22,0x7d,0x08,0x20,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vpmovsxbw 291(%rax,%r14,8), %xmm27
+
+// CHECK: vpmovsxbw 1016(%rdx), %xmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x20,0x5a,0x7f]
+          vpmovsxbw 1016(%rdx), %xmm27
+
+// CHECK: vpmovsxbw 1024(%rdx), %xmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x20,0x9a,0x00,0x04,0x00,0x00]
+          vpmovsxbw 1024(%rdx), %xmm27
+
+// CHECK: vpmovsxbw -1024(%rdx), %xmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x20,0x5a,0x80]
+          vpmovsxbw -1024(%rdx), %xmm27
+
+// CHECK: vpmovsxbw -1032(%rdx), %xmm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x20,0x9a,0xf8,0xfb,0xff,0xff]
+          vpmovsxbw -1032(%rdx), %xmm27
+
+// CHECK: vpmovsxbw %xmm23, %ymm21
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x28,0x20,0xef]
+          vpmovsxbw %xmm23, %ymm21
+
+// CHECK: vpmovsxbw %xmm23, %ymm21 {%k7}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x2f,0x20,0xef]
+          vpmovsxbw %xmm23, %ymm21 {%k7}
+
+// CHECK: vpmovsxbw %xmm23, %ymm21 {%k7} {z}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0xaf,0x20,0xef]
+          vpmovsxbw %xmm23, %ymm21 {%k7} {z}
+
+// CHECK: vpmovsxbw (%rcx), %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x20,0x29]
+          vpmovsxbw (%rcx), %ymm21
+
+// CHECK: vpmovsxbw 291(%rax,%r14,8), %ymm21
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x28,0x20,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vpmovsxbw 291(%rax,%r14,8), %ymm21
+
+// CHECK: vpmovsxbw 2032(%rdx), %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x20,0x6a,0x7f]
+          vpmovsxbw 2032(%rdx), %ymm21
+
+// CHECK: vpmovsxbw 2048(%rdx), %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x20,0xaa,0x00,0x08,0x00,0x00]
+          vpmovsxbw 2048(%rdx), %ymm21
+
+// CHECK: vpmovsxbw -2048(%rdx), %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x20,0x6a,0x80]
+          vpmovsxbw -2048(%rdx), %ymm21
+
+// CHECK: vpmovsxbw -2064(%rdx), %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x20,0xaa,0xf0,0xf7,0xff,0xff]
+          vpmovsxbw -2064(%rdx), %ymm21
+
+// CHECK: vpmovzxbw %xmm29, %xmm30
+// CHECK:  encoding: [0x62,0x02,0x7d,0x08,0x30,0xf5]
+          vpmovzxbw %xmm29, %xmm30
+
+// CHECK: vpmovzxbw %xmm29, %xmm30 {%k7}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x0f,0x30,0xf5]
+          vpmovzxbw %xmm29, %xmm30 {%k7}
+
+// CHECK: vpmovzxbw %xmm29, %xmm30 {%k7} {z}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x8f,0x30,0xf5]
+          vpmovzxbw %xmm29, %xmm30 {%k7} {z}
+
+// CHECK: vpmovzxbw (%rcx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x30,0x31]
+          vpmovzxbw (%rcx), %xmm30
+
+// CHECK: vpmovzxbw 291(%rax,%r14,8), %xmm30
+// CHECK:  encoding: [0x62,0x22,0x7d,0x08,0x30,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vpmovzxbw 291(%rax,%r14,8), %xmm30
+
+// CHECK: vpmovzxbw 1016(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x30,0x72,0x7f]
+          vpmovzxbw 1016(%rdx), %xmm30
+
+// CHECK: vpmovzxbw 1024(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x30,0xb2,0x00,0x04,0x00,0x00]
+          vpmovzxbw 1024(%rdx), %xmm30
+
+// CHECK: vpmovzxbw -1024(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x30,0x72,0x80]
+          vpmovzxbw -1024(%rdx), %xmm30
+
+// CHECK: vpmovzxbw -1032(%rdx), %xmm30
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x30,0xb2,0xf8,0xfb,0xff,0xff]
+          vpmovzxbw -1032(%rdx), %xmm30
+
+// CHECK: vpmovzxbw %xmm29, %ymm22
+// CHECK:  encoding: [0x62,0x82,0x7d,0x28,0x30,0xf5]
+          vpmovzxbw %xmm29, %ymm22
+
+// CHECK: vpmovzxbw %xmm29, %ymm22 {%k2}
+// CHECK:  encoding: [0x62,0x82,0x7d,0x2a,0x30,0xf5]
+          vpmovzxbw %xmm29, %ymm22 {%k2}
+
+// CHECK: vpmovzxbw %xmm29, %ymm22 {%k2} {z}
+// CHECK:  encoding: [0x62,0x82,0x7d,0xaa,0x30,0xf5]
+          vpmovzxbw %xmm29, %ymm22 {%k2} {z}
+
+// CHECK: vpmovzxbw (%rcx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x30,0x31]
+          vpmovzxbw (%rcx), %ymm22
+
+// CHECK: vpmovzxbw 291(%rax,%r14,8), %ymm22
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x28,0x30,0xb4,0xf0,0x23,0x01,0x00,0x00]
+          vpmovzxbw 291(%rax,%r14,8), %ymm22
+
+// CHECK: vpmovzxbw 2032(%rdx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x30,0x72,0x7f]
+          vpmovzxbw 2032(%rdx), %ymm22
+
+// CHECK: vpmovzxbw 2048(%rdx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x30,0xb2,0x00,0x08,0x00,0x00]
+          vpmovzxbw 2048(%rdx), %ymm22
+
+// CHECK: vpmovzxbw -2048(%rdx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x30,0x72,0x80]
+          vpmovzxbw -2048(%rdx), %ymm22
+
+// CHECK: vpmovzxbw -2064(%rdx), %ymm22
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x30,0xb2,0xf0,0xf7,0xff,0xff]
+          vpmovzxbw -2064(%rdx), %ymm22
+
 // CHECK: vpmullw %xmm26, %xmm19, %xmm29
 // CHECK:  encoding: [0x62,0x01,0x65,0x00,0xd5,0xea]
           vpmullw %xmm26, %xmm19, %xmm29

Modified: llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s?rev=238301&r1=238300&r2=238301&view=diff
==============================================================================
--- llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s (original)
+++ llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s Wed May 27 03:15:19 2015
@@ -4524,6 +4524,726 @@
 // CHECK:  encoding: [0x62,0x62,0xd5,0x30,0x3b,0xaa,0xf8,0xfb,0xff,0xff]
           vpminuq -1032(%rdx){1to4}, %ymm21, %ymm29
 
+// CHECK: vpmovsxbd %xmm28, %xmm24
+// CHECK:  encoding: [0x62,0x02,0x7d,0x08,0x21,0xc4]
+          vpmovsxbd %xmm28, %xmm24
+
+// CHECK: vpmovsxbd %xmm28, %xmm24 {%k1}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x09,0x21,0xc4]
+          vpmovsxbd %xmm28, %xmm24 {%k1}
+
+// CHECK: vpmovsxbd %xmm28, %xmm24 {%k1} {z}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x89,0x21,0xc4]
+          vpmovsxbd %xmm28, %xmm24 {%k1} {z}
+
+// CHECK: vpmovsxbd (%rcx), %xmm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x21,0x01]
+          vpmovsxbd (%rcx), %xmm24
+
+// CHECK: vpmovsxbd 291(%rax,%r14,8), %xmm24
+// CHECK:  encoding: [0x62,0x22,0x7d,0x08,0x21,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vpmovsxbd 291(%rax,%r14,8), %xmm24
+
+// CHECK: vpmovsxbd 508(%rdx), %xmm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x21,0x42,0x7f]
+          vpmovsxbd 508(%rdx), %xmm24
+
+// CHECK: vpmovsxbd 512(%rdx), %xmm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x21,0x82,0x00,0x02,0x00,0x00]
+          vpmovsxbd 512(%rdx), %xmm24
+
+// CHECK: vpmovsxbd -512(%rdx), %xmm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x21,0x42,0x80]
+          vpmovsxbd -512(%rdx), %xmm24
+
+// CHECK: vpmovsxbd -516(%rdx), %xmm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x21,0x82,0xfc,0xfd,0xff,0xff]
+          vpmovsxbd -516(%rdx), %xmm24
+
+// CHECK: vpmovsxbd %xmm20, %ymm24
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x21,0xc4]
+          vpmovsxbd %xmm20, %ymm24
+
+// CHECK: vpmovsxbd %xmm20, %ymm24 {%k3}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x2b,0x21,0xc4]
+          vpmovsxbd %xmm20, %ymm24 {%k3}
+
+// CHECK: vpmovsxbd %xmm20, %ymm24 {%k3} {z}
+// CHECK:  encoding: [0x62,0x22,0x7d,0xab,0x21,0xc4]
+          vpmovsxbd %xmm20, %ymm24 {%k3} {z}
+
+// CHECK: vpmovsxbd (%rcx), %ymm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x21,0x01]
+          vpmovsxbd (%rcx), %ymm24
+
+// CHECK: vpmovsxbd 291(%rax,%r14,8), %ymm24
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x21,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vpmovsxbd 291(%rax,%r14,8), %ymm24
+
+// CHECK: vpmovsxbd 1016(%rdx), %ymm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x21,0x42,0x7f]
+          vpmovsxbd 1016(%rdx), %ymm24
+
+// CHECK: vpmovsxbd 1024(%rdx), %ymm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x21,0x82,0x00,0x04,0x00,0x00]
+          vpmovsxbd 1024(%rdx), %ymm24
+
+// CHECK: vpmovsxbd -1024(%rdx), %ymm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x21,0x42,0x80]
+          vpmovsxbd -1024(%rdx), %ymm24
+
+// CHECK: vpmovsxbd -1032(%rdx), %ymm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x21,0x82,0xf8,0xfb,0xff,0xff]
+          vpmovsxbd -1032(%rdx), %ymm24
+
+// CHECK: vpmovsxbq %xmm22, %xmm17
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x08,0x22,0xce]
+          vpmovsxbq %xmm22, %xmm17
+
+// CHECK: vpmovsxbq %xmm22, %xmm17 {%k5}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x0d,0x22,0xce]
+          vpmovsxbq %xmm22, %xmm17 {%k5}
+
+// CHECK: vpmovsxbq %xmm22, %xmm17 {%k5} {z}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x8d,0x22,0xce]
+          vpmovsxbq %xmm22, %xmm17 {%k5} {z}
+
+// CHECK: vpmovsxbq (%rcx), %xmm17
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x22,0x09]
+          vpmovsxbq (%rcx), %xmm17
+
+// CHECK: vpmovsxbq 291(%rax,%r14,8), %xmm17
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x08,0x22,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vpmovsxbq 291(%rax,%r14,8), %xmm17
+
+// CHECK: vpmovsxbq 254(%rdx), %xmm17
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x22,0x4a,0x7f]
+          vpmovsxbq 254(%rdx), %xmm17
+
+// CHECK: vpmovsxbq 256(%rdx), %xmm17
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x22,0x8a,0x00,0x01,0x00,0x00]
+          vpmovsxbq 256(%rdx), %xmm17
+
+// CHECK: vpmovsxbq -256(%rdx), %xmm17
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x22,0x4a,0x80]
+          vpmovsxbq -256(%rdx), %xmm17
+
+// CHECK: vpmovsxbq -258(%rdx), %xmm17
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x22,0x8a,0xfe,0xfe,0xff,0xff]
+          vpmovsxbq -258(%rdx), %xmm17
+
+// CHECK: vpmovsxbq %xmm26, %ymm28
+// CHECK:  encoding: [0x62,0x02,0x7d,0x28,0x22,0xe2]
+          vpmovsxbq %xmm26, %ymm28
+
+// CHECK: vpmovsxbq %xmm26, %ymm28 {%k5}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x2d,0x22,0xe2]
+          vpmovsxbq %xmm26, %ymm28 {%k5}
+
+// CHECK: vpmovsxbq %xmm26, %ymm28 {%k5} {z}
+// CHECK:  encoding: [0x62,0x02,0x7d,0xad,0x22,0xe2]
+          vpmovsxbq %xmm26, %ymm28 {%k5} {z}
+
+// CHECK: vpmovsxbq (%rcx), %ymm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x22,0x21]
+          vpmovsxbq (%rcx), %ymm28
+
+// CHECK: vpmovsxbq 291(%rax,%r14,8), %ymm28
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x22,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vpmovsxbq 291(%rax,%r14,8), %ymm28
+
+// CHECK: vpmovsxbq 508(%rdx), %ymm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x22,0x62,0x7f]
+          vpmovsxbq 508(%rdx), %ymm28
+
+// CHECK: vpmovsxbq 512(%rdx), %ymm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x22,0xa2,0x00,0x02,0x00,0x00]
+          vpmovsxbq 512(%rdx), %ymm28
+
+// CHECK: vpmovsxbq -512(%rdx), %ymm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x22,0x62,0x80]
+          vpmovsxbq -512(%rdx), %ymm28
+
+// CHECK: vpmovsxbq -516(%rdx), %ymm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x22,0xa2,0xfc,0xfd,0xff,0xff]
+          vpmovsxbq -516(%rdx), %ymm28
+
+// CHECK: vpmovsxdq %xmm26, %xmm23
+// CHECK:  encoding: [0x62,0x82,0x7d,0x08,0x25,0xfa]
+          vpmovsxdq %xmm26, %xmm23
+
+// CHECK: vpmovsxdq %xmm26, %xmm23 {%k7}
+// CHECK:  encoding: [0x62,0x82,0x7d,0x0f,0x25,0xfa]
+          vpmovsxdq %xmm26, %xmm23 {%k7}
+
+// CHECK: vpmovsxdq %xmm26, %xmm23 {%k7} {z}
+// CHECK:  encoding: [0x62,0x82,0x7d,0x8f,0x25,0xfa]
+          vpmovsxdq %xmm26, %xmm23 {%k7} {z}
+
+// CHECK: vpmovsxdq (%rcx), %xmm23
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x25,0x39]
+          vpmovsxdq (%rcx), %xmm23
+
+// CHECK: vpmovsxdq 291(%rax,%r14,8), %xmm23
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x08,0x25,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vpmovsxdq 291(%rax,%r14,8), %xmm23
+
+// CHECK: vpmovsxdq 1016(%rdx), %xmm23
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x25,0x7a,0x7f]
+          vpmovsxdq 1016(%rdx), %xmm23
+
+// CHECK: vpmovsxdq 1024(%rdx), %xmm23
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x25,0xba,0x00,0x04,0x00,0x00]
+          vpmovsxdq 1024(%rdx), %xmm23
+
+// CHECK: vpmovsxdq -1024(%rdx), %xmm23
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x25,0x7a,0x80]
+          vpmovsxdq -1024(%rdx), %xmm23
+
+// CHECK: vpmovsxdq -1032(%rdx), %xmm23
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x25,0xba,0xf8,0xfb,0xff,0xff]
+          vpmovsxdq -1032(%rdx), %xmm23
+
+// CHECK: vpmovsxdq %xmm28, %ymm18
+// CHECK:  encoding: [0x62,0x82,0x7d,0x28,0x25,0xd4]
+          vpmovsxdq %xmm28, %ymm18
+
+// CHECK: vpmovsxdq %xmm28, %ymm18 {%k7}
+// CHECK:  encoding: [0x62,0x82,0x7d,0x2f,0x25,0xd4]
+          vpmovsxdq %xmm28, %ymm18 {%k7}
+
+// CHECK: vpmovsxdq %xmm28, %ymm18 {%k7} {z}
+// CHECK:  encoding: [0x62,0x82,0x7d,0xaf,0x25,0xd4]
+          vpmovsxdq %xmm28, %ymm18 {%k7} {z}
+
+// CHECK: vpmovsxdq (%rcx), %ymm18
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x25,0x11]
+          vpmovsxdq (%rcx), %ymm18
+
+// CHECK: vpmovsxdq 291(%rax,%r14,8), %ymm18
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x28,0x25,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vpmovsxdq 291(%rax,%r14,8), %ymm18
+
+// CHECK: vpmovsxdq 2032(%rdx), %ymm18
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x25,0x52,0x7f]
+          vpmovsxdq 2032(%rdx), %ymm18
+
+// CHECK: vpmovsxdq 2048(%rdx), %ymm18
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x25,0x92,0x00,0x08,0x00,0x00]
+          vpmovsxdq 2048(%rdx), %ymm18
+
+// CHECK: vpmovsxdq -2048(%rdx), %ymm18
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x25,0x52,0x80]
+          vpmovsxdq -2048(%rdx), %ymm18
+
+// CHECK: vpmovsxdq -2064(%rdx), %ymm18
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x25,0x92,0xf0,0xf7,0xff,0xff]
+          vpmovsxdq -2064(%rdx), %ymm18
+
+// CHECK: vpmovsxwd %xmm18, %xmm17
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x08,0x23,0xca]
+          vpmovsxwd %xmm18, %xmm17
+
+// CHECK: vpmovsxwd %xmm18, %xmm17 {%k4}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x0c,0x23,0xca]
+          vpmovsxwd %xmm18, %xmm17 {%k4}
+
+// CHECK: vpmovsxwd %xmm18, %xmm17 {%k4} {z}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x8c,0x23,0xca]
+          vpmovsxwd %xmm18, %xmm17 {%k4} {z}
+
+// CHECK: vpmovsxwd (%rcx), %xmm17
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x23,0x09]
+          vpmovsxwd (%rcx), %xmm17
+
+// CHECK: vpmovsxwd 291(%rax,%r14,8), %xmm17
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x08,0x23,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vpmovsxwd 291(%rax,%r14,8), %xmm17
+
+// CHECK: vpmovsxwd 1016(%rdx), %xmm17
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x23,0x4a,0x7f]
+          vpmovsxwd 1016(%rdx), %xmm17
+
+// CHECK: vpmovsxwd 1024(%rdx), %xmm17
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x23,0x8a,0x00,0x04,0x00,0x00]
+          vpmovsxwd 1024(%rdx), %xmm17
+
+// CHECK: vpmovsxwd -1024(%rdx), %xmm17
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x23,0x4a,0x80]
+          vpmovsxwd -1024(%rdx), %xmm17
+
+// CHECK: vpmovsxwd -1032(%rdx), %xmm17
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x23,0x8a,0xf8,0xfb,0xff,0xff]
+          vpmovsxwd -1032(%rdx), %xmm17
+
+// CHECK: vpmovsxwd %xmm25, %ymm21
+// CHECK:  encoding: [0x62,0x82,0x7d,0x28,0x23,0xe9]
+          vpmovsxwd %xmm25, %ymm21
+
+// CHECK: vpmovsxwd %xmm25, %ymm21 {%k5}
+// CHECK:  encoding: [0x62,0x82,0x7d,0x2d,0x23,0xe9]
+          vpmovsxwd %xmm25, %ymm21 {%k5}
+
+// CHECK: vpmovsxwd %xmm25, %ymm21 {%k5} {z}
+// CHECK:  encoding: [0x62,0x82,0x7d,0xad,0x23,0xe9]
+          vpmovsxwd %xmm25, %ymm21 {%k5} {z}
+
+// CHECK: vpmovsxwd (%rcx), %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x23,0x29]
+          vpmovsxwd (%rcx), %ymm21
+
+// CHECK: vpmovsxwd 291(%rax,%r14,8), %ymm21
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x28,0x23,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vpmovsxwd 291(%rax,%r14,8), %ymm21
+
+// CHECK: vpmovsxwd 2032(%rdx), %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x23,0x6a,0x7f]
+          vpmovsxwd 2032(%rdx), %ymm21
+
+// CHECK: vpmovsxwd 2048(%rdx), %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x23,0xaa,0x00,0x08,0x00,0x00]
+          vpmovsxwd 2048(%rdx), %ymm21
+
+// CHECK: vpmovsxwd -2048(%rdx), %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x23,0x6a,0x80]
+          vpmovsxwd -2048(%rdx), %ymm21
+
+// CHECK: vpmovsxwd -2064(%rdx), %ymm21
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x23,0xaa,0xf0,0xf7,0xff,0xff]
+          vpmovsxwd -2064(%rdx), %ymm21
+
+// CHECK: vpmovsxwq %xmm20, %xmm29
+// CHECK:  encoding: [0x62,0x22,0x7d,0x08,0x24,0xec]
+          vpmovsxwq %xmm20, %xmm29
+
+// CHECK: vpmovsxwq %xmm20, %xmm29 {%k6}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x0e,0x24,0xec]
+          vpmovsxwq %xmm20, %xmm29 {%k6}
+
+// CHECK: vpmovsxwq %xmm20, %xmm29 {%k6} {z}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x8e,0x24,0xec]
+          vpmovsxwq %xmm20, %xmm29 {%k6} {z}
+
+// CHECK: vpmovsxwq (%rcx), %xmm29
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x24,0x29]
+          vpmovsxwq (%rcx), %xmm29
+
+// CHECK: vpmovsxwq 291(%rax,%r14,8), %xmm29
+// CHECK:  encoding: [0x62,0x22,0x7d,0x08,0x24,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vpmovsxwq 291(%rax,%r14,8), %xmm29
+
+// CHECK: vpmovsxwq 508(%rdx), %xmm29
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x24,0x6a,0x7f]
+          vpmovsxwq 508(%rdx), %xmm29
+
+// CHECK: vpmovsxwq 512(%rdx), %xmm29
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x24,0xaa,0x00,0x02,0x00,0x00]
+          vpmovsxwq 512(%rdx), %xmm29
+
+// CHECK: vpmovsxwq -512(%rdx), %xmm29
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x24,0x6a,0x80]
+          vpmovsxwq -512(%rdx), %xmm29
+
+// CHECK: vpmovsxwq -516(%rdx), %xmm29
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x24,0xaa,0xfc,0xfd,0xff,0xff]
+          vpmovsxwq -516(%rdx), %xmm29
+
+// CHECK: vpmovsxwq %xmm17, %ymm23
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x28,0x24,0xf9]
+          vpmovsxwq %xmm17, %ymm23
+
+// CHECK: vpmovsxwq %xmm17, %ymm23 {%k5}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x2d,0x24,0xf9]
+          vpmovsxwq %xmm17, %ymm23 {%k5}
+
+// CHECK: vpmovsxwq %xmm17, %ymm23 {%k5} {z}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0xad,0x24,0xf9]
+          vpmovsxwq %xmm17, %ymm23 {%k5} {z}
+
+// CHECK: vpmovsxwq (%rcx), %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x24,0x39]
+          vpmovsxwq (%rcx), %ymm23
+
+// CHECK: vpmovsxwq 291(%rax,%r14,8), %ymm23
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x28,0x24,0xbc,0xf0,0x23,0x01,0x00,0x00]
+          vpmovsxwq 291(%rax,%r14,8), %ymm23
+
+// CHECK: vpmovsxwq 1016(%rdx), %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x24,0x7a,0x7f]
+          vpmovsxwq 1016(%rdx), %ymm23
+
+// CHECK: vpmovsxwq 1024(%rdx), %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x24,0xba,0x00,0x04,0x00,0x00]
+          vpmovsxwq 1024(%rdx), %ymm23
+
+// CHECK: vpmovsxwq -1024(%rdx), %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x24,0x7a,0x80]
+          vpmovsxwq -1024(%rdx), %ymm23
+
+// CHECK: vpmovsxwq -1032(%rdx), %ymm23
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x24,0xba,0xf8,0xfb,0xff,0xff]
+          vpmovsxwq -1032(%rdx), %ymm23
+
+// CHECK: vpmovzxbd %xmm17, %xmm24
+// CHECK:  encoding: [0x62,0x22,0x7d,0x08,0x31,0xc1]
+          vpmovzxbd %xmm17, %xmm24
+
+// CHECK: vpmovzxbd %xmm17, %xmm24 {%k6}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x0e,0x31,0xc1]
+          vpmovzxbd %xmm17, %xmm24 {%k6}
+
+// CHECK: vpmovzxbd %xmm17, %xmm24 {%k6} {z}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x8e,0x31,0xc1]
+          vpmovzxbd %xmm17, %xmm24 {%k6} {z}
+
+// CHECK: vpmovzxbd (%rcx), %xmm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x31,0x01]
+          vpmovzxbd (%rcx), %xmm24
+
+// CHECK: vpmovzxbd 291(%rax,%r14,8), %xmm24
+// CHECK:  encoding: [0x62,0x22,0x7d,0x08,0x31,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vpmovzxbd 291(%rax,%r14,8), %xmm24
+
+// CHECK: vpmovzxbd 508(%rdx), %xmm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x31,0x42,0x7f]
+          vpmovzxbd 508(%rdx), %xmm24
+
+// CHECK: vpmovzxbd 512(%rdx), %xmm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x31,0x82,0x00,0x02,0x00,0x00]
+          vpmovzxbd 512(%rdx), %xmm24
+
+// CHECK: vpmovzxbd -512(%rdx), %xmm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x31,0x42,0x80]
+          vpmovzxbd -512(%rdx), %xmm24
+
+// CHECK: vpmovzxbd -516(%rdx), %xmm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x31,0x82,0xfc,0xfd,0xff,0xff]
+          vpmovzxbd -516(%rdx), %xmm24
+
+// CHECK: vpmovzxbd %xmm17, %ymm27
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x31,0xd9]
+          vpmovzxbd %xmm17, %ymm27
+
+// CHECK: vpmovzxbd %xmm17, %ymm27 {%k1}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x29,0x31,0xd9]
+          vpmovzxbd %xmm17, %ymm27 {%k1}
+
+// CHECK: vpmovzxbd %xmm17, %ymm27 {%k1} {z}
+// CHECK:  encoding: [0x62,0x22,0x7d,0xa9,0x31,0xd9]
+          vpmovzxbd %xmm17, %ymm27 {%k1} {z}
+
+// CHECK: vpmovzxbd (%rcx), %ymm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x31,0x19]
+          vpmovzxbd (%rcx), %ymm27
+
+// CHECK: vpmovzxbd 291(%rax,%r14,8), %ymm27
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x31,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vpmovzxbd 291(%rax,%r14,8), %ymm27
+
+// CHECK: vpmovzxbd 1016(%rdx), %ymm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x31,0x5a,0x7f]
+          vpmovzxbd 1016(%rdx), %ymm27
+
+// CHECK: vpmovzxbd 1024(%rdx), %ymm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x31,0x9a,0x00,0x04,0x00,0x00]
+          vpmovzxbd 1024(%rdx), %ymm27
+
+// CHECK: vpmovzxbd -1024(%rdx), %ymm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x31,0x5a,0x80]
+          vpmovzxbd -1024(%rdx), %ymm27
+
+// CHECK: vpmovzxbd -1032(%rdx), %ymm27
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x31,0x9a,0xf8,0xfb,0xff,0xff]
+          vpmovzxbd -1032(%rdx), %ymm27
+
+// CHECK: vpmovzxbq %xmm19, %xmm19
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x08,0x32,0xdb]
+          vpmovzxbq %xmm19, %xmm19
+
+// CHECK: vpmovzxbq %xmm19, %xmm19 {%k1}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x09,0x32,0xdb]
+          vpmovzxbq %xmm19, %xmm19 {%k1}
+
+// CHECK: vpmovzxbq %xmm19, %xmm19 {%k1} {z}
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x89,0x32,0xdb]
+          vpmovzxbq %xmm19, %xmm19 {%k1} {z}
+
+// CHECK: vpmovzxbq (%rcx), %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x32,0x19]
+          vpmovzxbq (%rcx), %xmm19
+
+// CHECK: vpmovzxbq 291(%rax,%r14,8), %xmm19
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x08,0x32,0x9c,0xf0,0x23,0x01,0x00,0x00]
+          vpmovzxbq 291(%rax,%r14,8), %xmm19
+
+// CHECK: vpmovzxbq 254(%rdx), %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x32,0x5a,0x7f]
+          vpmovzxbq 254(%rdx), %xmm19
+
+// CHECK: vpmovzxbq 256(%rdx), %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x32,0x9a,0x00,0x01,0x00,0x00]
+          vpmovzxbq 256(%rdx), %xmm19
+
+// CHECK: vpmovzxbq -256(%rdx), %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x32,0x5a,0x80]
+          vpmovzxbq -256(%rdx), %xmm19
+
+// CHECK: vpmovzxbq -258(%rdx), %xmm19
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x08,0x32,0x9a,0xfe,0xfe,0xff,0xff]
+          vpmovzxbq -258(%rdx), %xmm19
+
+// CHECK: vpmovzxbq %xmm19, %ymm24
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x32,0xc3]
+          vpmovzxbq %xmm19, %ymm24
+
+// CHECK: vpmovzxbq %xmm19, %ymm24 {%k2}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x2a,0x32,0xc3]
+          vpmovzxbq %xmm19, %ymm24 {%k2}
+
+// CHECK: vpmovzxbq %xmm19, %ymm24 {%k2} {z}
+// CHECK:  encoding: [0x62,0x22,0x7d,0xaa,0x32,0xc3]
+          vpmovzxbq %xmm19, %ymm24 {%k2} {z}
+
+// CHECK: vpmovzxbq (%rcx), %ymm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x32,0x01]
+          vpmovzxbq (%rcx), %ymm24
+
+// CHECK: vpmovzxbq 291(%rax,%r14,8), %ymm24
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x32,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vpmovzxbq 291(%rax,%r14,8), %ymm24
+
+// CHECK: vpmovzxbq 508(%rdx), %ymm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x32,0x42,0x7f]
+          vpmovzxbq 508(%rdx), %ymm24
+
+// CHECK: vpmovzxbq 512(%rdx), %ymm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x32,0x82,0x00,0x02,0x00,0x00]
+          vpmovzxbq 512(%rdx), %ymm24
+
+// CHECK: vpmovzxbq -512(%rdx), %ymm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x32,0x42,0x80]
+          vpmovzxbq -512(%rdx), %ymm24
+
+// CHECK: vpmovzxbq -516(%rdx), %ymm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x32,0x82,0xfc,0xfd,0xff,0xff]
+          vpmovzxbq -516(%rdx), %ymm24
+
+// CHECK: vpmovzxdq %xmm21, %xmm25
+// CHECK:  encoding: [0x62,0x22,0x7d,0x08,0x35,0xcd]
+          vpmovzxdq %xmm21, %xmm25
+
+// CHECK: vpmovzxdq %xmm21, %xmm25 {%k7}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x0f,0x35,0xcd]
+          vpmovzxdq %xmm21, %xmm25 {%k7}
+
+// CHECK: vpmovzxdq %xmm21, %xmm25 {%k7} {z}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x8f,0x35,0xcd]
+          vpmovzxdq %xmm21, %xmm25 {%k7} {z}
+
+// CHECK: vpmovzxdq (%rcx), %xmm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x35,0x09]
+          vpmovzxdq (%rcx), %xmm25
+
+// CHECK: vpmovzxdq 291(%rax,%r14,8), %xmm25
+// CHECK:  encoding: [0x62,0x22,0x7d,0x08,0x35,0x8c,0xf0,0x23,0x01,0x00,0x00]
+          vpmovzxdq 291(%rax,%r14,8), %xmm25
+
+// CHECK: vpmovzxdq 1016(%rdx), %xmm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x35,0x4a,0x7f]
+          vpmovzxdq 1016(%rdx), %xmm25
+
+// CHECK: vpmovzxdq 1024(%rdx), %xmm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x35,0x8a,0x00,0x04,0x00,0x00]
+          vpmovzxdq 1024(%rdx), %xmm25
+
+// CHECK: vpmovzxdq -1024(%rdx), %xmm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x35,0x4a,0x80]
+          vpmovzxdq -1024(%rdx), %xmm25
+
+// CHECK: vpmovzxdq -1032(%rdx), %xmm25
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x35,0x8a,0xf8,0xfb,0xff,0xff]
+          vpmovzxdq -1032(%rdx), %xmm25
+
+// CHECK: vpmovzxdq %xmm22, %ymm28
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x35,0xe6]
+          vpmovzxdq %xmm22, %ymm28
+
+// CHECK: vpmovzxdq %xmm22, %ymm28 {%k7}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x2f,0x35,0xe6]
+          vpmovzxdq %xmm22, %ymm28 {%k7}
+
+// CHECK: vpmovzxdq %xmm22, %ymm28 {%k7} {z}
+// CHECK:  encoding: [0x62,0x22,0x7d,0xaf,0x35,0xe6]
+          vpmovzxdq %xmm22, %ymm28 {%k7} {z}
+
+// CHECK: vpmovzxdq (%rcx), %ymm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x35,0x21]
+          vpmovzxdq (%rcx), %ymm28
+
+// CHECK: vpmovzxdq 291(%rax,%r14,8), %ymm28
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x35,0xa4,0xf0,0x23,0x01,0x00,0x00]
+          vpmovzxdq 291(%rax,%r14,8), %ymm28
+
+// CHECK: vpmovzxdq 2032(%rdx), %ymm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x35,0x62,0x7f]
+          vpmovzxdq 2032(%rdx), %ymm28
+
+// CHECK: vpmovzxdq 2048(%rdx), %ymm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x35,0xa2,0x00,0x08,0x00,0x00]
+          vpmovzxdq 2048(%rdx), %ymm28
+
+// CHECK: vpmovzxdq -2048(%rdx), %ymm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x35,0x62,0x80]
+          vpmovzxdq -2048(%rdx), %ymm28
+
+// CHECK: vpmovzxdq -2064(%rdx), %ymm28
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x35,0xa2,0xf0,0xf7,0xff,0xff]
+          vpmovzxdq -2064(%rdx), %ymm28
+
+// CHECK: vpmovzxwd %xmm17, %xmm24
+// CHECK:  encoding: [0x62,0x22,0x7d,0x08,0x33,0xc1]
+          vpmovzxwd %xmm17, %xmm24
+
+// CHECK: vpmovzxwd %xmm17, %xmm24 {%k4}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x0c,0x33,0xc1]
+          vpmovzxwd %xmm17, %xmm24 {%k4}
+
+// CHECK: vpmovzxwd %xmm17, %xmm24 {%k4} {z}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x8c,0x33,0xc1]
+          vpmovzxwd %xmm17, %xmm24 {%k4} {z}
+
+// CHECK: vpmovzxwd (%rcx), %xmm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x33,0x01]
+          vpmovzxwd (%rcx), %xmm24
+
+// CHECK: vpmovzxwd 291(%rax,%r14,8), %xmm24
+// CHECK:  encoding: [0x62,0x22,0x7d,0x08,0x33,0x84,0xf0,0x23,0x01,0x00,0x00]
+          vpmovzxwd 291(%rax,%r14,8), %xmm24
+
+// CHECK: vpmovzxwd 1016(%rdx), %xmm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x33,0x42,0x7f]
+          vpmovzxwd 1016(%rdx), %xmm24
+
+// CHECK: vpmovzxwd 1024(%rdx), %xmm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x33,0x82,0x00,0x04,0x00,0x00]
+          vpmovzxwd 1024(%rdx), %xmm24
+
+// CHECK: vpmovzxwd -1024(%rdx), %xmm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x33,0x42,0x80]
+          vpmovzxwd -1024(%rdx), %xmm24
+
+// CHECK: vpmovzxwd -1032(%rdx), %xmm24
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x33,0x82,0xf8,0xfb,0xff,0xff]
+          vpmovzxwd -1032(%rdx), %xmm24
+
+// CHECK: vpmovzxwd %xmm29, %ymm26
+// CHECK:  encoding: [0x62,0x02,0x7d,0x28,0x33,0xd5]
+          vpmovzxwd %xmm29, %ymm26
+
+// CHECK: vpmovzxwd %xmm29, %ymm26 {%k5}
+// CHECK:  encoding: [0x62,0x02,0x7d,0x2d,0x33,0xd5]
+          vpmovzxwd %xmm29, %ymm26 {%k5}
+
+// CHECK: vpmovzxwd %xmm29, %ymm26 {%k5} {z}
+// CHECK:  encoding: [0x62,0x02,0x7d,0xad,0x33,0xd5]
+          vpmovzxwd %xmm29, %ymm26 {%k5} {z}
+
+// CHECK: vpmovzxwd (%rcx), %ymm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x33,0x11]
+          vpmovzxwd (%rcx), %ymm26
+
+// CHECK: vpmovzxwd 291(%rax,%r14,8), %ymm26
+// CHECK:  encoding: [0x62,0x22,0x7d,0x28,0x33,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vpmovzxwd 291(%rax,%r14,8), %ymm26
+
+// CHECK: vpmovzxwd 2032(%rdx), %ymm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x33,0x52,0x7f]
+          vpmovzxwd 2032(%rdx), %ymm26
+
+// CHECK: vpmovzxwd 2048(%rdx), %ymm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x33,0x92,0x00,0x08,0x00,0x00]
+          vpmovzxwd 2048(%rdx), %ymm26
+
+// CHECK: vpmovzxwd -2048(%rdx), %ymm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x33,0x52,0x80]
+          vpmovzxwd -2048(%rdx), %ymm26
+
+// CHECK: vpmovzxwd -2064(%rdx), %ymm26
+// CHECK:  encoding: [0x62,0x62,0x7d,0x28,0x33,0x92,0xf0,0xf7,0xff,0xff]
+          vpmovzxwd -2064(%rdx), %ymm26
+
+// CHECK: vpmovzxwq %xmm20, %xmm29
+// CHECK:  encoding: [0x62,0x22,0x7d,0x08,0x34,0xec]
+          vpmovzxwq %xmm20, %xmm29
+
+// CHECK: vpmovzxwq %xmm20, %xmm29 {%k2}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x0a,0x34,0xec]
+          vpmovzxwq %xmm20, %xmm29 {%k2}
+
+// CHECK: vpmovzxwq %xmm20, %xmm29 {%k2} {z}
+// CHECK:  encoding: [0x62,0x22,0x7d,0x8a,0x34,0xec]
+          vpmovzxwq %xmm20, %xmm29 {%k2} {z}
+
+// CHECK: vpmovzxwq (%rcx), %xmm29
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x34,0x29]
+          vpmovzxwq (%rcx), %xmm29
+
+// CHECK: vpmovzxwq 291(%rax,%r14,8), %xmm29
+// CHECK:  encoding: [0x62,0x22,0x7d,0x08,0x34,0xac,0xf0,0x23,0x01,0x00,0x00]
+          vpmovzxwq 291(%rax,%r14,8), %xmm29
+
+// CHECK: vpmovzxwq 508(%rdx), %xmm29
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x34,0x6a,0x7f]
+          vpmovzxwq 508(%rdx), %xmm29
+
+// CHECK: vpmovzxwq 512(%rdx), %xmm29
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x34,0xaa,0x00,0x02,0x00,0x00]
+          vpmovzxwq 512(%rdx), %xmm29
+
+// CHECK: vpmovzxwq -512(%rdx), %xmm29
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x34,0x6a,0x80]
+          vpmovzxwq -512(%rdx), %xmm29
+
+// CHECK: vpmovzxwq -516(%rdx), %xmm29
+// CHECK:  encoding: [0x62,0x62,0x7d,0x08,0x34,0xaa,0xfc,0xfd,0xff,0xff]
+          vpmovzxwq -516(%rdx), %xmm29
+
+// CHECK: vpmovzxwq %xmm25, %ymm18
+// CHECK:  encoding: [0x62,0x82,0x7d,0x28,0x34,0xd1]
+          vpmovzxwq %xmm25, %ymm18
+
+// CHECK: vpmovzxwq %xmm25, %ymm18 {%k1}
+// CHECK:  encoding: [0x62,0x82,0x7d,0x29,0x34,0xd1]
+          vpmovzxwq %xmm25, %ymm18 {%k1}
+
+// CHECK: vpmovzxwq %xmm25, %ymm18 {%k1} {z}
+// CHECK:  encoding: [0x62,0x82,0x7d,0xa9,0x34,0xd1]
+          vpmovzxwq %xmm25, %ymm18 {%k1} {z}
+
+// CHECK: vpmovzxwq (%rcx), %ymm18
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x34,0x11]
+          vpmovzxwq (%rcx), %ymm18
+
+// CHECK: vpmovzxwq 291(%rax,%r14,8), %ymm18
+// CHECK:  encoding: [0x62,0xa2,0x7d,0x28,0x34,0x94,0xf0,0x23,0x01,0x00,0x00]
+          vpmovzxwq 291(%rax,%r14,8), %ymm18
+
+// CHECK: vpmovzxwq 1016(%rdx), %ymm18
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x34,0x52,0x7f]
+          vpmovzxwq 1016(%rdx), %ymm18
+
+// CHECK: vpmovzxwq 1024(%rdx), %ymm18
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x34,0x92,0x00,0x04,0x00,0x00]
+          vpmovzxwq 1024(%rdx), %ymm18
+
+// CHECK: vpmovzxwq -1024(%rdx), %ymm18
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x34,0x52,0x80]
+          vpmovzxwq -1024(%rdx), %ymm18
+
+// CHECK: vpmovzxwq -1032(%rdx), %ymm18
+// CHECK:  encoding: [0x62,0xe2,0x7d,0x28,0x34,0x92,0xf8,0xfb,0xff,0xff]
+          vpmovzxwq -1032(%rdx), %ymm18
+
 // CHECK: vpmulld %xmm24, %xmm19, %xmm25
 // CHECK:  encoding: [0x62,0x02,0x65,0x00,0x40,0xc8]
           vpmulld %xmm24, %xmm19, %xmm25





More information about the llvm-commits mailing list