[llvm] r238301 - AVX-512: Implemented all forms of sign-extend and zero-extend instructions for KNL and SKX
Elena Demikhovsky
elena.demikhovsky at intel.com
Wed May 27 01:15:19 PDT 2015
Author: delena
Date: Wed May 27 03:15:19 2015
New Revision: 238301
URL: http://llvm.org/viewvc/llvm-project?rev=238301&view=rev
Log:
AVX-512: Implemented all forms of sign-extend and zero-extend instructions for KNL and SKX
Implemented DAG lowering for all these forms.
Added tests for DAG lowering and encoding.
Patch by Igor Breger (igor.breger at intel.com)
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/test/CodeGen/X86/avx512-trunc-ext.ll
llvm/trunk/test/MC/X86/x86-64-avx512bw.s
llvm/trunk/test/MC/X86/x86-64-avx512bw_vl.s
llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=238301&r1=238300&r2=238301&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed May 27 03:15:19 2015
@@ -1261,6 +1261,19 @@ X86TargetLowering::X86TargetLowering(con
for (MVT VT : MVT::fp_vector_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i32, MVT::v16i8, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v16i32, MVT::v16i8, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i32, MVT::v16i16, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v16i32, MVT::v16i16, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v32i16, MVT::v32i8, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v32i16, MVT::v32i8, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i8, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i8, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i16, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i16, Legal);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i64, MVT::v8i32, Legal);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v8i64, MVT::v8i32, Legal);
+
setOperationAction(ISD::BR_CC, MVT::i1, Expand);
setOperationAction(ISD::SETCC, MVT::i1, Custom);
setOperationAction(ISD::XOR, MVT::i1, Legal);
@@ -1479,7 +1492,11 @@ X86TargetLowering::X86TargetLowering(con
setOperationAction(ISD::SELECT, MVT::v32i1, Custom);
setOperationAction(ISD::SELECT, MVT::v64i1, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
+ setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
+ setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i1, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i1, Custom);
setOperationAction(ISD::VSELECT, MVT::v32i16, Legal);
@@ -12093,13 +12110,13 @@ static SDValue LowerAVXExtend(SDValue Op
}
static SDValue LowerZERO_EXTEND_AVX512(SDValue Op,
- SelectionDAG &DAG) {
+ const X86Subtarget *Subtarget, SelectionDAG &DAG) {
MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
MVT InVT = In.getSimpleValueType();
SDLoc DL(Op);
unsigned int NumElts = VT.getVectorNumElements();
- if (NumElts != 8 && NumElts != 16)
+ if (NumElts != 8 && NumElts != 16 && !Subtarget->hasBWI())
return SDValue();
if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1)
@@ -12137,7 +12154,7 @@ static SDValue LowerZERO_EXTEND(SDValue
MVT SVT = In.getSimpleValueType();
if (VT.is512BitVector() || SVT.getVectorElementType() == MVT::i1)
- return LowerZERO_EXTEND_AVX512(Op, DAG);
+ return LowerZERO_EXTEND_AVX512(Op, Subtarget, DAG);
if (Subtarget->hasFp256()) {
SDValue Res = LowerAVXExtend(Op, DAG, Subtarget);
@@ -13876,7 +13893,8 @@ SDValue X86TargetLowering::LowerSELECT(S
return DAG.getNode(X86ISD::CMOV, DL, VTs, Ops);
}
-static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, const X86Subtarget *Subtarget,
+static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op,
+ const X86Subtarget *Subtarget,
SelectionDAG &DAG) {
MVT VT = Op->getSimpleValueType(0);
SDValue In = Op->getOperand(0);
@@ -13902,7 +13920,7 @@ static SDValue LowerSIGN_EXTEND_AVX512(S
unsigned int NumElts = VT.getVectorNumElements();
- if (NumElts != 8 && NumElts != 16)
+ if (NumElts != 8 && NumElts != 16 && !Subtarget->hasBWI())
return SDValue();
if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1) {
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=238301&r1=238300&r2=238301&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Wed May 27 03:15:19 2015
@@ -5261,79 +5261,146 @@ def : Pat<(v8i32 (X86vtruncm VK8WM:$mas
(VPMOVQDrrkz VK8WM:$mask, VR512:$src)>;
-multiclass avx512_extend<bits<8> opc, string OpcodeStr, RegisterClass KRC,
- RegisterClass DstRC, RegisterClass SrcRC, SDNode OpNode,
- PatFrag mem_frag, X86MemOperand x86memop,
- ValueType OpVT, ValueType InVT> {
-
- def rr : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
- (ins SrcRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst, (OpVT (OpNode (InVT SrcRC:$src))))]>, EVEX;
-
- def rrk : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
- (ins KRC:$mask, SrcRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}"),
- []>, EVEX, EVEX_K;
-
- def rrkz : AVX5128I<opc, MRMSrcReg, (outs DstRC:$dst),
- (ins KRC:$mask, SrcRC:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
- []>, EVEX, EVEX_KZ;
+multiclass avx512_extend_common<bits<8> opc, string OpcodeStr,
+ X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
+ X86MemOperand x86memop, PatFrag LdFrag, SDNode OpNode>{
+
+ defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
+ (ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
+ (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
+ EVEX;
let mayLoad = 1 in {
- def rm : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
- (ins x86memop:$src),
- !strconcat(OpcodeStr,"\t{$src, $dst|$dst, $src}"),
- [(set DstRC:$dst,
- (OpVT (OpNode (InVT (bitconvert (mem_frag addr:$src))))))]>,
- EVEX;
-
- def rmk : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
- (ins KRC:$mask, x86memop:$src),
- !strconcat(OpcodeStr,"\t{$src, $dst {${mask}} |$dst {${mask}}, $src}"),
- []>,
- EVEX, EVEX_K;
-
- def rmkz : AVX5128I<opc, MRMSrcMem, (outs DstRC:$dst),
- (ins KRC:$mask, x86memop:$src),
- !strconcat(OpcodeStr,"\t{$src, $dst {${mask}} {z}|$dst {${mask}} {z}, $src}"),
- []>,
- EVEX, EVEX_KZ;
+ defm rm : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
+ (ins x86memop:$src), OpcodeStr ,"$src", "$src",
+ (DestInfo.VT (LdFrag addr:$src))>,
+ EVEX;
}
}
-defm VPMOVZXBDZ: avx512_extend<0x31, "vpmovzxbd", VK16WM, VR512, VR128X, X86vzext,
- loadv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
- EVEX_CD8<8, CD8VQ>;
-defm VPMOVZXBQZ: avx512_extend<0x32, "vpmovzxbq", VK8WM, VR512, VR128X, X86vzext,
- loadv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
- EVEX_CD8<8, CD8VO>;
-defm VPMOVZXWDZ: avx512_extend<0x33, "vpmovzxwd", VK16WM, VR512, VR256X, X86vzext,
- loadv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
- EVEX_CD8<16, CD8VH>;
-defm VPMOVZXWQZ: avx512_extend<0x34, "vpmovzxwq", VK8WM, VR512, VR128X, X86vzext,
- loadv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
- EVEX_CD8<16, CD8VQ>;
-defm VPMOVZXDQZ: avx512_extend<0x35, "vpmovzxdq", VK8WM, VR512, VR256X, X86vzext,
- loadv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
- EVEX_CD8<32, CD8VH>;
-
-defm VPMOVSXBDZ: avx512_extend<0x21, "vpmovsxbd", VK16WM, VR512, VR128X, X86vsext,
- loadv2i64, i128mem, v16i32, v16i8>, EVEX_V512,
- EVEX_CD8<8, CD8VQ>;
-defm VPMOVSXBQZ: avx512_extend<0x22, "vpmovsxbq", VK8WM, VR512, VR128X, X86vsext,
- loadv2i64, i128mem, v8i64, v16i8>, EVEX_V512,
- EVEX_CD8<8, CD8VO>;
-defm VPMOVSXWDZ: avx512_extend<0x23, "vpmovsxwd", VK16WM, VR512, VR256X, X86vsext,
- loadv4i64, i256mem, v16i32, v16i16>, EVEX_V512,
- EVEX_CD8<16, CD8VH>;
-defm VPMOVSXWQZ: avx512_extend<0x24, "vpmovsxwq", VK8WM, VR512, VR128X, X86vsext,
- loadv2i64, i128mem, v8i64, v8i16>, EVEX_V512,
- EVEX_CD8<16, CD8VQ>;
-defm VPMOVSXDQZ: avx512_extend<0x25, "vpmovsxdq", VK8WM, VR512, VR256X, X86vsext,
- loadv4i64, i256mem, v8i64, v8i32>, EVEX_V512,
- EVEX_CD8<32, CD8VH>;
+multiclass avx512_extend_BW<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
+ let Predicates = [HasVLX, HasBWI] in {
+ defm Z128: avx512_extend_common<opc, OpcodeStr, v8i16x_info,
+ v16i8x_info, i64mem, LdFrag, OpNode>,
+ EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128;
+
+ defm Z256: avx512_extend_common<opc, OpcodeStr, v16i16x_info,
+ v16i8x_info, i128mem, LdFrag, OpNode>,
+ EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256;
+ }
+ let Predicates = [HasBWI] in {
+ defm Z : avx512_extend_common<opc, OpcodeStr, v32i16_info,
+ v32i8x_info, i256mem, LdFrag, OpNode>,
+ EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512;
+ }
+}
+
+multiclass avx512_extend_BD<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
+ let Predicates = [HasVLX, HasAVX512] in {
+ defm Z128: avx512_extend_common<opc, OpcodeStr, v4i32x_info,
+ v16i8x_info, i32mem, LdFrag, OpNode>,
+ EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128;
+
+ defm Z256: avx512_extend_common<opc, OpcodeStr, v8i32x_info,
+ v16i8x_info, i64mem, LdFrag, OpNode>,
+ EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256;
+ }
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_extend_common<opc, OpcodeStr, v16i32_info,
+ v16i8x_info, i128mem, LdFrag, OpNode>,
+ EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512;
+ }
+}
+
+multiclass avx512_extend_BQ<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
+ let Predicates = [HasVLX, HasAVX512] in {
+ defm Z128: avx512_extend_common<opc, OpcodeStr, v2i64x_info,
+ v16i8x_info, i16mem, LdFrag, OpNode>,
+ EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128;
+
+ defm Z256: avx512_extend_common<opc, OpcodeStr, v4i64x_info,
+ v16i8x_info, i32mem, LdFrag, OpNode>,
+ EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256;
+ }
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_extend_common<opc, OpcodeStr, v8i64_info,
+ v16i8x_info, i64mem, LdFrag, OpNode>,
+ EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512;
+ }
+}
+
+multiclass avx512_extend_WD<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
+ let Predicates = [HasVLX, HasAVX512] in {
+ defm Z128: avx512_extend_common<opc, OpcodeStr, v4i32x_info,
+ v8i16x_info, i64mem, LdFrag, OpNode>,
+ EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128;
+
+ defm Z256: avx512_extend_common<opc, OpcodeStr, v8i32x_info,
+ v8i16x_info, i128mem, LdFrag, OpNode>,
+ EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256;
+ }
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_extend_common<opc, OpcodeStr, v16i32_info,
+ v16i16x_info, i256mem, LdFrag, OpNode>,
+ EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512;
+ }
+}
+
+multiclass avx512_extend_WQ<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi16")> {
+ let Predicates = [HasVLX, HasAVX512] in {
+ defm Z128: avx512_extend_common<opc, OpcodeStr, v2i64x_info,
+ v8i16x_info, i32mem, LdFrag, OpNode>,
+ EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128;
+
+ defm Z256: avx512_extend_common<opc, OpcodeStr, v4i64x_info,
+ v8i16x_info, i64mem, LdFrag, OpNode>,
+ EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256;
+ }
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_extend_common<opc, OpcodeStr, v8i64_info,
+ v8i16x_info, i128mem, LdFrag, OpNode>,
+ EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512;
+ }
+}
+
+multiclass avx512_extend_DQ<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi32")> {
+
+ let Predicates = [HasVLX, HasAVX512] in {
+ defm Z128: avx512_extend_common<opc, OpcodeStr, v2i64x_info,
+ v4i32x_info, i64mem, LdFrag, OpNode>,
+ EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128;
+
+ defm Z256: avx512_extend_common<opc, OpcodeStr, v4i64x_info,
+ v4i32x_info, i128mem, LdFrag, OpNode>,
+ EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
+ }
+ let Predicates = [HasAVX512] in {
+ defm Z : avx512_extend_common<opc, OpcodeStr, v8i64_info,
+ v8i32x_info, i256mem, LdFrag, OpNode>,
+ EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512;
+ }
+}
+
+defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, "z">;
+defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, "z">;
+defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, "z">;
+defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, "z">;
+defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, "z">;
+defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, "z">;
+
+
+defm VPMOVSXBW: avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, "s">;
+defm VPMOVSXBD: avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, "s">;
+defm VPMOVSXBQ: avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, "s">;
+defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, "s">;
+defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, "s">;
+defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, "s">;
//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=238301&r1=238300&r2=238301&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Wed May 27 03:15:19 2015
@@ -5850,10 +5850,10 @@ multiclass SS41I_pmovx_rm_all<bits<8> op
OpndItins SSEItins, OpndItins AVXItins,
OpndItins AVX2Itins> {
defm NAME : SS41I_pmovx_rrrm<opc, OpcodeStr, MemOp, VR128, VR128, SSEItins>;
- let Predicates = [HasAVX] in
+ let Predicates = [HasAVX, NoVLX] in
defm V#NAME : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemOp,
VR128, VR128, AVXItins>, VEX;
- let Predicates = [HasAVX2] in
+ let Predicates = [HasAVX2, NoVLX] in
defm V#NAME#Y : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemYOp,
VR256, VR128, AVX2Itins>, VEX, VEX_L;
}
@@ -5988,7 +5988,7 @@ multiclass SS41I_pmovx_avx2_patterns<str
(!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
}
-let Predicates = [HasAVX2] in {
+let Predicates = [HasAVX2, NoVLX] in {
defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", "s", X86vsext>;
defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", "z", X86vzext>;
}
@@ -6087,7 +6087,7 @@ multiclass SS41I_pmovx_patterns<string O
(!cast<I>(OpcPrefix#DQrm) addr:$src)>;
}
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX] in {
defm : SS41I_pmovx_patterns<"VPMOVSX", "s", X86vsext, extloadi32i16>;
defm : SS41I_pmovx_patterns<"VPMOVZX", "z", X86vzext, loadi16_anyext>;
}
Modified: llvm/trunk/test/CodeGen/X86/avx512-trunc-ext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-trunc-ext.ll?rev=238301&r1=238300&r2=238301&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-trunc-ext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-trunc-ext.ll Wed May 27 03:15:19 2015
@@ -1,95 +1,843 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX %s
-
-; CHECK-LABEL: trunc_16x32_to_16x8
-; CHECK: vpmovdb
-; CHECK: ret
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=SKX
+
+
+; KNL-LABEL: trunc_16x32_to_16x8
+; KNL: vpmovdb
+; KNL: ret
define <16 x i8> @trunc_16x32_to_16x8(<16 x i32> %i) nounwind readnone {
%x = trunc <16 x i32> %i to <16 x i8>
ret <16 x i8> %x
}
-; CHECK-LABEL: trunc_8x64_to_8x16
-; CHECK: vpmovqw
-; CHECK: ret
+; KNL-LABEL: trunc_8x64_to_8x16
+; KNL: vpmovqw
+; KNL: ret
define <8 x i16> @trunc_8x64_to_8x16(<8 x i64> %i) nounwind readnone {
%x = trunc <8 x i64> %i to <8 x i16>
ret <8 x i16> %x
}
-
-; CHECK-LABEL: zext_16x8_to_16x32
-; CHECK: vpmovzxbd {{.*}}%zmm
-; CHECK: ret
+;SKX-LABEL: zext_8x8mem_to_8x16:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovw2m %xmm0, %k1
+;SKX-NEXT: vpmovzxbw (%rdi), %xmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
+ %a = load <8 x i8>,<8 x i8> *%i,align 1
+ %x = zext <8 x i8> %a to <8 x i16>
+ %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
+ ret <8 x i16> %ret
+}
+
+;SKX-LABEL: sext_8x8mem_to_8x16:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovw2m %xmm0, %k1
+;SKX-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
+ %a = load <8 x i8>,<8 x i8> *%i,align 1
+ %x = sext <8 x i8> %a to <8 x i16>
+ %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> zeroinitializer
+ ret <8 x i16> %ret
+}
+
+;SKX-LABEL: zext_16x8mem_to_16x16:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovb2m %xmm0, %k1
+;SKX-NEXT: vpmovzxbw (%rdi), %ymm0 {%k1} {z}
+;SKX-NEXT: retq
+define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
+ %a = load <16 x i8>,<16 x i8> *%i,align 1
+ %x = zext <16 x i8> %a to <16 x i16>
+ %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
+ ret <16 x i16> %ret
+}
+
+;SKX-LABEL: sext_16x8mem_to_16x16:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovb2m %xmm0, %k1
+;SKX-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z}
+;SKX-NEXT: retq
+define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
+ %a = load <16 x i8>,<16 x i8> *%i,align 1
+ %x = sext <16 x i8> %a to <16 x i16>
+ %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
+ ret <16 x i16> %ret
+}
+
+;SKX-LABEL: zext_16x8_to_16x16:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovzxbw %xmm0, %ymm0
+;SKX-NEXT: retq
+define <16 x i16> @zext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
+ %x = zext <16 x i8> %a to <16 x i16>
+ ret <16 x i16> %x
+}
+
+;SKX-LABEL: zext_16x8_to_16x16_mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovb2m %xmm1, %k1
+;SKX-NEXT: vpmovzxbw %xmm0, %ymm0 {%k1} {z}
+;SKX-NEXT: retq
+define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
+ %x = zext <16 x i8> %a to <16 x i16>
+ %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
+ ret <16 x i16> %ret
+}
+
+;SKX-LABEL: sext_16x8_to_16x16:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovsxbw %xmm0, %ymm0
+;SKX-NEXT: retq
+define <16 x i16> @sext_16x8_to_16x16(<16 x i8> %a ) nounwind readnone {
+ %x = sext <16 x i8> %a to <16 x i16>
+ ret <16 x i16> %x
+}
+
+;SKX-LABEL: sext_16x8_to_16x16_mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovb2m %xmm1, %k1
+;SKX-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z}
+;SKX-NEXT: retq
+define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwind readnone {
+ %x = sext <16 x i8> %a to <16 x i16>
+ %ret = select <16 x i1> %mask, <16 x i16> %x, <16 x i16> zeroinitializer
+ ret <16 x i16> %ret
+}
+
+;SKX-LABEL: zext_32x8mem_to_32x16:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovb2m %ymm0, %k1
+;SKX-NEXT: vpmovzxbw (%rdi), %zmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
+ %a = load <32 x i8>,<32 x i8> *%i,align 1
+ %x = zext <32 x i8> %a to <32 x i16>
+ %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
+ ret <32 x i16> %ret
+}
+
+;SKX-LABEL: sext_32x8mem_to_32x16:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovb2m %ymm0, %k1
+;SKX-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwind readnone {
+ %a = load <32 x i8>,<32 x i8> *%i,align 1
+ %x = sext <32 x i8> %a to <32 x i16>
+ %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
+ ret <32 x i16> %ret
+}
+
+;SKX-LABEL: zext_32x8_to_32x16:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovzxbw %ymm0, %zmm0
+;SKX-NEXT: retq
+define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
+ %x = zext <32 x i8> %a to <32 x i16>
+ ret <32 x i16> %x
+}
+
+;SKX-LABEL: zext_32x8_to_32x16_mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovb2m %ymm1, %k1
+;SKX-NEXT: vpmovzxbw %ymm0, %zmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
+ %x = zext <32 x i8> %a to <32 x i16>
+ %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
+ ret <32 x i16> %ret
+}
+
+;SKX-LABEL: sext_32x8_to_32x16:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovsxbw %ymm0, %zmm0
+;SKX-NEXT: retq
+define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone {
+ %x = sext <32 x i8> %a to <32 x i16>
+ ret <32 x i16> %x
+}
+
+;SKX-LABEL: sext_32x8_to_32x16_mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovb2m %ymm1, %k1
+;SKX-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwind readnone {
+ %x = sext <32 x i8> %a to <32 x i16>
+ %ret = select <32 x i1> %mask, <32 x i16> %x, <32 x i16> zeroinitializer
+ ret <32 x i16> %ret
+}
+
+;SKX-LABEL: zext_4x8mem_to_4x32:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovd2m %xmm0, %k1
+;SKX-NEXT: vpmovzxbd (%rdi), %xmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
+ %a = load <4 x i8>,<4 x i8> *%i,align 1
+ %x = zext <4 x i8> %a to <4 x i32>
+ %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
+ ret <4 x i32> %ret
+}
+
+;SKX-LABEL: sext_4x8mem_to_4x32:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovd2m %xmm0, %k1
+;SKX-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
+ %a = load <4 x i8>,<4 x i8> *%i,align 1
+ %x = sext <4 x i8> %a to <4 x i32>
+ %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
+ ret <4 x i32> %ret
+}
+
+;SKX-LABEL: zext_8x8mem_to_8x32:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovw2m %xmm0, %k1
+;SKX-NEXT: vpmovzxbd (%rdi), %ymm0 {%k1} {z}
+;SKX-NEXT: retq
+define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
+ %a = load <8 x i8>,<8 x i8> *%i,align 1
+ %x = zext <8 x i8> %a to <8 x i32>
+ %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+ ret <8 x i32> %ret
+}
+
+;SKX-LABEL: sext_8x8mem_to_8x32:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovw2m %xmm0, %k1
+;SKX-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z}
+;SKX-NEXT: retq
+define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
+ %a = load <8 x i8>,<8 x i8> *%i,align 1
+ %x = sext <8 x i8> %a to <8 x i32>
+ %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+ ret <8 x i32> %ret
+}
+
+;KNL-LABEL: zext_16x8mem_to_16x32:
+;KNL: vpmovzxbd (%rdi), %zmm0 {%k1} {z}
+;KNL-NEXT: retq
+define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
+ %a = load <16 x i8>,<16 x i8> *%i,align 1
+ %x = zext <16 x i8> %a to <16 x i32>
+ %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
+ ret <16 x i32> %ret
+}
+
+;KNL-LABEL: sext_16x8mem_to_16x32:
+;KNL: vpmovsxbd (%rdi), %zmm0 {%k1} {z}
+;KNL-NEXT: retq
+define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwind readnone {
+ %a = load <16 x i8>,<16 x i8> *%i,align 1
+ %x = sext <16 x i8> %a to <16 x i32>
+ %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
+ ret <16 x i32> %ret
+}
+
+;KNL-LABEL: zext_16x8_to_16x32_mask:
+;KNL: vpmovzxbd %xmm0, %zmm0 {%k1} {z}
+;KNL-NEXT: retq
+define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
+ %x = zext <16 x i8> %a to <16 x i32>
+ %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
+ ret <16 x i32> %ret
+}
+
+;KNL-LABEL: sext_16x8_to_16x32_mask:
+;KNL: vpmovsxbd %xmm0, %zmm0 {%k1} {z}
+;KNL-NEXT: retq
+define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounwind readnone {
+ %x = sext <16 x i8> %a to <16 x i32>
+ %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
+ ret <16 x i32> %ret
+}
+
+; KNL-LABEL: zext_16x8_to_16x32
+; KNL: vpmovzxbd {{.*}}%zmm
+; KNL: ret
define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
%x = zext <16 x i8> %i to <16 x i32>
ret <16 x i32> %x
}
-; CHECK-LABEL: sext_16x8_to_16x32
-; CHECK: vpmovsxbd {{.*}}%zmm
-; CHECK: ret
+; KNL-LABEL: sext_16x8_to_16x32
+; KNL: vpmovsxbd {{.*}}%zmm
+; KNL: ret
define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone {
%x = sext <16 x i8> %i to <16 x i32>
ret <16 x i32> %x
}
+;SKX-LABEL: zext_2x8mem_to_2x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovq2m %xmm0, %k1
+;SKX-NEXT: vpmovzxbq (%rdi), %xmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone {
+ %a = load <2 x i8>,<2 x i8> *%i,align 1
+ %x = zext <2 x i8> %a to <2 x i64>
+ %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
+ ret <2 x i64> %ret
+}
+;SKX-LABEL: sext_2x8mem_to_2x64mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovq2m %xmm0, %k1
+;SKX-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwind readnone {
+ %a = load <2 x i8>,<2 x i8> *%i,align 1
+ %x = sext <2 x i8> %a to <2 x i64>
+ %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
+ ret <2 x i64> %ret
+}
+;SKX-LABEL: sext_2x8mem_to_2x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovsxbq (%rdi), %xmm0
+;SKX-NEXT: retq
+define <2 x i64> @sext_2x8mem_to_2x64(<2 x i8> *%i) nounwind readnone {
+ %a = load <2 x i8>,<2 x i8> *%i,align 1
+ %x = sext <2 x i8> %a to <2 x i64>
+ ret <2 x i64> %x
+}
+
+;SKX-LABEL: zext_4x8mem_to_4x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovd2m %xmm0, %k1
+;SKX-NEXT: vpmovzxbq (%rdi), %ymm0 {%k1} {z}
+;SKX-NEXT: retq
+define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
+ %a = load <4 x i8>,<4 x i8> *%i,align 1
+ %x = zext <4 x i8> %a to <4 x i64>
+ %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
+ ret <4 x i64> %ret
+}
+
+;SKX-LABEL: sext_4x8mem_to_4x64mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovd2m %xmm0, %k1
+;SKX-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z}
+;SKX-NEXT: retq
+define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwind readnone {
+ %a = load <4 x i8>,<4 x i8> *%i,align 1
+ %x = sext <4 x i8> %a to <4 x i64>
+ %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
+ ret <4 x i64> %ret
+}
+
+;SKX-LABEL: sext_4x8mem_to_4x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovsxbq (%rdi), %ymm0
+;SKX-NEXT: retq
+define <4 x i64> @sext_4x8mem_to_4x64(<4 x i8> *%i) nounwind readnone {
+ %a = load <4 x i8>,<4 x i8> *%i,align 1
+ %x = sext <4 x i8> %a to <4 x i64>
+ ret <4 x i64> %x
+}
+
+;KNL-LABEL: zext_8x8mem_to_8x64:
+;KNL: vpmovzxbq (%rdi), %zmm0 {%k1} {z}
+;KNL-NEXT: retq
+define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
+ %a = load <8 x i8>,<8 x i8> *%i,align 1
+ %x = zext <8 x i8> %a to <8 x i64>
+ %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
+ ret <8 x i64> %ret
+}
+
+;KNL-LABEL: sext_8x8mem_to_8x64mask:
+;KNL: vpmovsxbq (%rdi), %zmm0 {%k1} {z}
+;KNL-NEXT: retq
+define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwind readnone {
+ %a = load <8 x i8>,<8 x i8> *%i,align 1
+ %x = sext <8 x i8> %a to <8 x i64>
+ %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
+ ret <8 x i64> %ret
+}
+
+;KNL-LABEL: sext_8x8mem_to_8x64:
+;KNL: vpmovsxbq (%rdi), %zmm0
+;KNL-NEXT: retq
+define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readnone {
+ %a = load <8 x i8>,<8 x i8> *%i,align 1
+ %x = sext <8 x i8> %a to <8 x i64>
+ ret <8 x i64> %x
+}
-; CHECK-LABEL: zext_16x16_to_16x32
-; CHECK: vpmovzxwd {{.*}}%zmm
-; CHECK: ret
-define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %i) nounwind readnone {
- %x = zext <16 x i16> %i to <16 x i32>
+;SKX-LABEL: zext_4x16mem_to_4x32:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovd2m %xmm0, %k1
+;SKX-NEXT: vpmovzxwd (%rdi), %xmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
+ %a = load <4 x i16>,<4 x i16> *%i,align 1
+ %x = zext <4 x i16> %a to <4 x i32>
+ %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
+ ret <4 x i32> %ret
+}
+
+;SKX-LABEL: sext_4x16mem_to_4x32mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovd2m %xmm0, %k1
+;SKX-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
+ %a = load <4 x i16>,<4 x i16> *%i,align 1
+ %x = sext <4 x i16> %a to <4 x i32>
+ %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> zeroinitializer
+ ret <4 x i32> %ret
+}
+
+;SKX-LABEL: sext_4x16mem_to_4x32:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovsxwd (%rdi), %xmm0
+;SKX-NEXT: retq
+define <4 x i32> @sext_4x16mem_to_4x32(<4 x i16> *%i) nounwind readnone {
+ %a = load <4 x i16>,<4 x i16> *%i,align 1
+ %x = sext <4 x i16> %a to <4 x i32>
+ ret <4 x i32> %x
+}
+
+
+;SKX-LABEL: zext_8x16mem_to_8x32:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovw2m %xmm0, %k1
+;SKX-NEXT: vpmovzxwd (%rdi), %ymm0 {%k1} {z}
+;SKX-NEXT: retq
+define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
+ %a = load <8 x i16>,<8 x i16> *%i,align 1
+ %x = zext <8 x i16> %a to <8 x i32>
+ %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+ ret <8 x i32> %ret
+}
+
+;SKX-LABEL: sext_8x16mem_to_8x32mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovw2m %xmm0, %k1
+;SKX-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z}
+;SKX-NEXT: retq
+define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
+ %a = load <8 x i16>,<8 x i16> *%i,align 1
+ %x = sext <8 x i16> %a to <8 x i32>
+ %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+ ret <8 x i32> %ret
+}
+
+;SKX-LABEL: sext_8x16mem_to_8x32:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovsxwd (%rdi), %ymm0
+;SKX-NEXT: retq
+define <8 x i32> @sext_8x16mem_to_8x32(<8 x i16> *%i) nounwind readnone {
+ %a = load <8 x i16>,<8 x i16> *%i,align 1
+ %x = sext <8 x i16> %a to <8 x i32>
+ ret <8 x i32> %x
+}
+
+;SKX-LABEL: zext_8x16_to_8x32mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovw2m %xmm1, %k1
+;SKX-NEXT: vpmovzxwd %xmm0, %ymm0 {%k1} {z}
+;SKX-NEXT: retq
+define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
+ %x = zext <8 x i16> %a to <8 x i32>
+ %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> zeroinitializer
+ ret <8 x i32> %ret
+}
+
+;SKX-LABEL: zext_8x16_to_8x32:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovzxwd %xmm0, %ymm0
+;SKX-NEXT: retq
+define <8 x i32> @zext_8x16_to_8x32(<8 x i16> %a ) nounwind readnone {
+ %x = zext <8 x i16> %a to <8 x i32>
+ ret <8 x i32> %x
+}
+
+;SKX-LABEL: zext_16x16mem_to_16x32:
+;KNL-LABEL: zext_16x16mem_to_16x32:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovb2m %xmm0, %k1
+;SKX-NEXT: vpmovzxwd (%rdi), %zmm0 {%k1} {z}
+;KNL: vpmovzxwd (%rdi), %zmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone {
+ %a = load <16 x i16>,<16 x i16> *%i,align 1
+ %x = zext <16 x i16> %a to <16 x i32>
+ %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
+ ret <16 x i32> %ret
+}
+
+;SKX-LABEL: sext_16x16mem_to_16x32mask:
+;KNL-LABEL: sext_16x16mem_to_16x32mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovb2m %xmm0, %k1
+;SKX-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z}
+;KNL: vpmovsxwd (%rdi), %zmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) nounwind readnone {
+ %a = load <16 x i16>,<16 x i16> *%i,align 1
+ %x = sext <16 x i16> %a to <16 x i32>
+ %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
+ ret <16 x i32> %ret
+}
+
+;SKX-LABEL: sext_16x16mem_to_16x32:
+;KNL-LABEL: sext_16x16mem_to_16x32:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovsxwd (%rdi), %zmm0
+;KNL: vpmovsxwd (%rdi), %zmm0
+;SKX-NEXT: retq
+define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readnone {
+ %a = load <16 x i16>,<16 x i16> *%i,align 1
+ %x = sext <16 x i16> %a to <16 x i32>
+ ret <16 x i32> %x
+}
+;SKX-LABEL: zext_16x16_to_16x32mask:
+;KNL-LABEL: zext_16x16_to_16x32mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovb2m %xmm1, %k1
+;SKX-NEXT: vpmovzxwd %ymm0, %zmm0 {%k1} {z}
+;KNL: vpmovzxwd %ymm0, %zmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) nounwind readnone {
+ %x = zext <16 x i16> %a to <16 x i32>
+ %ret = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> zeroinitializer
+ ret <16 x i32> %ret
+}
+
+;SKX-LABEL: zext_16x16_to_16x32:
+;KNL-LABEL: zext_16x16_to_16x32:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovzxwd %ymm0, %zmm0
+;KNL: vpmovzxwd %ymm0, %zmm0
+;SKX-NEXT: retq
+define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a ) nounwind readnone {
+ %x = zext <16 x i16> %a to <16 x i32>
ret <16 x i32> %x
}
-; CHECK-LABEL: zext_8x16_to_8x64
-; CHECK: vpmovzxwq
-; CHECK: ret
-define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %i) nounwind readnone {
- %x = zext <8 x i16> %i to <8 x i64>
+;SKX-LABEL: zext_2x16mem_to_2x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovq2m %xmm0, %k1
+;SKX-NEXT: vpmovzxwq (%rdi), %xmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone {
+ %a = load <2 x i16>,<2 x i16> *%i,align 1
+ %x = zext <2 x i16> %a to <2 x i64>
+ %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
+ ret <2 x i64> %ret
+}
+
+;SKX-LABEL: sext_2x16mem_to_2x64mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovq2m %xmm0, %k1
+;SKX-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounwind readnone {
+ %a = load <2 x i16>,<2 x i16> *%i,align 1
+ %x = sext <2 x i16> %a to <2 x i64>
+ %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
+ ret <2 x i64> %ret
+}
+
+;SKX-LABEL: sext_2x16mem_to_2x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovsxwq (%rdi), %xmm0
+;SKX-NEXT: retq
+define <2 x i64> @sext_2x16mem_to_2x64(<2 x i16> *%i) nounwind readnone {
+ %a = load <2 x i16>,<2 x i16> *%i,align 1
+ %x = sext <2 x i16> %a to <2 x i64>
+ ret <2 x i64> %x
+}
+
+;SKX-LABEL: zext_4x16mem_to_4x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovd2m %xmm0, %k1
+;SKX-NEXT: vpmovzxwq (%rdi), %ymm0 {%k1} {z}
+;SKX-NEXT: retq
+define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
+ %a = load <4 x i16>,<4 x i16> *%i,align 1
+ %x = zext <4 x i16> %a to <4 x i64>
+ %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
+ ret <4 x i64> %ret
+}
+
+;SKX-LABEL: sext_4x16mem_to_4x64mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovd2m %xmm0, %k1
+;SKX-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z}
+;SKX-NEXT: retq
+define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounwind readnone {
+ %a = load <4 x i16>,<4 x i16> *%i,align 1
+ %x = sext <4 x i16> %a to <4 x i64>
+ %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
+ ret <4 x i64> %ret
+}
+
+;SKX-LABEL: sext_4x16mem_to_4x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovsxwq (%rdi), %ymm0
+;SKX-NEXT: retq
+define <4 x i64> @sext_4x16mem_to_4x64(<4 x i16> *%i) nounwind readnone {
+ %a = load <4 x i16>,<4 x i16> *%i,align 1
+ %x = sext <4 x i16> %a to <4 x i64>
+ ret <4 x i64> %x
+}
+
+;SKX-LABEL: zext_8x16mem_to_8x64:
+;KNL-LABEL: zext_8x16mem_to_8x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovw2m %xmm0, %k1
+;SKX-NEXT: vpmovzxwq (%rdi), %zmm0 {%k1} {z}
+;KNL: vpmovzxwq (%rdi), %zmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
+ %a = load <8 x i16>,<8 x i16> *%i,align 1
+ %x = zext <8 x i16> %a to <8 x i64>
+ %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
+ ret <8 x i64> %ret
+}
+
+;SKX-LABEL: sext_8x16mem_to_8x64mask:
+;KNL-LABEL: sext_8x16mem_to_8x64mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovw2m %xmm0, %k1
+;SKX-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z}
+;KNL: vpmovsxwq (%rdi), %zmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounwind readnone {
+ %a = load <8 x i16>,<8 x i16> *%i,align 1
+ %x = sext <8 x i16> %a to <8 x i64>
+ %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
+ ret <8 x i64> %ret
+}
+
+;SKX-LABEL: sext_8x16mem_to_8x64:
+;KNL-LABEL: sext_8x16mem_to_8x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovsxwq (%rdi), %zmm0
+;KNL: vpmovsxwq (%rdi), %zmm0
+;SKX-NEXT: retq
+define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readnone {
+ %a = load <8 x i16>,<8 x i16> *%i,align 1
+ %x = sext <8 x i16> %a to <8 x i64>
+ ret <8 x i64> %x
+}
+
+;SKX-LABEL: zext_8x16_to_8x64mask:
+;KNL-LABEL: zext_8x16_to_8x64mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovw2m %xmm1, %k1
+;SKX-NEXT: vpmovzxwq %xmm0, %zmm0 {%k1} {z}
+;KNL: vpmovzxwq %xmm0, %zmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind readnone {
+ %x = zext <8 x i16> %a to <8 x i64>
+ %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
+ ret <8 x i64> %ret
+}
+
+;SKX-LABEL: zext_8x16_to_8x64:
+;KNL-LABEL: zext_8x16_to_8x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovzxwq %xmm0, %zmm0
+;KNL: vpmovzxwq %xmm0, %zmm0
+;SKX-NEXT: retq
+; KNL: ret
+define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone {
+ %ret = zext <8 x i16> %a to <8 x i64>
+ ret <8 x i64> %ret
+}
+
+;SKX-LABEL: zext_2x32mem_to_2x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovq2m %xmm0, %k1
+;SKX-NEXT: vpmovzxdq (%rdi), %xmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone {
+ %a = load <2 x i32>,<2 x i32> *%i,align 1
+ %x = zext <2 x i32> %a to <2 x i64>
+ %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
+ ret <2 x i64> %ret
+}
+
+;SKX-LABEL: sext_2x32mem_to_2x64mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovq2m %xmm0, %k1
+;SKX-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounwind readnone {
+ %a = load <2 x i32>,<2 x i32> *%i,align 1
+ %x = sext <2 x i32> %a to <2 x i64>
+ %ret = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> zeroinitializer
+ ret <2 x i64> %ret
+}
+
+;SKX-LABEL: sext_2x32mem_to_2x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovsxdq (%rdi), %xmm0
+;SKX-NEXT: retq
+define <2 x i64> @sext_2x32mem_to_2x64(<2 x i32> *%i) nounwind readnone {
+ %a = load <2 x i32>,<2 x i32> *%i,align 1
+ %x = sext <2 x i32> %a to <2 x i64>
+ ret <2 x i64> %x
+}
+
+;SKX-LABEL: zext_4x32mem_to_4x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovd2m %xmm0, %k1
+;SKX-NEXT: vpmovzxdq (%rdi), %ymm0 {%k1} {z}
+;SKX-NEXT: retq
+define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone {
+ %a = load <4 x i32>,<4 x i32> *%i,align 1
+ %x = zext <4 x i32> %a to <4 x i64>
+ %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
+ ret <4 x i64> %ret
+}
+
+;SKX-LABEL: sext_4x32mem_to_4x64mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovd2m %xmm0, %k1
+;SKX-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z}
+;SKX-NEXT: retq
+define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounwind readnone {
+ %a = load <4 x i32>,<4 x i32> *%i,align 1
+ %x = sext <4 x i32> %a to <4 x i64>
+ %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
+ ret <4 x i64> %ret
+}
+
+;SKX-LABEL: sext_4x32mem_to_4x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovsxdq (%rdi), %ymm0
+;SKX-NEXT: retq
+define <4 x i64> @sext_4x32mem_to_4x64(<4 x i32> *%i) nounwind readnone {
+ %a = load <4 x i32>,<4 x i32> *%i,align 1
+ %x = sext <4 x i32> %a to <4 x i64>
+ ret <4 x i64> %x
+}
+
+;SKX-LABEL: sext_4x32_to_4x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovsxdq %xmm0, %ymm0
+;SKX-NEXT: retq
+define <4 x i64> @sext_4x32_to_4x64(<4 x i32> %a) nounwind readnone {
+ %x = sext <4 x i32> %a to <4 x i64>
+ ret <4 x i64> %x
+}
+
+;SKX-LABEL: zext_4x32_to_4x64mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovd2m %xmm1, %k1
+;SKX-NEXT: vpmovzxdq %xmm0, %ymm0 {%k1} {z}
+;SKX-NEXT: retq
+define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind readnone {
+ %x = zext <4 x i32> %a to <4 x i64>
+ %ret = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> zeroinitializer
+ ret <4 x i64> %ret
+}
+
+;SKX-LABEL: zext_8x32mem_to_8x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovw2m %xmm0, %k1
+;SKX-NEXT: vpmovzxdq (%rdi), %zmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
+ %a = load <8 x i32>,<8 x i32> *%i,align 1
+ %x = zext <8 x i32> %a to <8 x i64>
+ %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
+ ret <8 x i64> %ret
+}
+
+;SKX-LABEL: sext_8x32mem_to_8x64mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovw2m %xmm0, %k1
+;SKX-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounwind readnone {
+ %a = load <8 x i32>,<8 x i32> *%i,align 1
+ %x = sext <8 x i32> %a to <8 x i64>
+ %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
+ ret <8 x i64> %ret
+}
+
+;SKX-LABEL: sext_8x32mem_to_8x64:
+;KNL-LABEL: sext_8x32mem_to_8x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovsxdq (%rdi), %zmm0
+;KNL: vpmovsxdq (%rdi), %zmm0
+;SKX-NEXT: retq
+define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone {
+ %a = load <8 x i32>,<8 x i32> *%i,align 1
+ %x = sext <8 x i32> %a to <8 x i64>
+ ret <8 x i64> %x
+}
+
+;SKX-LABEL: sext_8x32_to_8x64:
+;KNL-LABEL: sext_8x32_to_8x64:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovsxdq %ymm0, %zmm0
+;KNL: vpmovsxdq %ymm0, %zmm0
+;SKX-NEXT: retq
+define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone {
+ %x = sext <8 x i32> %a to <8 x i64>
ret <8 x i64> %x
}
-;CHECK-LABEL: fptrunc_test
-;CHECK: vcvtpd2ps {{.*}}%zmm
-;CHECK: ret
+;SKX-LABEL: zext_8x32_to_8x64mask:
+;KNL-LABEL: zext_8x32_to_8x64mask:
+;SKX: ## BB#0:
+;SKX-NEXT: vpmovw2m %xmm1, %k1
+;SKX-NEXT: vpmovzxdq %ymm0, %zmm0 {%k1} {z}
+;KNL: vpmovzxdq %ymm0, %zmm0 {%k1} {z}
+;SKX-NEXT: retq
+define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind readnone {
+ %x = zext <8 x i32> %a to <8 x i64>
+ %ret = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> zeroinitializer
+ ret <8 x i64> %ret
+}
+;KNL-LABEL: fptrunc_test
+;KNL: vcvtpd2ps {{.*}}%zmm
+;KNL: ret
define <8 x float> @fptrunc_test(<8 x double> %a) nounwind readnone {
%b = fptrunc <8 x double> %a to <8 x float>
ret <8 x float> %b
}
-;CHECK-LABEL: fpext_test
-;CHECK: vcvtps2pd {{.*}}%zmm
-;CHECK: ret
+;KNL-LABEL: fpext_test
+;KNL: vcvtps2pd {{.*}}%zmm
+;KNL: ret
define <8 x double> @fpext_test(<8 x float> %a) nounwind readnone {
%b = fpext <8 x float> %a to <8 x double>
ret <8 x double> %b
}
-; CHECK-LABEL: zext_16i1_to_16xi32
-; CHECK: vpbroadcastd LCP{{.*}}(%rip), %zmm0 {%k1} {z}
-; CHECK: ret
+; KNL-LABEL: zext_16i1_to_16xi32
+; KNL: vpbroadcastd LCP{{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL: ret
define <16 x i32> @zext_16i1_to_16xi32(i16 %b) {
%a = bitcast i16 %b to <16 x i1>
%c = zext <16 x i1> %a to <16 x i32>
ret <16 x i32> %c
}
-; CHECK-LABEL: zext_8i1_to_8xi64
-; CHECK: vpbroadcastq LCP{{.*}}(%rip), %zmm0 {%k1} {z}
-; CHECK: ret
+; KNL-LABEL: zext_8i1_to_8xi64
+; KNL: vpbroadcastq LCP{{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL: ret
define <8 x i64> @zext_8i1_to_8xi64(i8 %b) {
%a = bitcast i8 %b to <8 x i1>
%c = zext <8 x i1> %a to <8 x i64>
ret <8 x i64> %c
}
-; CHECK-LABEL: trunc_16i8_to_16i1
-; CHECK: vpmovsxbd
-; CHECK: vpandd
-; CHECK: vptestmd
-; CHECK: ret
+; KNL-LABEL: trunc_16i8_to_16i1
+; KNL: vpmovsxbd
+; KNL: vpandd
+; KNL: vptestmd
+; KNL: ret
; SKX-LABEL: trunc_16i8_to_16i1
; SKX: vpmovb2m %xmm
define i16 @trunc_16i8_to_16i1(<16 x i8> %a) {
@@ -98,10 +846,10 @@ define i16 @trunc_16i8_to_16i1(<16 x i8>
ret i16 %mask
}
-; CHECK-LABEL: trunc_16i32_to_16i1
-; CHECK: vpandd
-; CHECK: vptestmd
-; CHECK: ret
+; KNL-LABEL: trunc_16i32_to_16i1
+; KNL: vpandd
+; KNL: vptestmd
+; KNL: ret
; SKX-LABEL: trunc_16i32_to_16i1
; SKX: vpmovd2m %zmm
define i16 @trunc_16i32_to_16i1(<16 x i32> %a) {
@@ -122,11 +870,11 @@ define <4 x i32> @trunc_4i32_to_4i1(<4 x
ret <4 x i32>%res
}
-; CHECK-LABEL: trunc_8i16_to_8i1
-; CHECK: vpmovsxwq
-; CHECK: vpandq LCP{{.*}}(%rip){1to8}
-; CHECK: vptestmq
-; CHECK: ret
+; KNL-LABEL: trunc_8i16_to_8i1
+; KNL: vpmovsxwq
+; KNL: vpandq LCP{{.*}}(%rip){1to8}
+; KNL: vptestmq
+; KNL: ret
; SKX-LABEL: trunc_8i16_to_8i1
; SKX: vpmovw2m %xmm
@@ -136,10 +884,10 @@ define i8 @trunc_8i16_to_8i1(<8 x i16> %
ret i8 %mask
}
-; CHECK-LABEL: sext_8i1_8i32
-; CHECK: vpbroadcastq LCP{{.*}}(%rip), %zmm0 {%k1} {z}
+; KNL-LABEL: sext_8i1_8i32
+; KNL: vpbroadcastq LCP{{.*}}(%rip), %zmm0 {%k1} {z}
; SKX: vpmovm2d
-; CHECK: ret
+; KNL: ret
define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
%x = icmp slt <8 x i32> %a1, %a2
%x1 = xor <8 x i1>%x, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
@@ -147,18 +895,18 @@ define <8 x i32> @sext_8i1_8i32(<8 x i32
ret <8 x i32> %y
}
-; CHECK-LABEL: trunc_v16i32_to_v16i16
-; CHECK: vpmovdw
-; CHECK: ret
+; KNL-LABEL: trunc_v16i32_to_v16i16
+; KNL: vpmovdw
+; KNL: ret
define <16 x i16> @trunc_v16i32_to_v16i16(<16 x i32> %x) {
%1 = trunc <16 x i32> %x to <16 x i16>
ret <16 x i16> %1
}
-; CHECK-LABEL: trunc_i32_to_i1
-; CHECK: movw $-4, %ax
-; CHECK: kmovw %eax, %k1
-; CKECK: korw
+; KNL-LABEL: trunc_i32_to_i1
+; KNL: movw $-4, %ax
+; KNL: kmovw %eax, %k1
+; KNL: korw
define i16 @trunc_i32_to_i1(i32 %a) {
%a_i = trunc i32 %a to i1
%maskv = insertelement <16 x i1> <i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i1 %a_i, i32 0
@@ -166,35 +914,35 @@ define i16 @trunc_i32_to_i1(i32 %a) {
ret i16 %res
}
-; CHECK-LABEL: sext_8i1_8i16
+; KNL-LABEL: sext_8i1_8i16
; SKX: vpmovm2w
-; CHECK: ret
+; KNL: ret
define <8 x i16> @sext_8i1_8i16(<8 x i32> %a1, <8 x i32> %a2) nounwind {
%x = icmp slt <8 x i32> %a1, %a2
%y = sext <8 x i1> %x to <8 x i16>
ret <8 x i16> %y
}
-; CHECK-LABEL: sext_16i1_16i32
+; KNL-LABEL: sext_16i1_16i32
; SKX: vpmovm2d
-; CHECK: ret
+; KNL: ret
define <16 x i32> @sext_16i1_16i32(<16 x i32> %a1, <16 x i32> %a2) nounwind {
%x = icmp slt <16 x i32> %a1, %a2
%y = sext <16 x i1> %x to <16 x i32>
ret <16 x i32> %y
}
-; CHECK-LABEL: sext_8i1_8i64
+; KNL-LABEL: sext_8i1_8i64
; SKX: vpmovm2q
-; CHECK: ret
+; KNL: ret
define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind {
%x = icmp slt <8 x i32> %a1, %a2
%y = sext <8 x i1> %x to <8 x i64>
ret <8 x i64> %y
}
-; CHECK-LABEL: @extload_v8i64
-; CHECK: vpmovsxbq
+; KNL-LABEL: @extload_v8i64
+; KNL: vpmovsxbq
define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) {
%sign_load = load <8 x i8>, <8 x i8>* %a
%c = sext <8 x i8> %sign_load to <8 x i64>
Modified: llvm/trunk/test/MC/X86/x86-64-avx512bw.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/x86-64-avx512bw.s?rev=238301&r1=238300&r2=238301&view=diff
==============================================================================
--- llvm/trunk/test/MC/X86/x86-64-avx512bw.s (original)
+++ llvm/trunk/test/MC/X86/x86-64-avx512bw.s Wed May 27 03:15:19 2015
@@ -511,6 +511,78 @@
// CHECK: encoding: [0x62,0xe2,0x15,0x40,0x3a,0x9a,0xc0,0xdf,0xff,0xff]
vpminuw -8256(%rdx), %zmm29, %zmm19
+// CHECK: vpmovsxbw %ymm18, %zmm22
+// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x20,0xf2]
+ vpmovsxbw %ymm18, %zmm22
+
+// CHECK: vpmovsxbw %ymm18, %zmm22 {%k5}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x4d,0x20,0xf2]
+ vpmovsxbw %ymm18, %zmm22 {%k5}
+
+// CHECK: vpmovsxbw %ymm18, %zmm22 {%k5} {z}
+// CHECK: encoding: [0x62,0xa2,0x7d,0xcd,0x20,0xf2]
+ vpmovsxbw %ymm18, %zmm22 {%k5} {z}
+
+// CHECK: vpmovsxbw (%rcx), %zmm22
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x20,0x31]
+ vpmovsxbw (%rcx), %zmm22
+
+// CHECK: vpmovsxbw 291(%rax,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x20,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsxbw 291(%rax,%r14,8), %zmm22
+
+// CHECK: vpmovsxbw 4064(%rdx), %zmm22
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x20,0x72,0x7f]
+ vpmovsxbw 4064(%rdx), %zmm22
+
+// CHECK: vpmovsxbw 4096(%rdx), %zmm22
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x20,0xb2,0x00,0x10,0x00,0x00]
+ vpmovsxbw 4096(%rdx), %zmm22
+
+// CHECK: vpmovsxbw -4096(%rdx), %zmm22
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x20,0x72,0x80]
+ vpmovsxbw -4096(%rdx), %zmm22
+
+// CHECK: vpmovsxbw -4128(%rdx), %zmm22
+// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x20,0xb2,0xe0,0xef,0xff,0xff]
+ vpmovsxbw -4128(%rdx), %zmm22
+
+// CHECK: vpmovzxbw %ymm26, %zmm24
+// CHECK: encoding: [0x62,0x02,0x7d,0x48,0x30,0xc2]
+ vpmovzxbw %ymm26, %zmm24
+
+// CHECK: vpmovzxbw %ymm26, %zmm24 {%k4}
+// CHECK: encoding: [0x62,0x02,0x7d,0x4c,0x30,0xc2]
+ vpmovzxbw %ymm26, %zmm24 {%k4}
+
+// CHECK: vpmovzxbw %ymm26, %zmm24 {%k4} {z}
+// CHECK: encoding: [0x62,0x02,0x7d,0xcc,0x30,0xc2]
+ vpmovzxbw %ymm26, %zmm24 {%k4} {z}
+
+// CHECK: vpmovzxbw (%rcx), %zmm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x30,0x01]
+ vpmovzxbw (%rcx), %zmm24
+
+// CHECK: vpmovzxbw 291(%rax,%r14,8), %zmm24
+// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x30,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vpmovzxbw 291(%rax,%r14,8), %zmm24
+
+// CHECK: vpmovzxbw 4064(%rdx), %zmm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x30,0x42,0x7f]
+ vpmovzxbw 4064(%rdx), %zmm24
+
+// CHECK: vpmovzxbw 4096(%rdx), %zmm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x30,0x82,0x00,0x10,0x00,0x00]
+ vpmovzxbw 4096(%rdx), %zmm24
+
+// CHECK: vpmovzxbw -4096(%rdx), %zmm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x30,0x42,0x80]
+ vpmovzxbw -4096(%rdx), %zmm24
+
+// CHECK: vpmovzxbw -4128(%rdx), %zmm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x30,0x82,0xe0,0xef,0xff,0xff]
+ vpmovzxbw -4128(%rdx), %zmm24
+
// CHECK: vpmullw %zmm19, %zmm28, %zmm19
// CHECK: encoding: [0x62,0xa1,0x1d,0x40,0xd5,0xdb]
vpmullw %zmm19, %zmm28, %zmm19
Modified: llvm/trunk/test/MC/X86/x86-64-avx512bw_vl.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/x86-64-avx512bw_vl.s?rev=238301&r1=238300&r2=238301&view=diff
==============================================================================
--- llvm/trunk/test/MC/X86/x86-64-avx512bw_vl.s (original)
+++ llvm/trunk/test/MC/X86/x86-64-avx512bw_vl.s Wed May 27 03:15:19 2015
@@ -1312,6 +1312,150 @@
// CHECK: encoding: [0x62,0xe2,0x25,0x20,0x3a,0xa2,0xe0,0xef,0xff,0xff]
vpminuw -4128(%rdx), %ymm27, %ymm20
+// CHECK: vpmovsxbw %xmm23, %xmm27
+// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x20,0xdf]
+ vpmovsxbw %xmm23, %xmm27
+
+// CHECK: vpmovsxbw %xmm23, %xmm27 {%k7}
+// CHECK: encoding: [0x62,0x22,0x7d,0x0f,0x20,0xdf]
+ vpmovsxbw %xmm23, %xmm27 {%k7}
+
+// CHECK: vpmovsxbw %xmm23, %xmm27 {%k7} {z}
+// CHECK: encoding: [0x62,0x22,0x7d,0x8f,0x20,0xdf]
+ vpmovsxbw %xmm23, %xmm27 {%k7} {z}
+
+// CHECK: vpmovsxbw (%rcx), %xmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x20,0x19]
+ vpmovsxbw (%rcx), %xmm27
+
+// CHECK: vpmovsxbw 291(%rax,%r14,8), %xmm27
+// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x20,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsxbw 291(%rax,%r14,8), %xmm27
+
+// CHECK: vpmovsxbw 1016(%rdx), %xmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x20,0x5a,0x7f]
+ vpmovsxbw 1016(%rdx), %xmm27
+
+// CHECK: vpmovsxbw 1024(%rdx), %xmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x20,0x9a,0x00,0x04,0x00,0x00]
+ vpmovsxbw 1024(%rdx), %xmm27
+
+// CHECK: vpmovsxbw -1024(%rdx), %xmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x20,0x5a,0x80]
+ vpmovsxbw -1024(%rdx), %xmm27
+
+// CHECK: vpmovsxbw -1032(%rdx), %xmm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x20,0x9a,0xf8,0xfb,0xff,0xff]
+ vpmovsxbw -1032(%rdx), %xmm27
+
+// CHECK: vpmovsxbw %xmm23, %ymm21
+// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x20,0xef]
+ vpmovsxbw %xmm23, %ymm21
+
+// CHECK: vpmovsxbw %xmm23, %ymm21 {%k7}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x2f,0x20,0xef]
+ vpmovsxbw %xmm23, %ymm21 {%k7}
+
+// CHECK: vpmovsxbw %xmm23, %ymm21 {%k7} {z}
+// CHECK: encoding: [0x62,0xa2,0x7d,0xaf,0x20,0xef]
+ vpmovsxbw %xmm23, %ymm21 {%k7} {z}
+
+// CHECK: vpmovsxbw (%rcx), %ymm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x20,0x29]
+ vpmovsxbw (%rcx), %ymm21
+
+// CHECK: vpmovsxbw 291(%rax,%r14,8), %ymm21
+// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x20,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsxbw 291(%rax,%r14,8), %ymm21
+
+// CHECK: vpmovsxbw 2032(%rdx), %ymm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x20,0x6a,0x7f]
+ vpmovsxbw 2032(%rdx), %ymm21
+
+// CHECK: vpmovsxbw 2048(%rdx), %ymm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x20,0xaa,0x00,0x08,0x00,0x00]
+ vpmovsxbw 2048(%rdx), %ymm21
+
+// CHECK: vpmovsxbw -2048(%rdx), %ymm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x20,0x6a,0x80]
+ vpmovsxbw -2048(%rdx), %ymm21
+
+// CHECK: vpmovsxbw -2064(%rdx), %ymm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x20,0xaa,0xf0,0xf7,0xff,0xff]
+ vpmovsxbw -2064(%rdx), %ymm21
+
+// CHECK: vpmovzxbw %xmm29, %xmm30
+// CHECK: encoding: [0x62,0x02,0x7d,0x08,0x30,0xf5]
+ vpmovzxbw %xmm29, %xmm30
+
+// CHECK: vpmovzxbw %xmm29, %xmm30 {%k7}
+// CHECK: encoding: [0x62,0x02,0x7d,0x0f,0x30,0xf5]
+ vpmovzxbw %xmm29, %xmm30 {%k7}
+
+// CHECK: vpmovzxbw %xmm29, %xmm30 {%k7} {z}
+// CHECK: encoding: [0x62,0x02,0x7d,0x8f,0x30,0xf5]
+ vpmovzxbw %xmm29, %xmm30 {%k7} {z}
+
+// CHECK: vpmovzxbw (%rcx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x30,0x31]
+ vpmovzxbw (%rcx), %xmm30
+
+// CHECK: vpmovzxbw 291(%rax,%r14,8), %xmm30
+// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x30,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vpmovzxbw 291(%rax,%r14,8), %xmm30
+
+// CHECK: vpmovzxbw 1016(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x30,0x72,0x7f]
+ vpmovzxbw 1016(%rdx), %xmm30
+
+// CHECK: vpmovzxbw 1024(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x30,0xb2,0x00,0x04,0x00,0x00]
+ vpmovzxbw 1024(%rdx), %xmm30
+
+// CHECK: vpmovzxbw -1024(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x30,0x72,0x80]
+ vpmovzxbw -1024(%rdx), %xmm30
+
+// CHECK: vpmovzxbw -1032(%rdx), %xmm30
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x30,0xb2,0xf8,0xfb,0xff,0xff]
+ vpmovzxbw -1032(%rdx), %xmm30
+
+// CHECK: vpmovzxbw %xmm29, %ymm22
+// CHECK: encoding: [0x62,0x82,0x7d,0x28,0x30,0xf5]
+ vpmovzxbw %xmm29, %ymm22
+
+// CHECK: vpmovzxbw %xmm29, %ymm22 {%k2}
+// CHECK: encoding: [0x62,0x82,0x7d,0x2a,0x30,0xf5]
+ vpmovzxbw %xmm29, %ymm22 {%k2}
+
+// CHECK: vpmovzxbw %xmm29, %ymm22 {%k2} {z}
+// CHECK: encoding: [0x62,0x82,0x7d,0xaa,0x30,0xf5]
+ vpmovzxbw %xmm29, %ymm22 {%k2} {z}
+
+// CHECK: vpmovzxbw (%rcx), %ymm22
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x30,0x31]
+ vpmovzxbw (%rcx), %ymm22
+
+// CHECK: vpmovzxbw 291(%rax,%r14,8), %ymm22
+// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x30,0xb4,0xf0,0x23,0x01,0x00,0x00]
+ vpmovzxbw 291(%rax,%r14,8), %ymm22
+
+// CHECK: vpmovzxbw 2032(%rdx), %ymm22
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x30,0x72,0x7f]
+ vpmovzxbw 2032(%rdx), %ymm22
+
+// CHECK: vpmovzxbw 2048(%rdx), %ymm22
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x30,0xb2,0x00,0x08,0x00,0x00]
+ vpmovzxbw 2048(%rdx), %ymm22
+
+// CHECK: vpmovzxbw -2048(%rdx), %ymm22
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x30,0x72,0x80]
+ vpmovzxbw -2048(%rdx), %ymm22
+
+// CHECK: vpmovzxbw -2064(%rdx), %ymm22
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x30,0xb2,0xf0,0xf7,0xff,0xff]
+ vpmovzxbw -2064(%rdx), %ymm22
+
// CHECK: vpmullw %xmm26, %xmm19, %xmm29
// CHECK: encoding: [0x62,0x01,0x65,0x00,0xd5,0xea]
vpmullw %xmm26, %xmm19, %xmm29
Modified: llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s?rev=238301&r1=238300&r2=238301&view=diff
==============================================================================
--- llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s (original)
+++ llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s Wed May 27 03:15:19 2015
@@ -4524,6 +4524,726 @@
// CHECK: encoding: [0x62,0x62,0xd5,0x30,0x3b,0xaa,0xf8,0xfb,0xff,0xff]
vpminuq -1032(%rdx){1to4}, %ymm21, %ymm29
+// CHECK: vpmovsxbd %xmm28, %xmm24
+// CHECK: encoding: [0x62,0x02,0x7d,0x08,0x21,0xc4]
+ vpmovsxbd %xmm28, %xmm24
+
+// CHECK: vpmovsxbd %xmm28, %xmm24 {%k1}
+// CHECK: encoding: [0x62,0x02,0x7d,0x09,0x21,0xc4]
+ vpmovsxbd %xmm28, %xmm24 {%k1}
+
+// CHECK: vpmovsxbd %xmm28, %xmm24 {%k1} {z}
+// CHECK: encoding: [0x62,0x02,0x7d,0x89,0x21,0xc4]
+ vpmovsxbd %xmm28, %xmm24 {%k1} {z}
+
+// CHECK: vpmovsxbd (%rcx), %xmm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x21,0x01]
+ vpmovsxbd (%rcx), %xmm24
+
+// CHECK: vpmovsxbd 291(%rax,%r14,8), %xmm24
+// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x21,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsxbd 291(%rax,%r14,8), %xmm24
+
+// CHECK: vpmovsxbd 508(%rdx), %xmm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x21,0x42,0x7f]
+ vpmovsxbd 508(%rdx), %xmm24
+
+// CHECK: vpmovsxbd 512(%rdx), %xmm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x21,0x82,0x00,0x02,0x00,0x00]
+ vpmovsxbd 512(%rdx), %xmm24
+
+// CHECK: vpmovsxbd -512(%rdx), %xmm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x21,0x42,0x80]
+ vpmovsxbd -512(%rdx), %xmm24
+
+// CHECK: vpmovsxbd -516(%rdx), %xmm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x21,0x82,0xfc,0xfd,0xff,0xff]
+ vpmovsxbd -516(%rdx), %xmm24
+
+// CHECK: vpmovsxbd %xmm20, %ymm24
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x21,0xc4]
+ vpmovsxbd %xmm20, %ymm24
+
+// CHECK: vpmovsxbd %xmm20, %ymm24 {%k3}
+// CHECK: encoding: [0x62,0x22,0x7d,0x2b,0x21,0xc4]
+ vpmovsxbd %xmm20, %ymm24 {%k3}
+
+// CHECK: vpmovsxbd %xmm20, %ymm24 {%k3} {z}
+// CHECK: encoding: [0x62,0x22,0x7d,0xab,0x21,0xc4]
+ vpmovsxbd %xmm20, %ymm24 {%k3} {z}
+
+// CHECK: vpmovsxbd (%rcx), %ymm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x21,0x01]
+ vpmovsxbd (%rcx), %ymm24
+
+// CHECK: vpmovsxbd 291(%rax,%r14,8), %ymm24
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x21,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsxbd 291(%rax,%r14,8), %ymm24
+
+// CHECK: vpmovsxbd 1016(%rdx), %ymm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x21,0x42,0x7f]
+ vpmovsxbd 1016(%rdx), %ymm24
+
+// CHECK: vpmovsxbd 1024(%rdx), %ymm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x21,0x82,0x00,0x04,0x00,0x00]
+ vpmovsxbd 1024(%rdx), %ymm24
+
+// CHECK: vpmovsxbd -1024(%rdx), %ymm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x21,0x42,0x80]
+ vpmovsxbd -1024(%rdx), %ymm24
+
+// CHECK: vpmovsxbd -1032(%rdx), %ymm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x21,0x82,0xf8,0xfb,0xff,0xff]
+ vpmovsxbd -1032(%rdx), %ymm24
+
+// CHECK: vpmovsxbq %xmm22, %xmm17
+// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x22,0xce]
+ vpmovsxbq %xmm22, %xmm17
+
+// CHECK: vpmovsxbq %xmm22, %xmm17 {%k5}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x0d,0x22,0xce]
+ vpmovsxbq %xmm22, %xmm17 {%k5}
+
+// CHECK: vpmovsxbq %xmm22, %xmm17 {%k5} {z}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x8d,0x22,0xce]
+ vpmovsxbq %xmm22, %xmm17 {%k5} {z}
+
+// CHECK: vpmovsxbq (%rcx), %xmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x22,0x09]
+ vpmovsxbq (%rcx), %xmm17
+
+// CHECK: vpmovsxbq 291(%rax,%r14,8), %xmm17
+// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x22,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsxbq 291(%rax,%r14,8), %xmm17
+
+// CHECK: vpmovsxbq 254(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x22,0x4a,0x7f]
+ vpmovsxbq 254(%rdx), %xmm17
+
+// CHECK: vpmovsxbq 256(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x22,0x8a,0x00,0x01,0x00,0x00]
+ vpmovsxbq 256(%rdx), %xmm17
+
+// CHECK: vpmovsxbq -256(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x22,0x4a,0x80]
+ vpmovsxbq -256(%rdx), %xmm17
+
+// CHECK: vpmovsxbq -258(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x22,0x8a,0xfe,0xfe,0xff,0xff]
+ vpmovsxbq -258(%rdx), %xmm17
+
+// CHECK: vpmovsxbq %xmm26, %ymm28
+// CHECK: encoding: [0x62,0x02,0x7d,0x28,0x22,0xe2]
+ vpmovsxbq %xmm26, %ymm28
+
+// CHECK: vpmovsxbq %xmm26, %ymm28 {%k5}
+// CHECK: encoding: [0x62,0x02,0x7d,0x2d,0x22,0xe2]
+ vpmovsxbq %xmm26, %ymm28 {%k5}
+
+// CHECK: vpmovsxbq %xmm26, %ymm28 {%k5} {z}
+// CHECK: encoding: [0x62,0x02,0x7d,0xad,0x22,0xe2]
+ vpmovsxbq %xmm26, %ymm28 {%k5} {z}
+
+// CHECK: vpmovsxbq (%rcx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x22,0x21]
+ vpmovsxbq (%rcx), %ymm28
+
+// CHECK: vpmovsxbq 291(%rax,%r14,8), %ymm28
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x22,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsxbq 291(%rax,%r14,8), %ymm28
+
+// CHECK: vpmovsxbq 508(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x22,0x62,0x7f]
+ vpmovsxbq 508(%rdx), %ymm28
+
+// CHECK: vpmovsxbq 512(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x22,0xa2,0x00,0x02,0x00,0x00]
+ vpmovsxbq 512(%rdx), %ymm28
+
+// CHECK: vpmovsxbq -512(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x22,0x62,0x80]
+ vpmovsxbq -512(%rdx), %ymm28
+
+// CHECK: vpmovsxbq -516(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x22,0xa2,0xfc,0xfd,0xff,0xff]
+ vpmovsxbq -516(%rdx), %ymm28
+
+// CHECK: vpmovsxdq %xmm26, %xmm23
+// CHECK: encoding: [0x62,0x82,0x7d,0x08,0x25,0xfa]
+ vpmovsxdq %xmm26, %xmm23
+
+// CHECK: vpmovsxdq %xmm26, %xmm23 {%k7}
+// CHECK: encoding: [0x62,0x82,0x7d,0x0f,0x25,0xfa]
+ vpmovsxdq %xmm26, %xmm23 {%k7}
+
+// CHECK: vpmovsxdq %xmm26, %xmm23 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0x7d,0x8f,0x25,0xfa]
+ vpmovsxdq %xmm26, %xmm23 {%k7} {z}
+
+// CHECK: vpmovsxdq (%rcx), %xmm23
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x25,0x39]
+ vpmovsxdq (%rcx), %xmm23
+
+// CHECK: vpmovsxdq 291(%rax,%r14,8), %xmm23
+// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x25,0xbc,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsxdq 291(%rax,%r14,8), %xmm23
+
+// CHECK: vpmovsxdq 1016(%rdx), %xmm23
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x25,0x7a,0x7f]
+ vpmovsxdq 1016(%rdx), %xmm23
+
+// CHECK: vpmovsxdq 1024(%rdx), %xmm23
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x25,0xba,0x00,0x04,0x00,0x00]
+ vpmovsxdq 1024(%rdx), %xmm23
+
+// CHECK: vpmovsxdq -1024(%rdx), %xmm23
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x25,0x7a,0x80]
+ vpmovsxdq -1024(%rdx), %xmm23
+
+// CHECK: vpmovsxdq -1032(%rdx), %xmm23
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x25,0xba,0xf8,0xfb,0xff,0xff]
+ vpmovsxdq -1032(%rdx), %xmm23
+
+// CHECK: vpmovsxdq %xmm28, %ymm18
+// CHECK: encoding: [0x62,0x82,0x7d,0x28,0x25,0xd4]
+ vpmovsxdq %xmm28, %ymm18
+
+// CHECK: vpmovsxdq %xmm28, %ymm18 {%k7}
+// CHECK: encoding: [0x62,0x82,0x7d,0x2f,0x25,0xd4]
+ vpmovsxdq %xmm28, %ymm18 {%k7}
+
+// CHECK: vpmovsxdq %xmm28, %ymm18 {%k7} {z}
+// CHECK: encoding: [0x62,0x82,0x7d,0xaf,0x25,0xd4]
+ vpmovsxdq %xmm28, %ymm18 {%k7} {z}
+
+// CHECK: vpmovsxdq (%rcx), %ymm18
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x25,0x11]
+ vpmovsxdq (%rcx), %ymm18
+
+// CHECK: vpmovsxdq 291(%rax,%r14,8), %ymm18
+// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x25,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsxdq 291(%rax,%r14,8), %ymm18
+
+// CHECK: vpmovsxdq 2032(%rdx), %ymm18
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x25,0x52,0x7f]
+ vpmovsxdq 2032(%rdx), %ymm18
+
+// CHECK: vpmovsxdq 2048(%rdx), %ymm18
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x25,0x92,0x00,0x08,0x00,0x00]
+ vpmovsxdq 2048(%rdx), %ymm18
+
+// CHECK: vpmovsxdq -2048(%rdx), %ymm18
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x25,0x52,0x80]
+ vpmovsxdq -2048(%rdx), %ymm18
+
+// CHECK: vpmovsxdq -2064(%rdx), %ymm18
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x25,0x92,0xf0,0xf7,0xff,0xff]
+ vpmovsxdq -2064(%rdx), %ymm18
+
+// CHECK: vpmovsxwd %xmm18, %xmm17
+// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x23,0xca]
+ vpmovsxwd %xmm18, %xmm17
+
+// CHECK: vpmovsxwd %xmm18, %xmm17 {%k4}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x0c,0x23,0xca]
+ vpmovsxwd %xmm18, %xmm17 {%k4}
+
+// CHECK: vpmovsxwd %xmm18, %xmm17 {%k4} {z}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x8c,0x23,0xca]
+ vpmovsxwd %xmm18, %xmm17 {%k4} {z}
+
+// CHECK: vpmovsxwd (%rcx), %xmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x23,0x09]
+ vpmovsxwd (%rcx), %xmm17
+
+// CHECK: vpmovsxwd 291(%rax,%r14,8), %xmm17
+// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x23,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsxwd 291(%rax,%r14,8), %xmm17
+
+// CHECK: vpmovsxwd 1016(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x23,0x4a,0x7f]
+ vpmovsxwd 1016(%rdx), %xmm17
+
+// CHECK: vpmovsxwd 1024(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x23,0x8a,0x00,0x04,0x00,0x00]
+ vpmovsxwd 1024(%rdx), %xmm17
+
+// CHECK: vpmovsxwd -1024(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x23,0x4a,0x80]
+ vpmovsxwd -1024(%rdx), %xmm17
+
+// CHECK: vpmovsxwd -1032(%rdx), %xmm17
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x23,0x8a,0xf8,0xfb,0xff,0xff]
+ vpmovsxwd -1032(%rdx), %xmm17
+
+// CHECK: vpmovsxwd %xmm25, %ymm21
+// CHECK: encoding: [0x62,0x82,0x7d,0x28,0x23,0xe9]
+ vpmovsxwd %xmm25, %ymm21
+
+// CHECK: vpmovsxwd %xmm25, %ymm21 {%k5}
+// CHECK: encoding: [0x62,0x82,0x7d,0x2d,0x23,0xe9]
+ vpmovsxwd %xmm25, %ymm21 {%k5}
+
+// CHECK: vpmovsxwd %xmm25, %ymm21 {%k5} {z}
+// CHECK: encoding: [0x62,0x82,0x7d,0xad,0x23,0xe9]
+ vpmovsxwd %xmm25, %ymm21 {%k5} {z}
+
+// CHECK: vpmovsxwd (%rcx), %ymm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x23,0x29]
+ vpmovsxwd (%rcx), %ymm21
+
+// CHECK: vpmovsxwd 291(%rax,%r14,8), %ymm21
+// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x23,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsxwd 291(%rax,%r14,8), %ymm21
+
+// CHECK: vpmovsxwd 2032(%rdx), %ymm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x23,0x6a,0x7f]
+ vpmovsxwd 2032(%rdx), %ymm21
+
+// CHECK: vpmovsxwd 2048(%rdx), %ymm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x23,0xaa,0x00,0x08,0x00,0x00]
+ vpmovsxwd 2048(%rdx), %ymm21
+
+// CHECK: vpmovsxwd -2048(%rdx), %ymm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x23,0x6a,0x80]
+ vpmovsxwd -2048(%rdx), %ymm21
+
+// CHECK: vpmovsxwd -2064(%rdx), %ymm21
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x23,0xaa,0xf0,0xf7,0xff,0xff]
+ vpmovsxwd -2064(%rdx), %ymm21
+
+// CHECK: vpmovsxwq %xmm20, %xmm29
+// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x24,0xec]
+ vpmovsxwq %xmm20, %xmm29
+
+// CHECK: vpmovsxwq %xmm20, %xmm29 {%k6}
+// CHECK: encoding: [0x62,0x22,0x7d,0x0e,0x24,0xec]
+ vpmovsxwq %xmm20, %xmm29 {%k6}
+
+// CHECK: vpmovsxwq %xmm20, %xmm29 {%k6} {z}
+// CHECK: encoding: [0x62,0x22,0x7d,0x8e,0x24,0xec]
+ vpmovsxwq %xmm20, %xmm29 {%k6} {z}
+
+// CHECK: vpmovsxwq (%rcx), %xmm29
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x24,0x29]
+ vpmovsxwq (%rcx), %xmm29
+
+// CHECK: vpmovsxwq 291(%rax,%r14,8), %xmm29
+// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x24,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsxwq 291(%rax,%r14,8), %xmm29
+
+// CHECK: vpmovsxwq 508(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x24,0x6a,0x7f]
+ vpmovsxwq 508(%rdx), %xmm29
+
+// CHECK: vpmovsxwq 512(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x24,0xaa,0x00,0x02,0x00,0x00]
+ vpmovsxwq 512(%rdx), %xmm29
+
+// CHECK: vpmovsxwq -512(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x24,0x6a,0x80]
+ vpmovsxwq -512(%rdx), %xmm29
+
+// CHECK: vpmovsxwq -516(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x24,0xaa,0xfc,0xfd,0xff,0xff]
+ vpmovsxwq -516(%rdx), %xmm29
+
+// CHECK: vpmovsxwq %xmm17, %ymm23
+// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x24,0xf9]
+ vpmovsxwq %xmm17, %ymm23
+
+// CHECK: vpmovsxwq %xmm17, %ymm23 {%k5}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x2d,0x24,0xf9]
+ vpmovsxwq %xmm17, %ymm23 {%k5}
+
+// CHECK: vpmovsxwq %xmm17, %ymm23 {%k5} {z}
+// CHECK: encoding: [0x62,0xa2,0x7d,0xad,0x24,0xf9]
+ vpmovsxwq %xmm17, %ymm23 {%k5} {z}
+
+// CHECK: vpmovsxwq (%rcx), %ymm23
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x24,0x39]
+ vpmovsxwq (%rcx), %ymm23
+
+// CHECK: vpmovsxwq 291(%rax,%r14,8), %ymm23
+// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x24,0xbc,0xf0,0x23,0x01,0x00,0x00]
+ vpmovsxwq 291(%rax,%r14,8), %ymm23
+
+// CHECK: vpmovsxwq 1016(%rdx), %ymm23
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x24,0x7a,0x7f]
+ vpmovsxwq 1016(%rdx), %ymm23
+
+// CHECK: vpmovsxwq 1024(%rdx), %ymm23
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x24,0xba,0x00,0x04,0x00,0x00]
+ vpmovsxwq 1024(%rdx), %ymm23
+
+// CHECK: vpmovsxwq -1024(%rdx), %ymm23
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x24,0x7a,0x80]
+ vpmovsxwq -1024(%rdx), %ymm23
+
+// CHECK: vpmovsxwq -1032(%rdx), %ymm23
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x24,0xba,0xf8,0xfb,0xff,0xff]
+ vpmovsxwq -1032(%rdx), %ymm23
+
+// CHECK: vpmovzxbd %xmm17, %xmm24
+// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x31,0xc1]
+ vpmovzxbd %xmm17, %xmm24
+
+// CHECK: vpmovzxbd %xmm17, %xmm24 {%k6}
+// CHECK: encoding: [0x62,0x22,0x7d,0x0e,0x31,0xc1]
+ vpmovzxbd %xmm17, %xmm24 {%k6}
+
+// CHECK: vpmovzxbd %xmm17, %xmm24 {%k6} {z}
+// CHECK: encoding: [0x62,0x22,0x7d,0x8e,0x31,0xc1]
+ vpmovzxbd %xmm17, %xmm24 {%k6} {z}
+
+// CHECK: vpmovzxbd (%rcx), %xmm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x31,0x01]
+ vpmovzxbd (%rcx), %xmm24
+
+// CHECK: vpmovzxbd 291(%rax,%r14,8), %xmm24
+// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x31,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vpmovzxbd 291(%rax,%r14,8), %xmm24
+
+// CHECK: vpmovzxbd 508(%rdx), %xmm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x31,0x42,0x7f]
+ vpmovzxbd 508(%rdx), %xmm24
+
+// CHECK: vpmovzxbd 512(%rdx), %xmm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x31,0x82,0x00,0x02,0x00,0x00]
+ vpmovzxbd 512(%rdx), %xmm24
+
+// CHECK: vpmovzxbd -512(%rdx), %xmm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x31,0x42,0x80]
+ vpmovzxbd -512(%rdx), %xmm24
+
+// CHECK: vpmovzxbd -516(%rdx), %xmm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x31,0x82,0xfc,0xfd,0xff,0xff]
+ vpmovzxbd -516(%rdx), %xmm24
+
+// CHECK: vpmovzxbd %xmm17, %ymm27
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x31,0xd9]
+ vpmovzxbd %xmm17, %ymm27
+
+// CHECK: vpmovzxbd %xmm17, %ymm27 {%k1}
+// CHECK: encoding: [0x62,0x22,0x7d,0x29,0x31,0xd9]
+ vpmovzxbd %xmm17, %ymm27 {%k1}
+
+// CHECK: vpmovzxbd %xmm17, %ymm27 {%k1} {z}
+// CHECK: encoding: [0x62,0x22,0x7d,0xa9,0x31,0xd9]
+ vpmovzxbd %xmm17, %ymm27 {%k1} {z}
+
+// CHECK: vpmovzxbd (%rcx), %ymm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x31,0x19]
+ vpmovzxbd (%rcx), %ymm27
+
+// CHECK: vpmovzxbd 291(%rax,%r14,8), %ymm27
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x31,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vpmovzxbd 291(%rax,%r14,8), %ymm27
+
+// CHECK: vpmovzxbd 1016(%rdx), %ymm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x31,0x5a,0x7f]
+ vpmovzxbd 1016(%rdx), %ymm27
+
+// CHECK: vpmovzxbd 1024(%rdx), %ymm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x31,0x9a,0x00,0x04,0x00,0x00]
+ vpmovzxbd 1024(%rdx), %ymm27
+
+// CHECK: vpmovzxbd -1024(%rdx), %ymm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x31,0x5a,0x80]
+ vpmovzxbd -1024(%rdx), %ymm27
+
+// CHECK: vpmovzxbd -1032(%rdx), %ymm27
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x31,0x9a,0xf8,0xfb,0xff,0xff]
+ vpmovzxbd -1032(%rdx), %ymm27
+
+// CHECK: vpmovzxbq %xmm19, %xmm19
+// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x32,0xdb]
+ vpmovzxbq %xmm19, %xmm19
+
+// CHECK: vpmovzxbq %xmm19, %xmm19 {%k1}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x09,0x32,0xdb]
+ vpmovzxbq %xmm19, %xmm19 {%k1}
+
+// CHECK: vpmovzxbq %xmm19, %xmm19 {%k1} {z}
+// CHECK: encoding: [0x62,0xa2,0x7d,0x89,0x32,0xdb]
+ vpmovzxbq %xmm19, %xmm19 {%k1} {z}
+
+// CHECK: vpmovzxbq (%rcx), %xmm19
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x32,0x19]
+ vpmovzxbq (%rcx), %xmm19
+
+// CHECK: vpmovzxbq 291(%rax,%r14,8), %xmm19
+// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x32,0x9c,0xf0,0x23,0x01,0x00,0x00]
+ vpmovzxbq 291(%rax,%r14,8), %xmm19
+
+// CHECK: vpmovzxbq 254(%rdx), %xmm19
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x32,0x5a,0x7f]
+ vpmovzxbq 254(%rdx), %xmm19
+
+// CHECK: vpmovzxbq 256(%rdx), %xmm19
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x32,0x9a,0x00,0x01,0x00,0x00]
+ vpmovzxbq 256(%rdx), %xmm19
+
+// CHECK: vpmovzxbq -256(%rdx), %xmm19
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x32,0x5a,0x80]
+ vpmovzxbq -256(%rdx), %xmm19
+
+// CHECK: vpmovzxbq -258(%rdx), %xmm19
+// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x32,0x9a,0xfe,0xfe,0xff,0xff]
+ vpmovzxbq -258(%rdx), %xmm19
+
+// CHECK: vpmovzxbq %xmm19, %ymm24
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x32,0xc3]
+ vpmovzxbq %xmm19, %ymm24
+
+// CHECK: vpmovzxbq %xmm19, %ymm24 {%k2}
+// CHECK: encoding: [0x62,0x22,0x7d,0x2a,0x32,0xc3]
+ vpmovzxbq %xmm19, %ymm24 {%k2}
+
+// CHECK: vpmovzxbq %xmm19, %ymm24 {%k2} {z}
+// CHECK: encoding: [0x62,0x22,0x7d,0xaa,0x32,0xc3]
+ vpmovzxbq %xmm19, %ymm24 {%k2} {z}
+
+// CHECK: vpmovzxbq (%rcx), %ymm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x32,0x01]
+ vpmovzxbq (%rcx), %ymm24
+
+// CHECK: vpmovzxbq 291(%rax,%r14,8), %ymm24
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x32,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vpmovzxbq 291(%rax,%r14,8), %ymm24
+
+// CHECK: vpmovzxbq 508(%rdx), %ymm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x32,0x42,0x7f]
+ vpmovzxbq 508(%rdx), %ymm24
+
+// CHECK: vpmovzxbq 512(%rdx), %ymm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x32,0x82,0x00,0x02,0x00,0x00]
+ vpmovzxbq 512(%rdx), %ymm24
+
+// CHECK: vpmovzxbq -512(%rdx), %ymm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x32,0x42,0x80]
+ vpmovzxbq -512(%rdx), %ymm24
+
+// CHECK: vpmovzxbq -516(%rdx), %ymm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x32,0x82,0xfc,0xfd,0xff,0xff]
+ vpmovzxbq -516(%rdx), %ymm24
+
+// CHECK: vpmovzxdq %xmm21, %xmm25
+// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x35,0xcd]
+ vpmovzxdq %xmm21, %xmm25
+
+// CHECK: vpmovzxdq %xmm21, %xmm25 {%k7}
+// CHECK: encoding: [0x62,0x22,0x7d,0x0f,0x35,0xcd]
+ vpmovzxdq %xmm21, %xmm25 {%k7}
+
+// CHECK: vpmovzxdq %xmm21, %xmm25 {%k7} {z}
+// CHECK: encoding: [0x62,0x22,0x7d,0x8f,0x35,0xcd]
+ vpmovzxdq %xmm21, %xmm25 {%k7} {z}
+
+// CHECK: vpmovzxdq (%rcx), %xmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x35,0x09]
+ vpmovzxdq (%rcx), %xmm25
+
+// CHECK: vpmovzxdq 291(%rax,%r14,8), %xmm25
+// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x35,0x8c,0xf0,0x23,0x01,0x00,0x00]
+ vpmovzxdq 291(%rax,%r14,8), %xmm25
+
+// CHECK: vpmovzxdq 1016(%rdx), %xmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x35,0x4a,0x7f]
+ vpmovzxdq 1016(%rdx), %xmm25
+
+// CHECK: vpmovzxdq 1024(%rdx), %xmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x35,0x8a,0x00,0x04,0x00,0x00]
+ vpmovzxdq 1024(%rdx), %xmm25
+
+// CHECK: vpmovzxdq -1024(%rdx), %xmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x35,0x4a,0x80]
+ vpmovzxdq -1024(%rdx), %xmm25
+
+// CHECK: vpmovzxdq -1032(%rdx), %xmm25
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x35,0x8a,0xf8,0xfb,0xff,0xff]
+ vpmovzxdq -1032(%rdx), %xmm25
+
+// CHECK: vpmovzxdq %xmm22, %ymm28
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x35,0xe6]
+ vpmovzxdq %xmm22, %ymm28
+
+// CHECK: vpmovzxdq %xmm22, %ymm28 {%k7}
+// CHECK: encoding: [0x62,0x22,0x7d,0x2f,0x35,0xe6]
+ vpmovzxdq %xmm22, %ymm28 {%k7}
+
+// CHECK: vpmovzxdq %xmm22, %ymm28 {%k7} {z}
+// CHECK: encoding: [0x62,0x22,0x7d,0xaf,0x35,0xe6]
+ vpmovzxdq %xmm22, %ymm28 {%k7} {z}
+
+// CHECK: vpmovzxdq (%rcx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x35,0x21]
+ vpmovzxdq (%rcx), %ymm28
+
+// CHECK: vpmovzxdq 291(%rax,%r14,8), %ymm28
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x35,0xa4,0xf0,0x23,0x01,0x00,0x00]
+ vpmovzxdq 291(%rax,%r14,8), %ymm28
+
+// CHECK: vpmovzxdq 2032(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x35,0x62,0x7f]
+ vpmovzxdq 2032(%rdx), %ymm28
+
+// CHECK: vpmovzxdq 2048(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x35,0xa2,0x00,0x08,0x00,0x00]
+ vpmovzxdq 2048(%rdx), %ymm28
+
+// CHECK: vpmovzxdq -2048(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x35,0x62,0x80]
+ vpmovzxdq -2048(%rdx), %ymm28
+
+// CHECK: vpmovzxdq -2064(%rdx), %ymm28
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x35,0xa2,0xf0,0xf7,0xff,0xff]
+ vpmovzxdq -2064(%rdx), %ymm28
+
+// CHECK: vpmovzxwd %xmm17, %xmm24
+// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x33,0xc1]
+ vpmovzxwd %xmm17, %xmm24
+
+// CHECK: vpmovzxwd %xmm17, %xmm24 {%k4}
+// CHECK: encoding: [0x62,0x22,0x7d,0x0c,0x33,0xc1]
+ vpmovzxwd %xmm17, %xmm24 {%k4}
+
+// CHECK: vpmovzxwd %xmm17, %xmm24 {%k4} {z}
+// CHECK: encoding: [0x62,0x22,0x7d,0x8c,0x33,0xc1]
+ vpmovzxwd %xmm17, %xmm24 {%k4} {z}
+
+// CHECK: vpmovzxwd (%rcx), %xmm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x33,0x01]
+ vpmovzxwd (%rcx), %xmm24
+
+// CHECK: vpmovzxwd 291(%rax,%r14,8), %xmm24
+// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x33,0x84,0xf0,0x23,0x01,0x00,0x00]
+ vpmovzxwd 291(%rax,%r14,8), %xmm24
+
+// CHECK: vpmovzxwd 1016(%rdx), %xmm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x33,0x42,0x7f]
+ vpmovzxwd 1016(%rdx), %xmm24
+
+// CHECK: vpmovzxwd 1024(%rdx), %xmm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x33,0x82,0x00,0x04,0x00,0x00]
+ vpmovzxwd 1024(%rdx), %xmm24
+
+// CHECK: vpmovzxwd -1024(%rdx), %xmm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x33,0x42,0x80]
+ vpmovzxwd -1024(%rdx), %xmm24
+
+// CHECK: vpmovzxwd -1032(%rdx), %xmm24
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x33,0x82,0xf8,0xfb,0xff,0xff]
+ vpmovzxwd -1032(%rdx), %xmm24
+
+// CHECK: vpmovzxwd %xmm29, %ymm26
+// CHECK: encoding: [0x62,0x02,0x7d,0x28,0x33,0xd5]
+ vpmovzxwd %xmm29, %ymm26
+
+// CHECK: vpmovzxwd %xmm29, %ymm26 {%k5}
+// CHECK: encoding: [0x62,0x02,0x7d,0x2d,0x33,0xd5]
+ vpmovzxwd %xmm29, %ymm26 {%k5}
+
+// CHECK: vpmovzxwd %xmm29, %ymm26 {%k5} {z}
+// CHECK: encoding: [0x62,0x02,0x7d,0xad,0x33,0xd5]
+ vpmovzxwd %xmm29, %ymm26 {%k5} {z}
+
+// CHECK: vpmovzxwd (%rcx), %ymm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x33,0x11]
+ vpmovzxwd (%rcx), %ymm26
+
+// CHECK: vpmovzxwd 291(%rax,%r14,8), %ymm26
+// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x33,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vpmovzxwd 291(%rax,%r14,8), %ymm26
+
+// CHECK: vpmovzxwd 2032(%rdx), %ymm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x33,0x52,0x7f]
+ vpmovzxwd 2032(%rdx), %ymm26
+
+// CHECK: vpmovzxwd 2048(%rdx), %ymm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x33,0x92,0x00,0x08,0x00,0x00]
+ vpmovzxwd 2048(%rdx), %ymm26
+
+// CHECK: vpmovzxwd -2048(%rdx), %ymm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x33,0x52,0x80]
+ vpmovzxwd -2048(%rdx), %ymm26
+
+// CHECK: vpmovzxwd -2064(%rdx), %ymm26
+// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x33,0x92,0xf0,0xf7,0xff,0xff]
+ vpmovzxwd -2064(%rdx), %ymm26
+
+// CHECK: vpmovzxwq %xmm20, %xmm29
+// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x34,0xec]
+ vpmovzxwq %xmm20, %xmm29
+
+// CHECK: vpmovzxwq %xmm20, %xmm29 {%k2}
+// CHECK: encoding: [0x62,0x22,0x7d,0x0a,0x34,0xec]
+ vpmovzxwq %xmm20, %xmm29 {%k2}
+
+// CHECK: vpmovzxwq %xmm20, %xmm29 {%k2} {z}
+// CHECK: encoding: [0x62,0x22,0x7d,0x8a,0x34,0xec]
+ vpmovzxwq %xmm20, %xmm29 {%k2} {z}
+
+// CHECK: vpmovzxwq (%rcx), %xmm29
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x34,0x29]
+ vpmovzxwq (%rcx), %xmm29
+
+// CHECK: vpmovzxwq 291(%rax,%r14,8), %xmm29
+// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x34,0xac,0xf0,0x23,0x01,0x00,0x00]
+ vpmovzxwq 291(%rax,%r14,8), %xmm29
+
+// CHECK: vpmovzxwq 508(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x34,0x6a,0x7f]
+ vpmovzxwq 508(%rdx), %xmm29
+
+// CHECK: vpmovzxwq 512(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x34,0xaa,0x00,0x02,0x00,0x00]
+ vpmovzxwq 512(%rdx), %xmm29
+
+// CHECK: vpmovzxwq -512(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x34,0x6a,0x80]
+ vpmovzxwq -512(%rdx), %xmm29
+
+// CHECK: vpmovzxwq -516(%rdx), %xmm29
+// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x34,0xaa,0xfc,0xfd,0xff,0xff]
+ vpmovzxwq -516(%rdx), %xmm29
+
+// CHECK: vpmovzxwq %xmm25, %ymm18
+// CHECK: encoding: [0x62,0x82,0x7d,0x28,0x34,0xd1]
+ vpmovzxwq %xmm25, %ymm18
+
+// CHECK: vpmovzxwq %xmm25, %ymm18 {%k1}
+// CHECK: encoding: [0x62,0x82,0x7d,0x29,0x34,0xd1]
+ vpmovzxwq %xmm25, %ymm18 {%k1}
+
+// CHECK: vpmovzxwq %xmm25, %ymm18 {%k1} {z}
+// CHECK: encoding: [0x62,0x82,0x7d,0xa9,0x34,0xd1]
+ vpmovzxwq %xmm25, %ymm18 {%k1} {z}
+
+// CHECK: vpmovzxwq (%rcx), %ymm18
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x34,0x11]
+ vpmovzxwq (%rcx), %ymm18
+
+// CHECK: vpmovzxwq 291(%rax,%r14,8), %ymm18
+// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x34,0x94,0xf0,0x23,0x01,0x00,0x00]
+ vpmovzxwq 291(%rax,%r14,8), %ymm18
+
+// CHECK: vpmovzxwq 1016(%rdx), %ymm18
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x34,0x52,0x7f]
+ vpmovzxwq 1016(%rdx), %ymm18
+
+// CHECK: vpmovzxwq 1024(%rdx), %ymm18
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x34,0x92,0x00,0x04,0x00,0x00]
+ vpmovzxwq 1024(%rdx), %ymm18
+
+// CHECK: vpmovzxwq -1024(%rdx), %ymm18
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x34,0x52,0x80]
+ vpmovzxwq -1024(%rdx), %ymm18
+
+// CHECK: vpmovzxwq -1032(%rdx), %ymm18
+// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x34,0x92,0xf8,0xfb,0xff,0xff]
+ vpmovzxwq -1032(%rdx), %ymm18
+
// CHECK: vpmulld %xmm24, %xmm19, %xmm25
// CHECK: encoding: [0x62,0x02,0x65,0x00,0x40,0xc8]
vpmulld %xmm24, %xmm19, %xmm25
More information about the llvm-commits mailing list