[llvm] r248111 - AVX512: Implement instructions encoding, lowering and intrinsics
Igor Breger via llvm-commits
llvm-commits at lists.llvm.org
Sat Sep 19 23:52:42 PDT 2015
Author: ibreger
Date: Sun Sep 20 01:52:42 2015
New Revision: 248111
URL: http://llvm.org/viewvc/llvm-project?rev=248111&view=rev
Log:
AVX512: Implement instructions encoding, lowering and intrinsics
vinserti64x4, vinserti64x2, vinserti32x8, vinserti32x4, vinsertf64x4, vinsertf64x2, vinsertf32x8, vinsertf32x4
Added tests for encoding, lowering and intrinsics.
Differential Revision: http://reviews.llvm.org/D11893
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsX86.td
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
llvm/trunk/test/CodeGen/X86/vector-lzcnt-256.ll
llvm/trunk/test/MC/X86/avx512-encodings.s
llvm/trunk/test/MC/X86/x86-64-avx512dq.s
llvm/trunk/test/MC/X86/x86-64-avx512dq_vl.s
llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s
Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=248111&r1=248110&r2=248111&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Sun Sep 20 01:52:42 2015
@@ -2279,6 +2279,78 @@ let TargetPrefix = "x86" in { // All in
GCCBuiltin<"__builtin_ia32_extracti64x4_mask">,
Intrinsic<[llvm_v4i64_ty], [llvm_v8i64_ty, llvm_i32_ty,
llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_insertf32x4_256 :
+ GCCBuiltin<"__builtin_ia32_insertf32x4_256_mask">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_v8f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_insertf32x4_512 :
+ GCCBuiltin<"__builtin_ia32_insertf32x4_512_mask">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v16f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_insertf32x8_512 :
+ GCCBuiltin<"__builtin_ia32_insertf32x8_mask">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_v16f32_ty, llvm_v8f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_insertf64x2_256 :
+ GCCBuiltin<"__builtin_ia32_insertf64x2_256_mask">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_v4f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_insertf64x2_512 :
+ GCCBuiltin<"__builtin_ia32_insertf64x2_512_mask">,
+ Intrinsic<[llvm_v8f64_ty],
+ [llvm_v8f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_insertf64x4_512 :
+ GCCBuiltin<"__builtin_ia32_insertf64x4_mask">,
+ Intrinsic<[llvm_v8f64_ty],
+ [llvm_v8f64_ty, llvm_v4f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_inserti32x4_256 :
+ GCCBuiltin<"__builtin_ia32_inserti32x4_256_mask">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v8i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v8i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_inserti32x4_512 :
+ GCCBuiltin<"__builtin_ia32_inserti32x4_512_mask">,
+ Intrinsic<[llvm_v16i32_ty],
+ [llvm_v16i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_inserti32x8_512 :
+ GCCBuiltin<"__builtin_ia32_inserti32x8_mask">,
+ Intrinsic<[llvm_v16i32_ty],
+ [llvm_v16i32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_inserti64x2_256 :
+ GCCBuiltin<"__builtin_ia32_inserti64x2_256_mask">,
+ Intrinsic<[llvm_v4i64_ty],
+ [llvm_v4i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v4i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_inserti64x2_512 :
+ GCCBuiltin<"__builtin_ia32_inserti64x2_512_mask">,
+ Intrinsic<[llvm_v8i64_ty],
+ [llvm_v8i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_inserti64x4_512 :
+ GCCBuiltin<"__builtin_ia32_inserti64x4_mask">,
+ Intrinsic<[llvm_v8i64_ty],
+ [llvm_v8i64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
}
// Conditional load ops
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=248111&r1=248110&r2=248111&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Sep 20 01:52:42 2015
@@ -15973,7 +15973,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
Mask, PassThru, Subtarget, DAG);
}
case INTR_TYPE_3OP_IMM8_MASK:
- case INTR_TYPE_3OP_MASK: {
+ case INTR_TYPE_3OP_MASK:
+ case INSERT_SUBVEC: {
SDValue Src1 = Op.getOperand(1);
SDValue Src2 = Op.getOperand(2);
SDValue Src3 = Op.getOperand(3);
@@ -15982,6 +15983,14 @@ static SDValue LowerINTRINSIC_WO_CHAIN(S
if (IntrData->Type == INTR_TYPE_3OP_IMM8_MASK)
Src3 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src3);
+ else if (IntrData->Type == INSERT_SUBVEC) {
+ // The intrinsic's imm counts subvectors; scale it by the subvector's element count to get the index passed to ISD::INSERT_SUBVECTOR.
+ assert(isa<ConstantSDNode>(Src3) && "Expected a ConstantSDNode here!");
+ unsigned Imm = cast<ConstantSDNode>(Src3)->getZExtValue();
+ Imm *= Src2.getValueType().getVectorNumElements();
+ Src3 = DAG.getTargetConstant(Imm, dl, MVT::i32);
+ }
+
// We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have non-default rounding mode,
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=248111&r1=248110&r2=248111&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Sun Sep 20 01:52:42 2015
@@ -471,84 +471,123 @@ def : Pat<(v8f64 immAllZerosV), (AVX512_
//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//
-
-multiclass vinsert_for_size_no_alt<int Opcode,
- X86VectorVTInfo From, X86VectorVTInfo To,
- PatFrag vinsert_insert,
- SDNodeXForm INSERT_get_vinsert_imm> {
+multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From, X86VectorVTInfo To,
+ PatFrag vinsert_insert> {
let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
- def rr : AVX512AIi8<Opcode, MRMSrcReg, (outs VR512:$dst),
- (ins VR512:$src1, From.RC:$src2, u8imm:$src3),
- "vinsert" # From.EltTypeName # "x" # From.NumElts #
- "\t{$src3, $src2, $src1, $dst|"
- "$dst, $src1, $src2, $src3}",
- [(set To.RC:$dst, (vinsert_insert:$src3 (To.VT VR512:$src1),
- (From.VT From.RC:$src2),
- (iPTR imm)))]>,
- EVEX_4V, EVEX_V512;
-
- let mayLoad = 1 in
- def rm : AVX512AIi8<Opcode, MRMSrcMem, (outs VR512:$dst),
- (ins VR512:$src1, From.MemOp:$src2, u8imm:$src3),
- "vinsert" # From.EltTypeName # "x" # From.NumElts #
- "\t{$src3, $src2, $src1, $dst|"
- "$dst, $src1, $src2, $src3}",
- []>,
- EVEX_4V, EVEX_V512, EVEX_CD8<From.EltSize, From.CD8TupleForm>;
+ defm rr : AVX512_maskable<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
+ (ins To.RC:$src1, From.RC:$src2, i32u8imm:$src3),
+ "vinsert" # From.EltTypeName # "x" # From.NumElts,
+ "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (vinsert_insert:$src3 (To.VT To.RC:$src1),
+ (From.VT From.RC:$src2),
+ (iPTR imm))>, AVX512AIi8Base, EVEX_4V;
+
+ let mayLoad = 1 in
+ defm rm : AVX512_maskable<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
+ (ins To.RC:$src1, From.MemOp:$src2, i32u8imm:$src3),
+ "vinsert" # From.EltTypeName # "x" # From.NumElts,
+ "$src3, $src2, $src1", "$src1, $src2, $src3",
+ (vinsert_insert:$src3 (To.VT To.RC:$src1),
+ (From.VT (bitconvert (From.LdFrag addr:$src2))),
+ (iPTR imm))>, AVX512AIi8Base, EVEX_4V,
+ EVEX_CD8<From.EltSize, From.CD8TupleForm>;
}
}
-multiclass vinsert_for_size<int Opcode,
- X86VectorVTInfo From, X86VectorVTInfo To,
- X86VectorVTInfo AltFrom, X86VectorVTInfo AltTo,
- PatFrag vinsert_insert,
- SDNodeXForm INSERT_get_vinsert_imm> :
- vinsert_for_size_no_alt<Opcode, From, To,
- vinsert_insert, INSERT_get_vinsert_imm> {
- // Codegen pattern with the alternative types, e.g. v2i64 -> v8i64 for
- // vinserti32x4. Only add this if 64x2 and friends are not supported
- // natively via AVX512DQ.
- let Predicates = [NoDQI] in
+multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
+ X86VectorVTInfo To, PatFrag vinsert_insert,
+ SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
+ let Predicates = p in {
+ def : Pat<(vinsert_insert:$ins
+ (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)),
+ (To.VT (!cast<Instruction>(InstrStr#"rr")
+ To.RC:$src1, From.RC:$src2,
+ (INSERT_get_vinsert_imm To.RC:$ins)))>;
+
def : Pat<(vinsert_insert:$ins
- (AltTo.VT VR512:$src1), (AltFrom.VT From.RC:$src2), (iPTR imm)),
- (AltTo.VT (!cast<Instruction>(NAME # From.EltSize # "x4rr")
- VR512:$src1, From.RC:$src2,
- (INSERT_get_vinsert_imm VR512:$ins)))>;
+ (To.VT To.RC:$src1),
+ (From.VT (bitconvert (From.LdFrag addr:$src2))),
+ (iPTR imm)),
+ (To.VT (!cast<Instruction>(InstrStr#"rm")
+ To.RC:$src1, addr:$src2,
+ (INSERT_get_vinsert_imm To.RC:$ins)))>;
+ }
}
multiclass vinsert_for_type<ValueType EltVT32, int Opcode128,
ValueType EltVT64, int Opcode256> {
- defm NAME # "32x4" : vinsert_for_size<Opcode128,
+
+ let Predicates = [HasVLX] in
+ defm NAME # "32x4Z256" : vinsert_for_size<Opcode128,
+ X86VectorVTInfo< 4, EltVT32, VR128X>,
+ X86VectorVTInfo< 8, EltVT32, VR256X>,
+ vinsert128_insert>, EVEX_V256;
+
+ defm NAME # "32x4Z" : vinsert_for_size<Opcode128,
X86VectorVTInfo< 4, EltVT32, VR128X>,
X86VectorVTInfo<16, EltVT32, VR512>,
- X86VectorVTInfo< 2, EltVT64, VR128X>,
+ vinsert128_insert>, EVEX_V512;
+
+ defm NAME # "64x4Z" : vinsert_for_size<Opcode256,
+ X86VectorVTInfo< 4, EltVT64, VR256X>,
X86VectorVTInfo< 8, EltVT64, VR512>,
- vinsert128_insert,
- INSERT_get_vinsert128_imm>;
- let Predicates = [HasDQI] in
- defm NAME # "64x2" : vinsert_for_size_no_alt<Opcode128,
+ vinsert256_insert>, VEX_W, EVEX_V512;
+
+ let Predicates = [HasVLX, HasDQI] in
+ defm NAME # "64x2Z256" : vinsert_for_size<Opcode128,
+ X86VectorVTInfo< 2, EltVT64, VR128X>,
+ X86VectorVTInfo< 4, EltVT64, VR256X>,
+ vinsert128_insert>, VEX_W, EVEX_V256;
+
+ let Predicates = [HasDQI] in {
+ defm NAME # "64x2Z" : vinsert_for_size<Opcode128,
X86VectorVTInfo< 2, EltVT64, VR128X>,
X86VectorVTInfo< 8, EltVT64, VR512>,
- vinsert128_insert,
- INSERT_get_vinsert128_imm>, VEX_W;
- defm NAME # "64x4" : vinsert_for_size<Opcode256,
- X86VectorVTInfo< 4, EltVT64, VR256X>,
- X86VectorVTInfo< 8, EltVT64, VR512>,
- X86VectorVTInfo< 8, EltVT32, VR256>,
- X86VectorVTInfo<16, EltVT32, VR512>,
- vinsert256_insert,
- INSERT_get_vinsert256_imm>, VEX_W;
- let Predicates = [HasDQI] in
- defm NAME # "32x8" : vinsert_for_size_no_alt<Opcode256,
- X86VectorVTInfo< 8, EltVT32, VR256X>,
- X86VectorVTInfo<16, EltVT32, VR512>,
- vinsert256_insert,
- INSERT_get_vinsert256_imm>;
+ vinsert128_insert>, VEX_W, EVEX_V512;
+
+ defm NAME # "32x8Z" : vinsert_for_size<Opcode256,
+ X86VectorVTInfo< 8, EltVT32, VR256X>,
+ X86VectorVTInfo<16, EltVT32, VR512>,
+ vinsert256_insert>, EVEX_V512;
+ }
}
defm VINSERTF : vinsert_for_type<f32, 0x18, f64, 0x1a>;
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a>;
+// Codegen patterns with the alternative types.
+// Only add these if 64x2 and its friends are not supported natively via AVX512DQ.
+defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX, NoDQI]>;
+defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX, NoDQI]>;
+
+defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512, NoDQI]>;
+defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512, NoDQI]>;
+
+defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
+ vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512, NoDQI]>;
+defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
+ vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512, NoDQI]>;
+
+// Codegen patterns with the alternative types: insert VEC128 into VEC256.
+defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
+defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info,
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
+// Codegen patterns with the alternative types: insert VEC128 into VEC512.
+defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info,
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
+defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info,
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
+// Codegen patterns with the alternative types: insert VEC256 into VEC512.
+defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info,
+ vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
+defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
+ vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
+
// vinsertps - insert f32 to XMM
def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=248111&r1=248110&r2=248111&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Sun Sep 20 01:52:42 2015
@@ -7898,7 +7898,7 @@ def VINSERTF128rm : AVXAIi8<0x18, MRMSrc
[]>, Sched<[WriteFShuffleLd, ReadAfterLd]>, VEX_4V, VEX_L;
}
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX] in {
def : Pat<(vinsert128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
(iPTR imm)),
(VINSERTF128rr VR256:$src1, VR128:$src2,
@@ -8560,7 +8560,7 @@ def VINSERTI128rm : AVX2AIi8<0x38, MRMSr
[]>, Sched<[WriteShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L;
}
-let Predicates = [HasAVX2] in {
+let Predicates = [HasAVX2, NoVLX] in {
def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
(iPTR imm)),
(VINSERTI128rr VR256:$src1, VR128:$src2,
Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=248111&r1=248110&r2=248111&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Sun Sep 20 01:52:42 2015
@@ -29,7 +29,7 @@ enum IntrinsicType {
INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK_RM,
COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
- EXPAND_FROM_MEM, BLEND
+ EXPAND_FROM_MEM, BLEND, INSERT_SUBVEC
};
struct IntrinsicData {
@@ -697,6 +697,30 @@ static const IntrinsicData IntrinsicsWi
X86ISD::VGETMANT, 0),
X86_INTRINSIC_DATA(avx512_mask_getmant_ss, INTR_TYPE_3OP_SCALAR_MASK_RM,
X86ISD::VGETMANT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_insertf32x4_256, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_insertf32x4_512, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_insertf32x8_512, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_insertf64x2_256, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_insertf64x2_512, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_insertf64x4_512, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_inserti32x4_256, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_inserti32x4_512, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_inserti32x8_512, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_inserti64x2_256, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_inserti64x2_512, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_inserti64x4_512, INSERT_SUBVEC,
+ ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_128, INTR_TYPE_1OP_MASK,
ISD::CTLZ, 0),
X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_256, INTR_TYPE_1OP_MASK,
Modified: llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll?rev=248111&r1=248110&r2=248111&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll Sun Sep 20 01:52:42 2015
@@ -231,3 +231,102 @@ define i8 @test17(i1 *%addr, i8 %a) {
ret i8 %x2
}
+define <8 x i64> @test_insert_128_v8i64(<8 x i64> %x, i64 %y) {
+; KNL-LABEL: test_insert_128_v8i64:
+; KNL: ## BB#0:
+; KNL-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm1
+; KNL-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_insert_128_v8i64:
+; SKX: ## BB#0:
+; SKX-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm1
+; SKX-NEXT: vinserti64x2 $0, %xmm1, %zmm0, %zmm0
+; SKX-NEXT: retq
+ %r = insertelement <8 x i64> %x, i64 %y, i32 1
+ ret <8 x i64> %r
+}
+
+define <16 x i32> @test_insert_128_v16i32(<16 x i32> %x, i32 %y) {
+; KNL-LABEL: test_insert_128_v16i32:
+; KNL: ## BB#0:
+; KNL-NEXT: vpinsrd $1, %edi, %xmm0, %xmm1
+; KNL-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_insert_128_v16i32:
+; SKX: ## BB#0:
+; SKX-NEXT: vpinsrd $1, %edi, %xmm0, %xmm1
+; SKX-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
+; SKX-NEXT: retq
+ %r = insertelement <16 x i32> %x, i32 %y, i32 1
+ ret <16 x i32> %r
+}
+
+define <8 x double> @test_insert_128_v8f64(<8 x double> %x, double %y) {
+; KNL-LABEL: test_insert_128_v8f64:
+; KNL: ## BB#0:
+; KNL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0]
+; KNL-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_insert_128_v8f64:
+; SKX: ## BB#0:
+; SKX-NEXT: vunpcklpd %xmm1, %xmm0, %xmm1
+; SKX-NEXT: vinsertf64x2 $0, %xmm1, %zmm0, %zmm0
+; SKX-NEXT: retq
+ %r = insertelement <8 x double> %x, double %y, i32 1
+ ret <8 x double> %r
+}
+
+define <16 x float> @test_insert_128_v16f32(<16 x float> %x, float %y) {
+; KNL-LABEL: test_insert_128_v16f32:
+; KNL: ## BB#0:
+; KNL-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm1
+; KNL-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_insert_128_v16f32:
+; SKX: ## BB#0:
+; SKX-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm1
+; SKX-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
+; SKX-NEXT: retq
+ %r = insertelement <16 x float> %x, float %y, i32 1
+ ret <16 x float> %r
+}
+
+define <16 x i16> @test_insert_128_v16i16(<16 x i16> %x, i16 %y) {
+; KNL-LABEL: test_insert_128_v16i16:
+; KNL: ## BB#0:
+; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; KNL-NEXT: vpinsrw $2, %edi, %xmm1, %xmm1
+; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_insert_128_v16i16:
+; SKX: ## BB#0:
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; SKX-NEXT: vpinsrw $2, %edi, %xmm1, %xmm1
+; SKX-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; SKX-NEXT: retq
+ %r = insertelement <16 x i16> %x, i16 %y, i32 10
+ ret <16 x i16> %r
+}
+
+define <32 x i8> @test_insert_128_v32i8(<32 x i8> %x, i8 %y) {
+; KNL-LABEL: test_insert_128_v32i8:
+; KNL: ## BB#0:
+; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; KNL-NEXT: vpinsrb $4, %edi, %xmm1, %xmm1
+; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test_insert_128_v32i8:
+; SKX: ## BB#0:
+; SKX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; SKX-NEXT: vpinsrb $4, %edi, %xmm1, %xmm1
+; SKX-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; SKX-NEXT: retq
+ %r = insertelement <32 x i8> %x, i8 %y, i32 20
+ ret <32 x i8> %r
+}
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=248111&r1=248110&r2=248111&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Sun Sep 20 01:52:42 2015
@@ -4080,3 +4080,86 @@ define <16 x float>@test_int_x86_avx512_
ret <16 x float> %res2
}
+declare <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float>, <4 x float>, i32, <16 x float>, i8)
+
+define <16 x float>@test_int_x86_avx512_mask_insertf32x4_512(<16 x float> %x0, <4 x float> %x1, <16 x float> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x4_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
+; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm0
+; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i8 %x4)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i8 -1)
+ %res2 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> zeroinitializer, i8 %x4)
+ %res3 = fadd <16 x float> %res, %res1
+ %res4 = fadd <16 x float> %res2, %res3
+ ret <16 x float> %res4
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32>, <4 x i32>, i32, <16 x i32>, i8)
+
+define <16 x i32>@test_int_x86_avx512_mask_inserti32x4_512(<16 x i32> %x0, <4 x i32> %x1, <16 x i32> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x4_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
+; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i8 %x4)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i8 -1)
+ %res2 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i8 %x4)
+ %res3 = add <16 x i32> %res, %res1
+ %res4 = add <16 x i32> %res2, %res3
+ ret <16 x i32> %res4
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double>, <4 x double>, i32, <8 x double>, i8)
+
+define <8 x double>@test_int_x86_avx512_mask_insertf64x4_512(<8 x double> %x0, <4 x double> %x1, <8 x double> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x4_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
+; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 %x4)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 -1)
+ %res2 = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4)
+ %res3 = fadd <8 x double> %res, %res1
+ %res4 = fadd <8 x double> %res2, %res3
+ ret <8 x double> %res4
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64>, <4 x i64>, i32, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_mask_inserti64x4_512(<8 x i64> %x0, <4 x i64> %x1, <8 x i64> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x4_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
+; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0
+; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1)
+ %res2 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4)
+ %res3 = add <8 x i64> %res, %res1
+ %res4 = add <8 x i64> %res2, %res3
+ ret <8 x i64> %res4
+}
+
+
Modified: llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll?rev=248111&r1=248110&r2=248111&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics.ll Sun Sep 20 01:52:42 2015
@@ -356,3 +356,83 @@ define <8 x float>@test_int_x86_avx512_m
%res4 = fadd <8 x float> %res2, %res3
ret <8 x float> %res4
}
+
+declare <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float>, <8 x float>, i32, <16 x float>, i16)
+
+define <16 x float>@test_int_x86_avx512_mask_insertf32x8_512(<16 x float> %x0, <8 x float> %x1, <16 x float> %x3, i16 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x8_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
+; CHECK-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm0
+; CHECK-NEXT: vaddps %zmm3, %zmm2, %zmm1
+; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> %x3, i16 %x4)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> zeroinitializer, i16 %x4)
+ %res2 = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> %x3, i16 -1)
+ %res3 = fadd <16 x float> %res, %res1
+ %res4 = fadd <16 x float> %res2, %res3
+ ret <16 x float> %res4
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double>, <2 x double>, i32, <8 x double>, i8)
+
+define <8 x double>@test_int_x86_avx512_mask_insertf64x2_512(<8 x double> %x0, <2 x double> %x1,<8 x double> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x2_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
+; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %zmm0, %zmm0
+; CHECK-NEXT: vaddpd %zmm3, %zmm2, %zmm1
+; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> %x3, i8 %x4)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4)
+ %res2 = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> %x3, i8 -1)
+ %res3 = fadd <8 x double> %res, %res1
+ %res4 = fadd <8 x double> %res3, %res2
+ ret <8 x double> %res4
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32>, <8 x i32>, i32, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_mask_inserti32x8_512(<16 x i32> %x0, <8 x i32> %x1, <16 x i32> %x3, i16 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x8_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm3 {%k1} {z}
+; CHECK-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
+; CHECK-NEXT: vpaddd %zmm3, %zmm2, %zmm1
+; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> %x3, i16 %x4)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i16 %x4)
+ %res2 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> %x3, i16 -1)
+ %res3 = add <16 x i32> %res, %res1
+ %res4 = add <16 x i32> %res3, %res2
+ ret <16 x i32> %res4
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64>, <2 x i64>, i32, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_mask_inserti64x2_512(<8 x i64> %x0, <2 x i64> %x1, <8 x i64> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x2_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vinserti64x2 $1, %xmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vinserti64x2 $1, %xmm1, %zmm0, %zmm3 {%k1} {z}
+; CHECK-NEXT: vinserti64x2 $1, %xmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm3, %zmm2, %zmm1
+; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4)
+ %res2 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1)
+ %res3 = add <8 x i64> %res, %res1
+ %res4 = add <8 x i64> %res2, %res3
+ ret <8 x i64> %res4
+}
Modified: llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics.ll?rev=248111&r1=248110&r2=248111&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512dqvl-intrinsics.ll Sun Sep 20 01:52:42 2015
@@ -1668,3 +1668,43 @@ define <2 x double>@test_int_x86_avx512_
%res4 = fadd <2 x double> %res3, %res2
ret <2 x double> %res4
}
+
+declare <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double>, <2 x double>, i32, <4 x double>, i8)
+
+define <4 x double>@test_int_x86_avx512_mask_insertf64x2_256(<4 x double> %x0, <2 x double> %x1, <4 x double> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x2_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> %x3, i8 %x4)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> %x3, i8 -1)
+ %res2 = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> zeroinitializer, i8 %x4)
+ %res3 = fadd <4 x double> %res, %res1
+ %res4 = fadd <4 x double> %res2, %res3
+ ret <4 x double> %res4
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64>, <2 x i64>, i32, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_inserti64x2_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x2_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovb %edi, %k1
+; CHECK-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> %x3, i8 %x4)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> %x3, i8 -1)
+ %res2 = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> zeroinitializer, i8 %x4)
+ %res3 = add <4 x i64> %res, %res1
+ %res4 = add <4 x i64> %res3, %res2
+ ret <4 x i64> %res4
+}
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll?rev=248111&r1=248110&r2=248111&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll Sun Sep 20 01:52:42 2015
@@ -4744,3 +4744,45 @@ define <4 x i64>@test_int_x86_avx512_mas
ret <4 x i64> %res2
}
+declare <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float>, <4 x float>, i32, <8 x float>, i8)
+
+define <8 x float>@test_int_x86_avx512_mask_insertf32x4_256(<8 x float> %x0, <4 x float> %x1, <8 x float> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x4_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> %x3, i8 %x4)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> %x3, i8 -1)
+ %res2 = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> zeroinitializer, i8 %x4)
+ %res3 = fadd <8 x float> %res, %res1
+ %res4 = fadd <8 x float> %res2, %res3
+ ret <8 x float> %res4
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32>, <4 x i32>, i32, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_inserti32x4_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x3, i8 %x4) {
+; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x4_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0
+; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
+
+ %res = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 %x4)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 -1)
+ %res2 = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> zeroinitializer, i8 %x4)
+ %res3 = add <8 x i32> %res, %res1
+ %res4 = add <8 x i32> %res2, %res3
+ ret <8 x i32> %res4
+}
Modified: llvm/trunk/test/CodeGen/X86/vector-lzcnt-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-lzcnt-256.ll?rev=248111&r1=248110&r2=248111&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-lzcnt-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-lzcnt-256.ll Sun Sep 20 01:52:42 2015
@@ -544,7 +544,7 @@ define <16 x i16> @testv16i16(<16 x i16>
; AVX512-NEXT: vpextrw $7, %xmm0, %eax
; AVX512-NEXT: lzcntw %ax, %ax
; AVX512-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0
-; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
%out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %in, i1 0)
ret <16 x i16> %out
@@ -742,7 +742,7 @@ define <16 x i16> @testv16i16u(<16 x i16
; AVX512-NEXT: vpextrw $7, %xmm0, %eax
; AVX512-NEXT: lzcntw %ax, %ax
; AVX512-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0
-; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
%out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %in, i1 -1)
ret <16 x i16> %out
@@ -1214,7 +1214,7 @@ define <32 x i8> @testv32i8(<32 x i8> %i
; AVX512-NEXT: lzcntl %eax, %eax
; AVX512-NEXT: addl $-24, %eax
; AVX512-NEXT: vpinsrb $15, %eax, %xmm2, %xmm0
-; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
%out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %in, i1 0)
ret <32 x i8> %out
@@ -1620,7 +1620,7 @@ define <32 x i8> @testv32i8u(<32 x i8> %
; AVX512-NEXT: lzcntl %eax, %eax
; AVX512-NEXT: addl $-24, %eax
; AVX512-NEXT: vpinsrb $15, %eax, %xmm2, %xmm0
-; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; AVX512-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; AVX512-NEXT: retq
%out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %in, i1 -1)
ret <32 x i8> %out
Modified: llvm/trunk/test/MC/X86/avx512-encodings.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/avx512-encodings.s?rev=248111&r1=248110&r2=248111&view=diff
==============================================================================
--- llvm/trunk/test/MC/X86/avx512-encodings.s (original)
+++ llvm/trunk/test/MC/X86/avx512-encodings.s Sun Sep 20 01:52:42 2015
@@ -14958,6 +14958,166 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0xf2,0xc5,0x08,0x43,0x92,0xf8,0xfb,0xff,0xff]
vgetexpsd -1032(%rdx), %xmm7, %xmm2
+// CHECK: vinsertf32x4 $171, %xmm3, %zmm26, %zmm11
+// CHECK: encoding: [0x62,0x73,0x2d,0x40,0x18,0xdb,0xab]
+ vinsertf32x4 $0xab, %xmm3, %zmm26, %zmm11
+
+// CHECK: vinsertf32x4 $171, %xmm3, %zmm26, %zmm11 {%k1}
+// CHECK: encoding: [0x62,0x73,0x2d,0x41,0x18,0xdb,0xab]
+ vinsertf32x4 $0xab, %xmm3, %zmm26, %zmm11 {%k1}
+
+// CHECK: vinsertf32x4 $171, %xmm3, %zmm26, %zmm11 {%k1} {z}
+// CHECK: encoding: [0x62,0x73,0x2d,0xc1,0x18,0xdb,0xab]
+ vinsertf32x4 $0xab, %xmm3, %zmm26, %zmm11 {%k1} {z}
+
+// CHECK: vinsertf32x4 $123, %xmm3, %zmm26, %zmm11
+// CHECK: encoding: [0x62,0x73,0x2d,0x40,0x18,0xdb,0x7b]
+ vinsertf32x4 $0x7b, %xmm3, %zmm26, %zmm11
+
+// CHECK: vinsertf32x4 $123, (%rcx), %zmm26, %zmm11
+// CHECK: encoding: [0x62,0x73,0x2d,0x40,0x18,0x19,0x7b]
+ vinsertf32x4 $0x7b, (%rcx), %zmm26, %zmm11
+
+// CHECK: vinsertf32x4 $123, 291(%rax,%r14,8), %zmm26, %zmm11
+// CHECK: encoding: [0x62,0x33,0x2d,0x40,0x18,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vinsertf32x4 $0x7b, 291(%rax,%r14,8), %zmm26, %zmm11
+
+// CHECK: vinsertf32x4 $123, 2032(%rdx), %zmm26, %zmm11
+// CHECK: encoding: [0x62,0x73,0x2d,0x40,0x18,0x5a,0x7f,0x7b]
+ vinsertf32x4 $0x7b, 2032(%rdx), %zmm26, %zmm11
+
+// CHECK: vinsertf32x4 $123, 2048(%rdx), %zmm26, %zmm11
+// CHECK: encoding: [0x62,0x73,0x2d,0x40,0x18,0x9a,0x00,0x08,0x00,0x00,0x7b]
+ vinsertf32x4 $0x7b, 2048(%rdx), %zmm26, %zmm11
+
+// CHECK: vinsertf32x4 $123, -2048(%rdx), %zmm26, %zmm11
+// CHECK: encoding: [0x62,0x73,0x2d,0x40,0x18,0x5a,0x80,0x7b]
+ vinsertf32x4 $0x7b, -2048(%rdx), %zmm26, %zmm11
+
+// CHECK: vinsertf32x4 $123, -2064(%rdx), %zmm26, %zmm11
+// CHECK: encoding: [0x62,0x73,0x2d,0x40,0x18,0x9a,0xf0,0xf7,0xff,0xff,0x7b]
+ vinsertf32x4 $0x7b, -2064(%rdx), %zmm26, %zmm11
+
+// CHECK: vinsertf64x4 $171, %ymm7, %zmm5, %zmm1
+// CHECK: encoding: [0x62,0xf3,0xd5,0x48,0x1a,0xcf,0xab]
+ vinsertf64x4 $0xab, %ymm7, %zmm5, %zmm1
+
+// CHECK: vinsertf64x4 $171, %ymm7, %zmm5, %zmm1 {%k1}
+// CHECK: encoding: [0x62,0xf3,0xd5,0x49,0x1a,0xcf,0xab]
+ vinsertf64x4 $0xab, %ymm7, %zmm5, %zmm1 {%k1}
+
+// CHECK: vinsertf64x4 $171, %ymm7, %zmm5, %zmm1 {%k1} {z}
+// CHECK: encoding: [0x62,0xf3,0xd5,0xc9,0x1a,0xcf,0xab]
+ vinsertf64x4 $0xab, %ymm7, %zmm5, %zmm1 {%k1} {z}
+
+// CHECK: vinsertf64x4 $123, %ymm7, %zmm5, %zmm1
+// CHECK: encoding: [0x62,0xf3,0xd5,0x48,0x1a,0xcf,0x7b]
+ vinsertf64x4 $0x7b, %ymm7, %zmm5, %zmm1
+
+// CHECK: vinsertf64x4 $123, (%rcx), %zmm5, %zmm1
+// CHECK: encoding: [0x62,0xf3,0xd5,0x48,0x1a,0x09,0x7b]
+ vinsertf64x4 $0x7b, (%rcx), %zmm5, %zmm1
+
+// CHECK: vinsertf64x4 $123, 291(%rax,%r14,8), %zmm5, %zmm1
+// CHECK: encoding: [0x62,0xb3,0xd5,0x48,0x1a,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vinsertf64x4 $0x7b, 291(%rax,%r14,8), %zmm5, %zmm1
+
+// CHECK: vinsertf64x4 $123, 4064(%rdx), %zmm5, %zmm1
+// CHECK: encoding: [0x62,0xf3,0xd5,0x48,0x1a,0x4a,0x7f,0x7b]
+ vinsertf64x4 $0x7b, 4064(%rdx), %zmm5, %zmm1
+
+// CHECK: vinsertf64x4 $123, 4096(%rdx), %zmm5, %zmm1
+// CHECK: encoding: [0x62,0xf3,0xd5,0x48,0x1a,0x8a,0x00,0x10,0x00,0x00,0x7b]
+ vinsertf64x4 $0x7b, 4096(%rdx), %zmm5, %zmm1
+
+// CHECK: vinsertf64x4 $123, -4096(%rdx), %zmm5, %zmm1
+// CHECK: encoding: [0x62,0xf3,0xd5,0x48,0x1a,0x4a,0x80,0x7b]
+ vinsertf64x4 $0x7b, -4096(%rdx), %zmm5, %zmm1
+
+// CHECK: vinsertf64x4 $123, -4128(%rdx), %zmm5, %zmm1
+// CHECK: encoding: [0x62,0xf3,0xd5,0x48,0x1a,0x8a,0xe0,0xef,0xff,0xff,0x7b]
+ vinsertf64x4 $0x7b, -4128(%rdx), %zmm5, %zmm1
+
+// CHECK: vinserti32x4 $171, %xmm10, %zmm13, %zmm17
+// CHECK: encoding: [0x62,0xc3,0x15,0x48,0x38,0xca,0xab]
+ vinserti32x4 $0xab, %xmm10, %zmm13, %zmm17
+
+// CHECK: vinserti32x4 $171, %xmm10, %zmm13, %zmm17 {%k6}
+// CHECK: encoding: [0x62,0xc3,0x15,0x4e,0x38,0xca,0xab]
+ vinserti32x4 $0xab, %xmm10, %zmm13, %zmm17 {%k6}
+
+// CHECK: vinserti32x4 $171, %xmm10, %zmm13, %zmm17 {%k6} {z}
+// CHECK: encoding: [0x62,0xc3,0x15,0xce,0x38,0xca,0xab]
+ vinserti32x4 $0xab, %xmm10, %zmm13, %zmm17 {%k6} {z}
+
+// CHECK: vinserti32x4 $123, %xmm10, %zmm13, %zmm17
+// CHECK: encoding: [0x62,0xc3,0x15,0x48,0x38,0xca,0x7b]
+ vinserti32x4 $0x7b, %xmm10, %zmm13, %zmm17
+
+// CHECK: vinserti32x4 $123, (%rcx), %zmm13, %zmm17
+// CHECK: encoding: [0x62,0xe3,0x15,0x48,0x38,0x09,0x7b]
+ vinserti32x4 $0x7b, (%rcx), %zmm13, %zmm17
+
+// CHECK: vinserti32x4 $123, 291(%rax,%r14,8), %zmm13, %zmm17
+// CHECK: encoding: [0x62,0xa3,0x15,0x48,0x38,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vinserti32x4 $0x7b, 291(%rax,%r14,8), %zmm13, %zmm17
+
+// CHECK: vinserti32x4 $123, 2032(%rdx), %zmm13, %zmm17
+// CHECK: encoding: [0x62,0xe3,0x15,0x48,0x38,0x4a,0x7f,0x7b]
+ vinserti32x4 $0x7b, 2032(%rdx), %zmm13, %zmm17
+
+// CHECK: vinserti32x4 $123, 2048(%rdx), %zmm13, %zmm17
+// CHECK: encoding: [0x62,0xe3,0x15,0x48,0x38,0x8a,0x00,0x08,0x00,0x00,0x7b]
+ vinserti32x4 $0x7b, 2048(%rdx), %zmm13, %zmm17
+
+// CHECK: vinserti32x4 $123, -2048(%rdx), %zmm13, %zmm17
+// CHECK: encoding: [0x62,0xe3,0x15,0x48,0x38,0x4a,0x80,0x7b]
+ vinserti32x4 $0x7b, -2048(%rdx), %zmm13, %zmm17
+
+// CHECK: vinserti32x4 $123, -2064(%rdx), %zmm13, %zmm17
+// CHECK: encoding: [0x62,0xe3,0x15,0x48,0x38,0x8a,0xf0,0xf7,0xff,0xff,0x7b]
+ vinserti32x4 $0x7b, -2064(%rdx), %zmm13, %zmm17
+
+// CHECK: vinserti64x4 $171, %ymm4, %zmm25, %zmm4
+// CHECK: encoding: [0x62,0xf3,0xb5,0x40,0x3a,0xe4,0xab]
+ vinserti64x4 $0xab, %ymm4, %zmm25, %zmm4
+
+// CHECK: vinserti64x4 $171, %ymm4, %zmm25, %zmm4 {%k1}
+// CHECK: encoding: [0x62,0xf3,0xb5,0x41,0x3a,0xe4,0xab]
+ vinserti64x4 $0xab, %ymm4, %zmm25, %zmm4 {%k1}
+
+// CHECK: vinserti64x4 $171, %ymm4, %zmm25, %zmm4 {%k1} {z}
+// CHECK: encoding: [0x62,0xf3,0xb5,0xc1,0x3a,0xe4,0xab]
+ vinserti64x4 $0xab, %ymm4, %zmm25, %zmm4 {%k1} {z}
+
+// CHECK: vinserti64x4 $123, %ymm4, %zmm25, %zmm4
+// CHECK: encoding: [0x62,0xf3,0xb5,0x40,0x3a,0xe4,0x7b]
+ vinserti64x4 $0x7b, %ymm4, %zmm25, %zmm4
+
+// CHECK: vinserti64x4 $123, (%rcx), %zmm25, %zmm4
+// CHECK: encoding: [0x62,0xf3,0xb5,0x40,0x3a,0x21,0x7b]
+ vinserti64x4 $0x7b, (%rcx), %zmm25, %zmm4
+
+// CHECK: vinserti64x4 $123, 291(%rax,%r14,8), %zmm25, %zmm4
+// CHECK: encoding: [0x62,0xb3,0xb5,0x40,0x3a,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vinserti64x4 $0x7b, 291(%rax,%r14,8), %zmm25, %zmm4
+
+// CHECK: vinserti64x4 $123, 4064(%rdx), %zmm25, %zmm4
+// CHECK: encoding: [0x62,0xf3,0xb5,0x40,0x3a,0x62,0x7f,0x7b]
+ vinserti64x4 $0x7b, 4064(%rdx), %zmm25, %zmm4
+
+// CHECK: vinserti64x4 $123, 4096(%rdx), %zmm25, %zmm4
+// CHECK: encoding: [0x62,0xf3,0xb5,0x40,0x3a,0xa2,0x00,0x10,0x00,0x00,0x7b]
+ vinserti64x4 $0x7b, 4096(%rdx), %zmm25, %zmm4
+
+// CHECK: vinserti64x4 $123, -4096(%rdx), %zmm25, %zmm4
+// CHECK: encoding: [0x62,0xf3,0xb5,0x40,0x3a,0x62,0x80,0x7b]
+ vinserti64x4 $0x7b, -4096(%rdx), %zmm25, %zmm4
+
+// CHECK: vinserti64x4 $123, -4128(%rdx), %zmm25, %zmm4
+// CHECK: encoding: [0x62,0xf3,0xb5,0x40,0x3a,0xa2,0xe0,0xef,0xff,0xff,0x7b]
+ vinserti64x4 $0x7b, -4128(%rdx), %zmm25, %zmm4
+
// CHECK: vextractf32x4 $171, %zmm21, %xmm15
// CHECK: encoding: [0x62,0xc3,0x7d,0x48,0x19,0xef,0xab]
vextractf32x4 $0xab, %zmm21, %xmm15
Modified: llvm/trunk/test/MC/X86/x86-64-avx512dq.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/x86-64-avx512dq.s?rev=248111&r1=248110&r2=248111&view=diff
==============================================================================
--- llvm/trunk/test/MC/X86/x86-64-avx512dq.s (original)
+++ llvm/trunk/test/MC/X86/x86-64-avx512dq.s Sun Sep 20 01:52:42 2015
@@ -2371,6 +2371,326 @@
// CHECK: encoding: [0x62,0xa1,0xff,0xca,0x7a,0xd5]
vcvtuqq2ps %zmm21, %ymm18 {%k2} {z}
+// CHECK: vinsertf32x8 $171, %ymm24, %zmm17, %zmm29
+// CHECK: encoding: [0x62,0x03,0x75,0x40,0x1a,0xe8,0xab]
+ vinsertf32x8 $0xab, %ymm24, %zmm17, %zmm29
+
+// CHECK: vinsertf32x8 $171, %ymm24, %zmm17, %zmm29 {%k3}
+// CHECK: encoding: [0x62,0x03,0x75,0x43,0x1a,0xe8,0xab]
+ vinsertf32x8 $0xab, %ymm24, %zmm17, %zmm29 {%k3}
+
+// CHECK: vinsertf32x8 $171, %ymm24, %zmm17, %zmm29 {%k3} {z}
+// CHECK: encoding: [0x62,0x03,0x75,0xc3,0x1a,0xe8,0xab]
+ vinsertf32x8 $0xab, %ymm24, %zmm17, %zmm29 {%k3} {z}
+
+// CHECK: vinsertf32x8 $123, %ymm24, %zmm17, %zmm29
+// CHECK: encoding: [0x62,0x03,0x75,0x40,0x1a,0xe8,0x7b]
+ vinsertf32x8 $0x7b, %ymm24, %zmm17, %zmm29
+
+// CHECK: vinsertf32x8 $123, (%rcx), %zmm17, %zmm29
+// CHECK: encoding: [0x62,0x63,0x75,0x40,0x1a,0x29,0x7b]
+ vinsertf32x8 $0x7b,(%rcx), %zmm17, %zmm29
+
+// CHECK: vinsertf32x8 $123, 291(%rax,%r14,8), %zmm17, %zmm29
+// CHECK: encoding: [0x62,0x23,0x75,0x40,0x1a,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vinsertf32x8 $0x7b,291(%rax,%r14,8), %zmm17, %zmm29
+
+// CHECK: vinsertf32x8 $123, 4064(%rdx), %zmm17, %zmm29
+// CHECK: encoding: [0x62,0x63,0x75,0x40,0x1a,0x6a,0x7f,0x7b]
+ vinsertf32x8 $0x7b,4064(%rdx), %zmm17, %zmm29
+
+// CHECK: vinsertf32x8 $123, 4096(%rdx), %zmm17, %zmm29
+// CHECK: encoding: [0x62,0x63,0x75,0x40,0x1a,0xaa,0x00,0x10,0x00,0x00,0x7b]
+ vinsertf32x8 $0x7b,4096(%rdx), %zmm17, %zmm29
+
+// CHECK: vinsertf32x8 $123, -4096(%rdx), %zmm17, %zmm29
+// CHECK: encoding: [0x62,0x63,0x75,0x40,0x1a,0x6a,0x80,0x7b]
+ vinsertf32x8 $0x7b,-4096(%rdx), %zmm17, %zmm29
+
+// CHECK: vinsertf32x8 $123, -4128(%rdx), %zmm17, %zmm29
+// CHECK: encoding: [0x62,0x63,0x75,0x40,0x1a,0xaa,0xe0,0xef,0xff,0xff,0x7b]
+ vinsertf32x8 $0x7b,-4128(%rdx), %zmm17, %zmm29
+
+// CHECK: vinsertf32x8 $171, %ymm22, %zmm28, %zmm29
+// CHECK: encoding: [0x62,0x23,0x1d,0x40,0x1a,0xee,0xab]
+ vinsertf32x8 $0xab, %ymm22, %zmm28, %zmm29
+
+// CHECK: vinsertf32x8 $171, %ymm22, %zmm28, %zmm29 {%k5}
+// CHECK: encoding: [0x62,0x23,0x1d,0x45,0x1a,0xee,0xab]
+ vinsertf32x8 $0xab, %ymm22, %zmm28, %zmm29 {%k5}
+
+// CHECK: vinsertf32x8 $171, %ymm22, %zmm28, %zmm29 {%k5} {z}
+// CHECK: encoding: [0x62,0x23,0x1d,0xc5,0x1a,0xee,0xab]
+ vinsertf32x8 $0xab, %ymm22, %zmm28, %zmm29 {%k5} {z}
+
+// CHECK: vinsertf32x8 $123, %ymm22, %zmm28, %zmm29
+// CHECK: encoding: [0x62,0x23,0x1d,0x40,0x1a,0xee,0x7b]
+ vinsertf32x8 $0x7b, %ymm22, %zmm28, %zmm29
+
+// CHECK: vinsertf32x8 $123, (%rcx), %zmm28, %zmm29
+// CHECK: encoding: [0x62,0x63,0x1d,0x40,0x1a,0x29,0x7b]
+ vinsertf32x8 $0x7b,(%rcx), %zmm28, %zmm29
+
+// CHECK: vinsertf32x8 $123, 4660(%rax,%r14,8), %zmm28, %zmm29
+// CHECK: encoding: [0x62,0x23,0x1d,0x40,0x1a,0xac,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vinsertf32x8 $0x7b,4660(%rax,%r14,8), %zmm28, %zmm29
+
+// CHECK: vinsertf32x8 $123, 4064(%rdx), %zmm28, %zmm29
+// CHECK: encoding: [0x62,0x63,0x1d,0x40,0x1a,0x6a,0x7f,0x7b]
+ vinsertf32x8 $0x7b,4064(%rdx), %zmm28, %zmm29
+
+// CHECK: vinsertf32x8 $123, 4096(%rdx), %zmm28, %zmm29
+// CHECK: encoding: [0x62,0x63,0x1d,0x40,0x1a,0xaa,0x00,0x10,0x00,0x00,0x7b]
+ vinsertf32x8 $0x7b,4096(%rdx), %zmm28, %zmm29
+
+// CHECK: vinsertf32x8 $123, -4096(%rdx), %zmm28, %zmm29
+// CHECK: encoding: [0x62,0x63,0x1d,0x40,0x1a,0x6a,0x80,0x7b]
+ vinsertf32x8 $0x7b,-4096(%rdx), %zmm28, %zmm29
+
+// CHECK: vinsertf32x8 $123, -4128(%rdx), %zmm28, %zmm29
+// CHECK: encoding: [0x62,0x63,0x1d,0x40,0x1a,0xaa,0xe0,0xef,0xff,0xff,0x7b]
+ vinsertf32x8 $0x7b,-4128(%rdx), %zmm28, %zmm29
+
+// CHECK: vinsertf64x2 $171, %xmm25, %zmm28, %zmm17
+// CHECK: encoding: [0x62,0x83,0x9d,0x40,0x18,0xc9,0xab]
+ vinsertf64x2 $0xab, %xmm25, %zmm28, %zmm17
+
+// CHECK: vinsertf64x2 $171, %xmm25, %zmm28, %zmm17 {%k2}
+// CHECK: encoding: [0x62,0x83,0x9d,0x42,0x18,0xc9,0xab]
+ vinsertf64x2 $0xab, %xmm25, %zmm28, %zmm17 {%k2}
+
+// CHECK: vinsertf64x2 $171, %xmm25, %zmm28, %zmm17 {%k2} {z}
+// CHECK: encoding: [0x62,0x83,0x9d,0xc2,0x18,0xc9,0xab]
+ vinsertf64x2 $0xab, %xmm25, %zmm28, %zmm17 {%k2} {z}
+
+// CHECK: vinsertf64x2 $123, %xmm25, %zmm28, %zmm17
+// CHECK: encoding: [0x62,0x83,0x9d,0x40,0x18,0xc9,0x7b]
+ vinsertf64x2 $0x7b, %xmm25, %zmm28, %zmm17
+
+// CHECK: vinsertf64x2 $123, (%rcx), %zmm28, %zmm17
+// CHECK: encoding: [0x62,0xe3,0x9d,0x40,0x18,0x09,0x7b]
+ vinsertf64x2 $0x7b,(%rcx), %zmm28, %zmm17
+
+// CHECK: vinsertf64x2 $123, 291(%rax,%r14,8), %zmm28, %zmm17
+// CHECK: encoding: [0x62,0xa3,0x9d,0x40,0x18,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vinsertf64x2 $0x7b,291(%rax,%r14,8), %zmm28, %zmm17
+
+// CHECK: vinsertf64x2 $123, 2032(%rdx), %zmm28, %zmm17
+// CHECK: encoding: [0x62,0xe3,0x9d,0x40,0x18,0x4a,0x7f,0x7b]
+ vinsertf64x2 $0x7b,2032(%rdx), %zmm28, %zmm17
+
+// CHECK: vinsertf64x2 $123, 2048(%rdx), %zmm28, %zmm17
+// CHECK: encoding: [0x62,0xe3,0x9d,0x40,0x18,0x8a,0x00,0x08,0x00,0x00,0x7b]
+ vinsertf64x2 $0x7b,2048(%rdx), %zmm28, %zmm17
+
+// CHECK: vinsertf64x2 $123, -2048(%rdx), %zmm28, %zmm17
+// CHECK: encoding: [0x62,0xe3,0x9d,0x40,0x18,0x4a,0x80,0x7b]
+ vinsertf64x2 $0x7b,-2048(%rdx), %zmm28, %zmm17
+
+// CHECK: vinsertf64x2 $123, -2064(%rdx), %zmm28, %zmm17
+// CHECK: encoding: [0x62,0xe3,0x9d,0x40,0x18,0x8a,0xf0,0xf7,0xff,0xff,0x7b]
+ vinsertf64x2 $0x7b,-2064(%rdx), %zmm28, %zmm17
+
+// CHECK: vinsertf64x2 $171, %xmm28, %zmm17, %zmm20
+// CHECK: encoding: [0x62,0x83,0xf5,0x40,0x18,0xe4,0xab]
+ vinsertf64x2 $0xab, %xmm28, %zmm17, %zmm20
+
+// CHECK: vinsertf64x2 $171, %xmm28, %zmm17, %zmm20 {%k7}
+// CHECK: encoding: [0x62,0x83,0xf5,0x47,0x18,0xe4,0xab]
+ vinsertf64x2 $0xab, %xmm28, %zmm17, %zmm20 {%k7}
+
+// CHECK: vinsertf64x2 $171, %xmm28, %zmm17, %zmm20 {%k7} {z}
+// CHECK: encoding: [0x62,0x83,0xf5,0xc7,0x18,0xe4,0xab]
+ vinsertf64x2 $0xab, %xmm28, %zmm17, %zmm20 {%k7} {z}
+
+// CHECK: vinsertf64x2 $123, %xmm28, %zmm17, %zmm20
+// CHECK: encoding: [0x62,0x83,0xf5,0x40,0x18,0xe4,0x7b]
+ vinsertf64x2 $0x7b, %xmm28, %zmm17, %zmm20
+
+// CHECK: vinsertf64x2 $123, (%rcx), %zmm17, %zmm20
+// CHECK: encoding: [0x62,0xe3,0xf5,0x40,0x18,0x21,0x7b]
+ vinsertf64x2 $0x7b,(%rcx), %zmm17, %zmm20
+
+// CHECK: vinsertf64x2 $123, 4660(%rax,%r14,8), %zmm17, %zmm20
+// CHECK: encoding: [0x62,0xa3,0xf5,0x40,0x18,0xa4,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vinsertf64x2 $0x7b,4660(%rax,%r14,8), %zmm17, %zmm20
+
+// CHECK: vinsertf64x2 $123, 2032(%rdx), %zmm17, %zmm20
+// CHECK: encoding: [0x62,0xe3,0xf5,0x40,0x18,0x62,0x7f,0x7b]
+ vinsertf64x2 $0x7b,2032(%rdx), %zmm17, %zmm20
+
+// CHECK: vinsertf64x2 $123, 2048(%rdx), %zmm17, %zmm20
+// CHECK: encoding: [0x62,0xe3,0xf5,0x40,0x18,0xa2,0x00,0x08,0x00,0x00,0x7b]
+ vinsertf64x2 $0x7b,2048(%rdx), %zmm17, %zmm20
+
+// CHECK: vinsertf64x2 $123, -2048(%rdx), %zmm17, %zmm20
+// CHECK: encoding: [0x62,0xe3,0xf5,0x40,0x18,0x62,0x80,0x7b]
+ vinsertf64x2 $0x7b,-2048(%rdx), %zmm17, %zmm20
+
+// CHECK: vinsertf64x2 $123, -2064(%rdx), %zmm17, %zmm20
+// CHECK: encoding: [0x62,0xe3,0xf5,0x40,0x18,0xa2,0xf0,0xf7,0xff,0xff,0x7b]
+ vinsertf64x2 $0x7b,-2064(%rdx), %zmm17, %zmm20
+
+// CHECK: vinserti32x8 $171, %ymm22, %zmm24, %zmm28
+// CHECK: encoding: [0x62,0x23,0x3d,0x40,0x3a,0xe6,0xab]
+ vinserti32x8 $0xab, %ymm22, %zmm24, %zmm28
+
+// CHECK: vinserti32x8 $171, %ymm22, %zmm24, %zmm28 {%k2}
+// CHECK: encoding: [0x62,0x23,0x3d,0x42,0x3a,0xe6,0xab]
+ vinserti32x8 $0xab, %ymm22, %zmm24, %zmm28 {%k2}
+
+// CHECK: vinserti32x8 $171, %ymm22, %zmm24, %zmm28 {%k2} {z}
+// CHECK: encoding: [0x62,0x23,0x3d,0xc2,0x3a,0xe6,0xab]
+ vinserti32x8 $0xab, %ymm22, %zmm24, %zmm28 {%k2} {z}
+
+// CHECK: vinserti32x8 $123, %ymm22, %zmm24, %zmm28
+// CHECK: encoding: [0x62,0x23,0x3d,0x40,0x3a,0xe6,0x7b]
+ vinserti32x8 $0x7b, %ymm22, %zmm24, %zmm28
+
+// CHECK: vinserti32x8 $123, (%rcx), %zmm24, %zmm28
+// CHECK: encoding: [0x62,0x63,0x3d,0x40,0x3a,0x21,0x7b]
+ vinserti32x8 $0x7b,(%rcx), %zmm24, %zmm28
+
+// CHECK: vinserti32x8 $123, 291(%rax,%r14,8), %zmm24, %zmm28
+// CHECK: encoding: [0x62,0x23,0x3d,0x40,0x3a,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vinserti32x8 $0x7b,291(%rax,%r14,8), %zmm24, %zmm28
+
+// CHECK: vinserti32x8 $123, 4064(%rdx), %zmm24, %zmm28
+// CHECK: encoding: [0x62,0x63,0x3d,0x40,0x3a,0x62,0x7f,0x7b]
+ vinserti32x8 $0x7b,4064(%rdx), %zmm24, %zmm28
+
+// CHECK: vinserti32x8 $123, 4096(%rdx), %zmm24, %zmm28
+// CHECK: encoding: [0x62,0x63,0x3d,0x40,0x3a,0xa2,0x00,0x10,0x00,0x00,0x7b]
+ vinserti32x8 $0x7b,4096(%rdx), %zmm24, %zmm28
+
+// CHECK: vinserti32x8 $123, -4096(%rdx), %zmm24, %zmm28
+// CHECK: encoding: [0x62,0x63,0x3d,0x40,0x3a,0x62,0x80,0x7b]
+ vinserti32x8 $0x7b,-4096(%rdx), %zmm24, %zmm28
+
+// CHECK: vinserti32x8 $123, -4128(%rdx), %zmm24, %zmm28
+// CHECK: encoding: [0x62,0x63,0x3d,0x40,0x3a,0xa2,0xe0,0xef,0xff,0xff,0x7b]
+ vinserti32x8 $0x7b,-4128(%rdx), %zmm24, %zmm28
+
+// CHECK: vinserti32x8 $171, %ymm24, %zmm20, %zmm28
+// CHECK: encoding: [0x62,0x03,0x5d,0x40,0x3a,0xe0,0xab]
+ vinserti32x8 $0xab, %ymm24, %zmm20, %zmm28
+
+// CHECK: vinserti32x8 $171, %ymm24, %zmm20, %zmm28 {%k7}
+// CHECK: encoding: [0x62,0x03,0x5d,0x47,0x3a,0xe0,0xab]
+ vinserti32x8 $0xab, %ymm24, %zmm20, %zmm28 {%k7}
+
+// CHECK: vinserti32x8 $171, %ymm24, %zmm20, %zmm28 {%k7} {z}
+// CHECK: encoding: [0x62,0x03,0x5d,0xc7,0x3a,0xe0,0xab]
+ vinserti32x8 $0xab, %ymm24, %zmm20, %zmm28 {%k7} {z}
+
+// CHECK: vinserti32x8 $123, %ymm24, %zmm20, %zmm28
+// CHECK: encoding: [0x62,0x03,0x5d,0x40,0x3a,0xe0,0x7b]
+ vinserti32x8 $0x7b, %ymm24, %zmm20, %zmm28
+
+// CHECK: vinserti32x8 $123, (%rcx), %zmm20, %zmm28
+// CHECK: encoding: [0x62,0x63,0x5d,0x40,0x3a,0x21,0x7b]
+ vinserti32x8 $0x7b,(%rcx), %zmm20, %zmm28
+
+// CHECK: vinserti32x8 $123, 4660(%rax,%r14,8), %zmm20, %zmm28
+// CHECK: encoding: [0x62,0x23,0x5d,0x40,0x3a,0xa4,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vinserti32x8 $0x7b,4660(%rax,%r14,8), %zmm20, %zmm28
+
+// CHECK: vinserti32x8 $123, 4064(%rdx), %zmm20, %zmm28
+// CHECK: encoding: [0x62,0x63,0x5d,0x40,0x3a,0x62,0x7f,0x7b]
+ vinserti32x8 $0x7b,4064(%rdx), %zmm20, %zmm28
+
+// CHECK: vinserti32x8 $123, 4096(%rdx), %zmm20, %zmm28
+// CHECK: encoding: [0x62,0x63,0x5d,0x40,0x3a,0xa2,0x00,0x10,0x00,0x00,0x7b]
+ vinserti32x8 $0x7b,4096(%rdx), %zmm20, %zmm28
+
+// CHECK: vinserti32x8 $123, -4096(%rdx), %zmm20, %zmm28
+// CHECK: encoding: [0x62,0x63,0x5d,0x40,0x3a,0x62,0x80,0x7b]
+ vinserti32x8 $0x7b,-4096(%rdx), %zmm20, %zmm28
+
+// CHECK: vinserti32x8 $123, -4128(%rdx), %zmm20, %zmm28
+// CHECK: encoding: [0x62,0x63,0x5d,0x40,0x3a,0xa2,0xe0,0xef,0xff,0xff,0x7b]
+ vinserti32x8 $0x7b,-4128(%rdx), %zmm20, %zmm28
+
+// CHECK: vinserti64x2 $171, %xmm26, %zmm18, %zmm28
+// CHECK: encoding: [0x62,0x03,0xed,0x40,0x38,0xe2,0xab]
+ vinserti64x2 $0xab, %xmm26, %zmm18, %zmm28
+
+// CHECK: vinserti64x2 $171, %xmm26, %zmm18, %zmm28 {%k7}
+// CHECK: encoding: [0x62,0x03,0xed,0x47,0x38,0xe2,0xab]
+ vinserti64x2 $0xab, %xmm26, %zmm18, %zmm28 {%k7}
+
+// CHECK: vinserti64x2 $171, %xmm26, %zmm18, %zmm28 {%k7} {z}
+// CHECK: encoding: [0x62,0x03,0xed,0xc7,0x38,0xe2,0xab]
+ vinserti64x2 $0xab, %xmm26, %zmm18, %zmm28 {%k7} {z}
+
+// CHECK: vinserti64x2 $123, %xmm26, %zmm18, %zmm28
+// CHECK: encoding: [0x62,0x03,0xed,0x40,0x38,0xe2,0x7b]
+ vinserti64x2 $0x7b, %xmm26, %zmm18, %zmm28
+
+// CHECK: vinserti64x2 $123, (%rcx), %zmm18, %zmm28
+// CHECK: encoding: [0x62,0x63,0xed,0x40,0x38,0x21,0x7b]
+ vinserti64x2 $0x7b,(%rcx), %zmm18, %zmm28
+
+// CHECK: vinserti64x2 $123, 291(%rax,%r14,8), %zmm18, %zmm28
+// CHECK: encoding: [0x62,0x23,0xed,0x40,0x38,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vinserti64x2 $0x7b,291(%rax,%r14,8), %zmm18, %zmm28
+
+// CHECK: vinserti64x2 $123, 2032(%rdx), %zmm18, %zmm28
+// CHECK: encoding: [0x62,0x63,0xed,0x40,0x38,0x62,0x7f,0x7b]
+ vinserti64x2 $0x7b,2032(%rdx), %zmm18, %zmm28
+
+// CHECK: vinserti64x2 $123, 2048(%rdx), %zmm18, %zmm28
+// CHECK: encoding: [0x62,0x63,0xed,0x40,0x38,0xa2,0x00,0x08,0x00,0x00,0x7b]
+ vinserti64x2 $0x7b,2048(%rdx), %zmm18, %zmm28
+
+// CHECK: vinserti64x2 $123, -2048(%rdx), %zmm18, %zmm28
+// CHECK: encoding: [0x62,0x63,0xed,0x40,0x38,0x62,0x80,0x7b]
+ vinserti64x2 $0x7b,-2048(%rdx), %zmm18, %zmm28
+
+// CHECK: vinserti64x2 $123, -2064(%rdx), %zmm18, %zmm28
+// CHECK: encoding: [0x62,0x63,0xed,0x40,0x38,0xa2,0xf0,0xf7,0xff,0xff,0x7b]
+ vinserti64x2 $0x7b,-2064(%rdx), %zmm18, %zmm28
+
+// CHECK: vinserti64x2 $171, %xmm21, %zmm18, %zmm27
+// CHECK: encoding: [0x62,0x23,0xed,0x40,0x38,0xdd,0xab]
+ vinserti64x2 $0xab, %xmm21, %zmm18, %zmm27
+
+// CHECK: vinserti64x2 $171, %xmm21, %zmm18, %zmm27 {%k2}
+// CHECK: encoding: [0x62,0x23,0xed,0x42,0x38,0xdd,0xab]
+ vinserti64x2 $0xab, %xmm21, %zmm18, %zmm27 {%k2}
+
+// CHECK: vinserti64x2 $171, %xmm21, %zmm18, %zmm27 {%k2} {z}
+// CHECK: encoding: [0x62,0x23,0xed,0xc2,0x38,0xdd,0xab]
+ vinserti64x2 $0xab, %xmm21, %zmm18, %zmm27 {%k2} {z}
+
+// CHECK: vinserti64x2 $123, %xmm21, %zmm18, %zmm27
+// CHECK: encoding: [0x62,0x23,0xed,0x40,0x38,0xdd,0x7b]
+ vinserti64x2 $0x7b, %xmm21, %zmm18, %zmm27
+
+// CHECK: vinserti64x2 $123, (%rcx), %zmm18, %zmm27
+// CHECK: encoding: [0x62,0x63,0xed,0x40,0x38,0x19,0x7b]
+ vinserti64x2 $0x7b,(%rcx), %zmm18, %zmm27
+
+// CHECK: vinserti64x2 $123, 4660(%rax,%r14,8), %zmm18, %zmm27
+// CHECK: encoding: [0x62,0x23,0xed,0x40,0x38,0x9c,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vinserti64x2 $0x7b,4660(%rax,%r14,8), %zmm18, %zmm27
+
+// CHECK: vinserti64x2 $123, 2032(%rdx), %zmm18, %zmm27
+// CHECK: encoding: [0x62,0x63,0xed,0x40,0x38,0x5a,0x7f,0x7b]
+ vinserti64x2 $0x7b,2032(%rdx), %zmm18, %zmm27
+
+// CHECK: vinserti64x2 $123, 2048(%rdx), %zmm18, %zmm27
+// CHECK: encoding: [0x62,0x63,0xed,0x40,0x38,0x9a,0x00,0x08,0x00,0x00,0x7b]
+ vinserti64x2 $0x7b,2048(%rdx), %zmm18, %zmm27
+
+// CHECK: vinserti64x2 $123, -2048(%rdx), %zmm18, %zmm27
+// CHECK: encoding: [0x62,0x63,0xed,0x40,0x38,0x5a,0x80,0x7b]
+ vinserti64x2 $0x7b,-2048(%rdx), %zmm18, %zmm27
+
+// CHECK: vinserti64x2 $123, -2064(%rdx), %zmm18, %zmm27
+// CHECK: encoding: [0x62,0x63,0xed,0x40,0x38,0x9a,0xf0,0xf7,0xff,0xff,0x7b]
+ vinserti64x2 $0x7b,-2064(%rdx), %zmm18, %zmm27
+
// CHECK: vextractf32x8 $171, %zmm18, %ymm21
// CHECK: encoding: [0x62,0xa3,0x7d,0x48,0x1b,0xd5,0xab]
vextractf32x8 $0xab, %zmm18, %ymm21
Modified: llvm/trunk/test/MC/X86/x86-64-avx512dq_vl.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/x86-64-avx512dq_vl.s?rev=248111&r1=248110&r2=248111&view=diff
==============================================================================
--- llvm/trunk/test/MC/X86/x86-64-avx512dq_vl.s (original)
+++ llvm/trunk/test/MC/X86/x86-64-avx512dq_vl.s Sun Sep 20 01:52:42 2015
@@ -3584,6 +3584,166 @@
// CHECK: encoding: [0x62,0x61,0xff,0x38,0x7a,0xa2,0xf8,0xfb,0xff,0xff]
vcvtuqq2ps -1032(%rdx){1to4}, %xmm28
+// CHECK: vinsertf64x2 $171, %xmm23, %ymm27, %ymm21
+// CHECK: encoding: [0x62,0xa3,0xa5,0x20,0x18,0xef,0xab]
+ vinsertf64x2 $0xab, %xmm23, %ymm27, %ymm21
+
+// CHECK: vinsertf64x2 $171, %xmm23, %ymm27, %ymm21 {%k7}
+// CHECK: encoding: [0x62,0xa3,0xa5,0x27,0x18,0xef,0xab]
+ vinsertf64x2 $0xab, %xmm23, %ymm27, %ymm21 {%k7}
+
+// CHECK: vinsertf64x2 $171, %xmm23, %ymm27, %ymm21 {%k7} {z}
+// CHECK: encoding: [0x62,0xa3,0xa5,0xa7,0x18,0xef,0xab]
+ vinsertf64x2 $0xab, %xmm23, %ymm27, %ymm21 {%k7} {z}
+
+// CHECK: vinsertf64x2 $123, %xmm23, %ymm27, %ymm21
+// CHECK: encoding: [0x62,0xa3,0xa5,0x20,0x18,0xef,0x7b]
+ vinsertf64x2 $0x7b, %xmm23, %ymm27, %ymm21
+
+// CHECK: vinsertf64x2 $123, (%rcx), %ymm27, %ymm21
+// CHECK: encoding: [0x62,0xe3,0xa5,0x20,0x18,0x29,0x7b]
+ vinsertf64x2 $0x7b,(%rcx), %ymm27, %ymm21
+
+// CHECK: vinsertf64x2 $123, 291(%rax,%r14,8), %ymm27, %ymm21
+// CHECK: encoding: [0x62,0xa3,0xa5,0x20,0x18,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vinsertf64x2 $0x7b,291(%rax,%r14,8), %ymm27, %ymm21
+
+// CHECK: vinsertf64x2 $123, 2032(%rdx), %ymm27, %ymm21
+// CHECK: encoding: [0x62,0xe3,0xa5,0x20,0x18,0x6a,0x7f,0x7b]
+ vinsertf64x2 $0x7b,2032(%rdx), %ymm27, %ymm21
+
+// CHECK: vinsertf64x2 $123, 2048(%rdx), %ymm27, %ymm21
+// CHECK: encoding: [0x62,0xe3,0xa5,0x20,0x18,0xaa,0x00,0x08,0x00,0x00,0x7b]
+ vinsertf64x2 $0x7b,2048(%rdx), %ymm27, %ymm21
+
+// CHECK: vinsertf64x2 $123, -2048(%rdx), %ymm27, %ymm21
+// CHECK: encoding: [0x62,0xe3,0xa5,0x20,0x18,0x6a,0x80,0x7b]
+ vinsertf64x2 $0x7b,-2048(%rdx), %ymm27, %ymm21
+
+// CHECK: vinsertf64x2 $123, -2064(%rdx), %ymm27, %ymm21
+// CHECK: encoding: [0x62,0xe3,0xa5,0x20,0x18,0xaa,0xf0,0xf7,0xff,0xff,0x7b]
+ vinsertf64x2 $0x7b,-2064(%rdx), %ymm27, %ymm21
+
+// CHECK: vinsertf64x2 $171, %xmm27, %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x03,0xc5,0x20,0x18,0xc3,0xab]
+ vinsertf64x2 $0xab, %xmm27, %ymm23, %ymm24
+
+// CHECK: vinsertf64x2 $171, %xmm27, %ymm23, %ymm24 {%k5}
+// CHECK: encoding: [0x62,0x03,0xc5,0x25,0x18,0xc3,0xab]
+ vinsertf64x2 $0xab, %xmm27, %ymm23, %ymm24 {%k5}
+
+// CHECK: vinsertf64x2 $171, %xmm27, %ymm23, %ymm24 {%k5} {z}
+// CHECK: encoding: [0x62,0x03,0xc5,0xa5,0x18,0xc3,0xab]
+ vinsertf64x2 $0xab, %xmm27, %ymm23, %ymm24 {%k5} {z}
+
+// CHECK: vinsertf64x2 $123, %xmm27, %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x03,0xc5,0x20,0x18,0xc3,0x7b]
+ vinsertf64x2 $0x7b, %xmm27, %ymm23, %ymm24
+
+// CHECK: vinsertf64x2 $123, (%rcx), %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x63,0xc5,0x20,0x18,0x01,0x7b]
+ vinsertf64x2 $0x7b,(%rcx), %ymm23, %ymm24
+
+// CHECK: vinsertf64x2 $123, 4660(%rax,%r14,8), %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x23,0xc5,0x20,0x18,0x84,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vinsertf64x2 $0x7b,4660(%rax,%r14,8), %ymm23, %ymm24
+
+// CHECK: vinsertf64x2 $123, 2032(%rdx), %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x63,0xc5,0x20,0x18,0x42,0x7f,0x7b]
+ vinsertf64x2 $0x7b,2032(%rdx), %ymm23, %ymm24
+
+// CHECK: vinsertf64x2 $123, 2048(%rdx), %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x63,0xc5,0x20,0x18,0x82,0x00,0x08,0x00,0x00,0x7b]
+ vinsertf64x2 $0x7b,2048(%rdx), %ymm23, %ymm24
+
+// CHECK: vinsertf64x2 $123, -2048(%rdx), %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x63,0xc5,0x20,0x18,0x42,0x80,0x7b]
+ vinsertf64x2 $0x7b,-2048(%rdx), %ymm23, %ymm24
+
+// CHECK: vinsertf64x2 $123, -2064(%rdx), %ymm23, %ymm24
+// CHECK: encoding: [0x62,0x63,0xc5,0x20,0x18,0x82,0xf0,0xf7,0xff,0xff,0x7b]
+ vinsertf64x2 $0x7b,-2064(%rdx), %ymm23, %ymm24
+
+// CHECK: vinserti64x2 $171, %xmm21, %ymm25, %ymm19
+// CHECK: encoding: [0x62,0xa3,0xb5,0x20,0x38,0xdd,0xab]
+ vinserti64x2 $0xab, %xmm21, %ymm25, %ymm19
+
+// CHECK: vinserti64x2 $171, %xmm21, %ymm25, %ymm19 {%k6}
+// CHECK: encoding: [0x62,0xa3,0xb5,0x26,0x38,0xdd,0xab]
+ vinserti64x2 $0xab, %xmm21, %ymm25, %ymm19 {%k6}
+
+// CHECK: vinserti64x2 $171, %xmm21, %ymm25, %ymm19 {%k6} {z}
+// CHECK: encoding: [0x62,0xa3,0xb5,0xa6,0x38,0xdd,0xab]
+ vinserti64x2 $0xab, %xmm21, %ymm25, %ymm19 {%k6} {z}
+
+// CHECK: vinserti64x2 $123, %xmm21, %ymm25, %ymm19
+// CHECK: encoding: [0x62,0xa3,0xb5,0x20,0x38,0xdd,0x7b]
+ vinserti64x2 $0x7b, %xmm21, %ymm25, %ymm19
+
+// CHECK: vinserti64x2 $123, (%rcx), %ymm25, %ymm19
+// CHECK: encoding: [0x62,0xe3,0xb5,0x20,0x38,0x19,0x7b]
+ vinserti64x2 $0x7b,(%rcx), %ymm25, %ymm19
+
+// CHECK: vinserti64x2 $123, 291(%rax,%r14,8), %ymm25, %ymm19
+// CHECK: encoding: [0x62,0xa3,0xb5,0x20,0x38,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vinserti64x2 $0x7b,291(%rax,%r14,8), %ymm25, %ymm19
+
+// CHECK: vinserti64x2 $123, 2032(%rdx), %ymm25, %ymm19
+// CHECK: encoding: [0x62,0xe3,0xb5,0x20,0x38,0x5a,0x7f,0x7b]
+ vinserti64x2 $0x7b,2032(%rdx), %ymm25, %ymm19
+
+// CHECK: vinserti64x2 $123, 2048(%rdx), %ymm25, %ymm19
+// CHECK: encoding: [0x62,0xe3,0xb5,0x20,0x38,0x9a,0x00,0x08,0x00,0x00,0x7b]
+ vinserti64x2 $0x7b,2048(%rdx), %ymm25, %ymm19
+
+// CHECK: vinserti64x2 $123, -2048(%rdx), %ymm25, %ymm19
+// CHECK: encoding: [0x62,0xe3,0xb5,0x20,0x38,0x5a,0x80,0x7b]
+ vinserti64x2 $0x7b,-2048(%rdx), %ymm25, %ymm19
+
+// CHECK: vinserti64x2 $123, -2064(%rdx), %ymm25, %ymm19
+// CHECK: encoding: [0x62,0xe3,0xb5,0x20,0x38,0x9a,0xf0,0xf7,0xff,0xff,0x7b]
+ vinserti64x2 $0x7b,-2064(%rdx), %ymm25, %ymm19
+
+// CHECK: vinserti64x2 $171, %xmm25, %ymm24, %ymm29
+// CHECK: encoding: [0x62,0x03,0xbd,0x20,0x38,0xe9,0xab]
+ vinserti64x2 $0xab, %xmm25, %ymm24, %ymm29
+
+// CHECK: vinserti64x2 $171, %xmm25, %ymm24, %ymm29 {%k2}
+// CHECK: encoding: [0x62,0x03,0xbd,0x22,0x38,0xe9,0xab]
+ vinserti64x2 $0xab, %xmm25, %ymm24, %ymm29 {%k2}
+
+// CHECK: vinserti64x2 $171, %xmm25, %ymm24, %ymm29 {%k2} {z}
+// CHECK: encoding: [0x62,0x03,0xbd,0xa2,0x38,0xe9,0xab]
+ vinserti64x2 $0xab, %xmm25, %ymm24, %ymm29 {%k2} {z}
+
+// CHECK: vinserti64x2 $123, %xmm25, %ymm24, %ymm29
+// CHECK: encoding: [0x62,0x03,0xbd,0x20,0x38,0xe9,0x7b]
+ vinserti64x2 $0x7b, %xmm25, %ymm24, %ymm29
+
+// CHECK: vinserti64x2 $123, (%rcx), %ymm24, %ymm29
+// CHECK: encoding: [0x62,0x63,0xbd,0x20,0x38,0x29,0x7b]
+ vinserti64x2 $0x7b,(%rcx), %ymm24, %ymm29
+
+// CHECK: vinserti64x2 $123, 4660(%rax,%r14,8), %ymm24, %ymm29
+// CHECK: encoding: [0x62,0x23,0xbd,0x20,0x38,0xac,0xf0,0x34,0x12,0x00,0x00,0x7b]
+ vinserti64x2 $0x7b,4660(%rax,%r14,8), %ymm24, %ymm29
+
+// CHECK: vinserti64x2 $123, 2032(%rdx), %ymm24, %ymm29
+// CHECK: encoding: [0x62,0x63,0xbd,0x20,0x38,0x6a,0x7f,0x7b]
+ vinserti64x2 $0x7b,2032(%rdx), %ymm24, %ymm29
+
+// CHECK: vinserti64x2 $123, 2048(%rdx), %ymm24, %ymm29
+// CHECK: encoding: [0x62,0x63,0xbd,0x20,0x38,0xaa,0x00,0x08,0x00,0x00,0x7b]
+ vinserti64x2 $0x7b,2048(%rdx), %ymm24, %ymm29
+
+// CHECK: vinserti64x2 $123, -2048(%rdx), %ymm24, %ymm29
+// CHECK: encoding: [0x62,0x63,0xbd,0x20,0x38,0x6a,0x80,0x7b]
+ vinserti64x2 $0x7b,-2048(%rdx), %ymm24, %ymm29
+
+// CHECK: vinserti64x2 $123, -2064(%rdx), %ymm24, %ymm29
+// CHECK: encoding: [0x62,0x63,0xbd,0x20,0x38,0xaa,0xf0,0xf7,0xff,0xff,0x7b]
+ vinserti64x2 $0x7b,-2064(%rdx), %ymm24, %ymm29
+
// CHECK: vextractf64x2 $171, %ymm21, %xmm27
// CHECK: encoding: [0x62,0x83,0xfd,0x28,0x19,0xeb,0xab]
vextractf64x2 $0xab, %ymm21, %xmm27
Modified: llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s?rev=248111&r1=248110&r2=248111&view=diff
==============================================================================
--- llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s (original)
+++ llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s Sun Sep 20 01:52:42 2015
@@ -19739,6 +19739,86 @@ vaddpd {rz-sae}, %zmm2, %zmm1, %zmm1
// CHECK: encoding: [0x62,0xe1,0xe5,0x30,0x6d,0xa2,0xf8,0xfb,0xff,0xff]
vpunpckhqdq -1032(%rdx){1to4}, %ymm19, %ymm20
+// CHECK: vinsertf32x4 $171, %xmm27, %ymm18, %ymm18
+// CHECK: encoding: [0x62,0x83,0x6d,0x20,0x18,0xd3,0xab]
+ vinsertf32x4 $0xab, %xmm27, %ymm18, %ymm18
+
+// CHECK: vinsertf32x4 $171, %xmm27, %ymm18, %ymm18 {%k7}
+// CHECK: encoding: [0x62,0x83,0x6d,0x27,0x18,0xd3,0xab]
+ vinsertf32x4 $0xab, %xmm27, %ymm18, %ymm18 {%k7}
+
+// CHECK: vinsertf32x4 $171, %xmm27, %ymm18, %ymm18 {%k7} {z}
+// CHECK: encoding: [0x62,0x83,0x6d,0xa7,0x18,0xd3,0xab]
+ vinsertf32x4 $0xab, %xmm27, %ymm18, %ymm18 {%k7} {z}
+
+// CHECK: vinsertf32x4 $123, %xmm27, %ymm18, %ymm18
+// CHECK: encoding: [0x62,0x83,0x6d,0x20,0x18,0xd3,0x7b]
+ vinsertf32x4 $0x7b, %xmm27, %ymm18, %ymm18
+
+// CHECK: vinsertf32x4 $123, (%rcx), %ymm18, %ymm18
+// CHECK: encoding: [0x62,0xe3,0x6d,0x20,0x18,0x11,0x7b]
+ vinsertf32x4 $0x7b, (%rcx), %ymm18, %ymm18
+
+// CHECK: vinsertf32x4 $123, 291(%rax,%r14,8), %ymm18, %ymm18
+// CHECK: encoding: [0x62,0xa3,0x6d,0x20,0x18,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vinsertf32x4 $0x7b, 291(%rax,%r14,8), %ymm18, %ymm18
+
+// CHECK: vinsertf32x4 $123, 2032(%rdx), %ymm18, %ymm18
+// CHECK: encoding: [0x62,0xe3,0x6d,0x20,0x18,0x52,0x7f,0x7b]
+ vinsertf32x4 $0x7b, 2032(%rdx), %ymm18, %ymm18
+
+// CHECK: vinsertf32x4 $123, 2048(%rdx), %ymm18, %ymm18
+// CHECK: encoding: [0x62,0xe3,0x6d,0x20,0x18,0x92,0x00,0x08,0x00,0x00,0x7b]
+ vinsertf32x4 $0x7b, 2048(%rdx), %ymm18, %ymm18
+
+// CHECK: vinsertf32x4 $123, -2048(%rdx), %ymm18, %ymm18
+// CHECK: encoding: [0x62,0xe3,0x6d,0x20,0x18,0x52,0x80,0x7b]
+ vinsertf32x4 $0x7b, -2048(%rdx), %ymm18, %ymm18
+
+// CHECK: vinsertf32x4 $123, -2064(%rdx), %ymm18, %ymm18
+// CHECK: encoding: [0x62,0xe3,0x6d,0x20,0x18,0x92,0xf0,0xf7,0xff,0xff,0x7b]
+ vinsertf32x4 $0x7b, -2064(%rdx), %ymm18, %ymm18
+
+// CHECK: vinserti32x4 $171, %xmm24, %ymm28, %ymm17
+// CHECK: encoding: [0x62,0x83,0x1d,0x20,0x38,0xc8,0xab]
+ vinserti32x4 $0xab, %xmm24, %ymm28, %ymm17
+
+// CHECK: vinserti32x4 $171, %xmm24, %ymm28, %ymm17 {%k3}
+// CHECK: encoding: [0x62,0x83,0x1d,0x23,0x38,0xc8,0xab]
+ vinserti32x4 $0xab, %xmm24, %ymm28, %ymm17 {%k3}
+
+// CHECK: vinserti32x4 $171, %xmm24, %ymm28, %ymm17 {%k3} {z}
+// CHECK: encoding: [0x62,0x83,0x1d,0xa3,0x38,0xc8,0xab]
+ vinserti32x4 $0xab, %xmm24, %ymm28, %ymm17 {%k3} {z}
+
+// CHECK: vinserti32x4 $123, %xmm24, %ymm28, %ymm17
+// CHECK: encoding: [0x62,0x83,0x1d,0x20,0x38,0xc8,0x7b]
+ vinserti32x4 $0x7b, %xmm24, %ymm28, %ymm17
+
+// CHECK: vinserti32x4 $123, (%rcx), %ymm28, %ymm17
+// CHECK: encoding: [0x62,0xe3,0x1d,0x20,0x38,0x09,0x7b]
+ vinserti32x4 $0x7b, (%rcx), %ymm28, %ymm17
+
+// CHECK: vinserti32x4 $123, 291(%rax,%r14,8), %ymm28, %ymm17
+// CHECK: encoding: [0x62,0xa3,0x1d,0x20,0x38,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b]
+ vinserti32x4 $0x7b, 291(%rax,%r14,8), %ymm28, %ymm17
+
+// CHECK: vinserti32x4 $123, 2032(%rdx), %ymm28, %ymm17
+// CHECK: encoding: [0x62,0xe3,0x1d,0x20,0x38,0x4a,0x7f,0x7b]
+ vinserti32x4 $0x7b, 2032(%rdx), %ymm28, %ymm17
+
+// CHECK: vinserti32x4 $123, 2048(%rdx), %ymm28, %ymm17
+// CHECK: encoding: [0x62,0xe3,0x1d,0x20,0x38,0x8a,0x00,0x08,0x00,0x00,0x7b]
+ vinserti32x4 $0x7b, 2048(%rdx), %ymm28, %ymm17
+
+// CHECK: vinserti32x4 $123, -2048(%rdx), %ymm28, %ymm17
+// CHECK: encoding: [0x62,0xe3,0x1d,0x20,0x38,0x4a,0x80,0x7b]
+ vinserti32x4 $0x7b, -2048(%rdx), %ymm28, %ymm17
+
+// CHECK: vinserti32x4 $123, -2064(%rdx), %ymm28, %ymm17
+// CHECK: encoding: [0x62,0xe3,0x1d,0x20,0x38,0x8a,0xf0,0xf7,0xff,0xff,0x7b]
+ vinserti32x4 $0x7b, -2064(%rdx), %ymm28, %ymm17
+
// CHECK: vextractf32x4 $171, %ymm17, %xmm28
// CHECK: encoding: [0x62,0x83,0x7d,0x28,0x19,0xcc,0xab]
vextractf32x4 $0xab, %ymm17, %xmm28
More information about the llvm-commits
mailing list