[llvm] r311091 - [AVX512] Don't switch unmasked subvector insert/extract instructions when AVX512DQI is enabled.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 17 08:40:25 PDT 2017
Author: ctopper
Date: Thu Aug 17 08:40:25 2017
New Revision: 311091
URL: http://llvm.org/viewvc/llvm-project?rev=311091&view=rev
Log:
[AVX512] Don't switch unmasked subvector insert/extract instructions when AVX512DQI is enabled.
There's no reason to select different instructions with and without DQI. It just creates extra isel patterns and test divergences.
There is, however, value in enabling the masked versions of the instructions with DQI.
This required introducing some new multiclasses to enable this splitting.
Differential Revision: https://reviews.llvm.org/D36661
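(Illustration only, not part of the commit.) A minimal LLVM IR sketch of the split, assuming something like llc -mtriple=x86_64-unknown-unknown -mattr=+avx512dq; the function names and the mask-materialization pattern are hypothetical. After this change the unmasked extract should select the AVX512F vextracti32x4 form even when DQI is available, while the merge-masked extract can still use the DQI-only vextracti64x2 with a {%k1} write-mask, since only that form masks at 64-bit element granularity.

  ; Unmasked 128-bit subvector extract: no reason to prefer the DQI encoding.
  define <2 x i64> @extract_unmasked(<8 x i64> %x) {
    %e = shufflevector <8 x i64> %x, <8 x i64> undef, <2 x i32> <i32 2, i32 3>
    ret <2 x i64> %e
  }

  ; Merge-masked extract: the mask applies per 64-bit element, so the
  ; DQI instruction vextracti64x2 is still worth selecting here.
  define <2 x i64> @extract_masked(<8 x i64> %x, <2 x i64> %passthru, i8 %m) {
    %e    = shufflevector <8 x i64> %x, <8 x i64> undef, <2 x i32> <i32 2, i32 3>
    %bits = bitcast i8 %m to <8 x i1>
    %k    = shufflevector <8 x i1> %bits, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
    %r    = select <2 x i1> %k, <2 x i64> %e, <2 x i64> %passthru
    ret <2 x i64> %r
  }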
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/test/CodeGen/X86/avx512-cvt.ll
llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
llvm/trunk/test/CodeGen/X86/avx512-trunc.ll
llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/compress_expand.ll
llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512.ll
llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll
llvm/trunk/test/CodeGen/X86/subvector-broadcast.ll
llvm/trunk/test/CodeGen/X86/vector-compare-results.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-avx512.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll
llvm/trunk/test/CodeGen/X86/vector-trunc-math.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=311091&r1=311090&r2=311091&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Thu Aug 17 08:40:25 2017
@@ -285,6 +285,28 @@ multiclass AVX512_maskable_fp_common<bit
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
// preserved vector elements come from a new dummy input operand tied to $dst.
+// This version uses a separate dag for non-masking and masking.
+multiclass AVX512_maskable_split<bits<8> O, Format F, X86VectorVTInfo _,
+ dag Outs, dag Ins, string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm,
+ dag RHS, dag MaskRHS,
+ InstrItinClass itin = NoItinerary,
+ bit IsCommutable = 0, bit IsKCommutable = 0,
+ SDNode Select = vselect> :
+ AVX512_maskable_custom<O, F, Outs, Ins,
+ !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
+ !con((ins _.KRCWM:$mask), Ins),
+ OpcodeStr, AttSrcAsm, IntelSrcAsm,
+ [(set _.RC:$dst, RHS)],
+ [(set _.RC:$dst,
+ (Select _.KRCWM:$mask, MaskRHS, _.RC:$src0))],
+ [(set _.RC:$dst,
+ (Select _.KRCWM:$mask, MaskRHS, _.ImmAllZerosV))],
+ "$src0 = $dst", itin, IsCommutable, IsKCommutable>;
+
+// This multiclass generates the unconditional/non-masking, the masking and
+// the zero-masking variant of the vector instruction. In the masking case, the
+// preserved vector elements come from a new dummy input operand tied to $dst.
multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
@@ -512,28 +534,45 @@ let isReMaterializable = 1, isAsCheapAsA
//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//
-multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From, X86VectorVTInfo To,
- PatFrag vinsert_insert> {
+
+// Supports two different pattern operators for mask and unmasked ops. Allows
+// null_frag to be passed for one.
+multiclass vinsert_for_size_split<int Opcode, X86VectorVTInfo From,
+ X86VectorVTInfo To,
+ SDPatternOperator vinsert_insert,
+ SDPatternOperator vinsert_for_mask> {
let ExeDomain = To.ExeDomain in {
- defm rr : AVX512_maskable<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
+ defm rr : AVX512_maskable_split<Opcode, MRMSrcReg, To, (outs To.RC:$dst),
(ins To.RC:$src1, From.RC:$src2, u8imm:$src3),
"vinsert" # From.EltTypeName # "x" # From.NumElts,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(vinsert_insert:$src3 (To.VT To.RC:$src1),
(From.VT From.RC:$src2),
- (iPTR imm))>, AVX512AIi8Base, EVEX_4V;
+ (iPTR imm)),
+ (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
+ (From.VT From.RC:$src2),
+ (iPTR imm))>, AVX512AIi8Base, EVEX_4V;
- defm rm : AVX512_maskable<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
+ defm rm : AVX512_maskable_split<Opcode, MRMSrcMem, To, (outs To.RC:$dst),
(ins To.RC:$src1, From.MemOp:$src2, u8imm:$src3),
"vinsert" # From.EltTypeName # "x" # From.NumElts,
"$src3, $src2, $src1", "$src1, $src2, $src3",
(vinsert_insert:$src3 (To.VT To.RC:$src1),
(From.VT (bitconvert (From.LdFrag addr:$src2))),
+ (iPTR imm)),
+ (vinsert_for_mask:$src3 (To.VT To.RC:$src1),
+ (From.VT (bitconvert (From.LdFrag addr:$src2))),
(iPTR imm))>, AVX512AIi8Base, EVEX_4V,
EVEX_CD8<From.EltSize, From.CD8TupleForm>;
}
}
+// Passes the same pattern operator for masked and unmasked ops.
+multiclass vinsert_for_size<int Opcode, X86VectorVTInfo From,
+ X86VectorVTInfo To,
+ SDPatternOperator vinsert_insert> :
+ vinsert_for_size_split<Opcode, From, To, vinsert_insert, vinsert_insert>;
+
multiclass vinsert_for_size_lowering<string InstrStr, X86VectorVTInfo From,
X86VectorVTInfo To, PatFrag vinsert_insert,
SDNodeXForm INSERT_get_vinsert_imm , list<Predicate> p> {
@@ -573,22 +612,24 @@ multiclass vinsert_for_type<ValueType El
X86VectorVTInfo< 8, EltVT64, VR512>,
vinsert256_insert>, VEX_W, EVEX_V512;
+ // Even with DQI we'd like to only use these instructions for masking.
let Predicates = [HasVLX, HasDQI] in
- defm NAME # "64x2Z256" : vinsert_for_size<Opcode128,
+ defm NAME # "64x2Z256" : vinsert_for_size_split<Opcode128,
X86VectorVTInfo< 2, EltVT64, VR128X>,
X86VectorVTInfo< 4, EltVT64, VR256X>,
- vinsert128_insert>, VEX_W, EVEX_V256;
+ null_frag, vinsert128_insert>, VEX_W, EVEX_V256;
+ // Even with DQI we'd like to only use these instructions for masking.
let Predicates = [HasDQI] in {
- defm NAME # "64x2Z" : vinsert_for_size<Opcode128,
+ defm NAME # "64x2Z" : vinsert_for_size_split<Opcode128,
X86VectorVTInfo< 2, EltVT64, VR128X>,
X86VectorVTInfo< 8, EltVT64, VR512>,
- vinsert128_insert>, VEX_W, EVEX_V512;
+ null_frag, vinsert128_insert>, VEX_W, EVEX_V512;
- defm NAME # "32x8Z" : vinsert_for_size<Opcode256,
+ defm NAME # "32x8Z" : vinsert_for_size_split<Opcode256,
X86VectorVTInfo< 8, EltVT32, VR256X>,
X86VectorVTInfo<16, EltVT32, VR512>,
- vinsert256_insert>, EVEX_V512;
+ null_frag, vinsert256_insert>, EVEX_V512;
}
}
@@ -596,21 +637,21 @@ defm VINSERTF : vinsert_for_type<f32, 0x
defm VINSERTI : vinsert_for_type<i32, 0x38, i64, 0x3a>;
// Codegen pattern with the alternative types,
-// Only add this if 64x2 and its friends are not supported natively via AVX512DQ.
+// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info,
- vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX, NoDQI]>;
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info,
- vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX, NoDQI]>;
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>;
defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info,
- vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512, NoDQI]>;
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info,
- vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512, NoDQI]>;
+ vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info,
- vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512, NoDQI]>;
+ vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info,
- vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512, NoDQI]>;
+ vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
// Codegen pattern with the alternative types insert VEC128 into VEC256
defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info,
@@ -647,16 +688,20 @@ def VINSERTPSZrm: AVX512AIi8<0x21, MRMSr
// AVX-512 VECTOR EXTRACT
//---
-multiclass vextract_for_size<int Opcode,
- X86VectorVTInfo From, X86VectorVTInfo To,
- PatFrag vextract_extract> {
+// Supports two different pattern operators for mask and unmasked ops. Allows
+// null_frag to be passed for one.
+multiclass vextract_for_size_split<int Opcode,
+ X86VectorVTInfo From, X86VectorVTInfo To,
+ SDPatternOperator vextract_extract,
+ SDPatternOperator vextract_for_mask> {
let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
- defm rr : AVX512_maskable<Opcode, MRMDestReg, To, (outs To.RC:$dst),
+ defm rr : AVX512_maskable_split<Opcode, MRMDestReg, To, (outs To.RC:$dst),
(ins From.RC:$src1, u8imm:$idx),
"vextract" # To.EltTypeName # "x" # To.NumElts,
"$idx, $src1", "$src1, $idx",
- (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm))>,
+ (vextract_extract:$idx (From.VT From.RC:$src1), (iPTR imm)),
+ (vextract_for_mask:$idx (From.VT From.RC:$src1), (iPTR imm))>,
AVX512AIi8Base, EVEX;
def mr : AVX512AIi8<Opcode, MRMDestMem, (outs),
(ins To.MemOp:$dst, From.RC:$src1, u8imm:$idx),
@@ -677,6 +722,12 @@ multiclass vextract_for_size<int Opcode,
}
}
+// Passes the same pattern operator for masked and unmasked ops.
+multiclass vextract_for_size<int Opcode, X86VectorVTInfo From,
+ X86VectorVTInfo To,
+ SDPatternOperator vextract_extract> :
+ vextract_for_size_split<Opcode, From, To, vextract_extract, vextract_extract>;
+
// Codegen pattern for the alternative types
multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
X86VectorVTInfo To, PatFrag vextract_extract,
@@ -713,22 +764,26 @@ multiclass vextract_for_type<ValueType E
X86VectorVTInfo< 4, EltVT32, VR128X>,
vextract128_extract>,
EVEX_V256, EVEX_CD8<32, CD8VT4>;
+
+ // Even with DQI we'd like to only use these instructions for masking.
let Predicates = [HasVLX, HasDQI] in
- defm NAME # "64x2Z256" : vextract_for_size<Opcode128,
+ defm NAME # "64x2Z256" : vextract_for_size_split<Opcode128,
X86VectorVTInfo< 4, EltVT64, VR256X>,
X86VectorVTInfo< 2, EltVT64, VR128X>,
- vextract128_extract>,
+ null_frag, vextract128_extract>,
VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>;
+
+ // Even with DQI we'd like to only use these instructions for masking.
let Predicates = [HasDQI] in {
- defm NAME # "64x2Z" : vextract_for_size<Opcode128,
+ defm NAME # "64x2Z" : vextract_for_size_split<Opcode128,
X86VectorVTInfo< 8, EltVT64, VR512>,
X86VectorVTInfo< 2, EltVT64, VR128X>,
- vextract128_extract>,
+ null_frag, vextract128_extract>,
VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
- defm NAME # "32x8Z" : vextract_for_size<Opcode256,
+ defm NAME # "32x8Z" : vextract_for_size_split<Opcode256,
X86VectorVTInfo<16, EltVT32, VR512>,
X86VectorVTInfo< 8, EltVT32, VR256X>,
- vextract256_extract>,
+ null_frag, vextract256_extract>,
EVEX_V512, EVEX_CD8<32, CD8VT8>;
}
}
@@ -737,21 +792,21 @@ defm VEXTRACTF : vextract_for_type<f32,
defm VEXTRACTI : vextract_for_type<i32, 0x39, i64, 0x3b>;
// extract_subvector codegen patterns with the alternative types.
-// Only add this if 64x2 and its friends are not supported natively via AVX512DQ.
+// Even with AVX512DQ we'll still use these for unmasked operations.
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z", v8f64_info, v2f64x_info,
- vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512, NoDQI]>;
+ vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v8i64_info, v2i64x_info,
- vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512, NoDQI]>;
+ vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF64x4Z", v16f32_info, v8f32x_info,
- vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512, NoDQI]>;
+ vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTI64x4Z", v16i32_info, v8i32x_info,
- vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512, NoDQI]>;
+ vextract256_extract, EXTRACT_get_vextract256_imm, [HasAVX512]>;
defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
- vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX, NoDQI]>;
+ vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
- vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX, NoDQI]>;
+ vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
// Codegen pattern with the alternative types extract VEC128 from VEC256
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
Modified: llvm/trunk/test/CodeGen/X86/avx512-cvt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-cvt.ll?rev=311091&r1=311090&r2=311091&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-cvt.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-cvt.ll Thu Aug 17 08:40:25 2017
@@ -642,19 +642,12 @@ define <4 x i32> @fptosi03(<4 x double>
}
define <16 x float> @fptrunc00(<16 x double> %b) nounwind {
-; NODQ-LABEL: fptrunc00:
-; NODQ: # BB#0:
-; NODQ-NEXT: vcvtpd2ps %zmm0, %ymm0
-; NODQ-NEXT: vcvtpd2ps %zmm1, %ymm1
-; NODQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
-; NODQ-NEXT: retq
-;
-; DQ-LABEL: fptrunc00:
-; DQ: # BB#0:
-; DQ-NEXT: vcvtpd2ps %zmm0, %ymm0
-; DQ-NEXT: vcvtpd2ps %zmm1, %ymm1
-; DQ-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm0
-; DQ-NEXT: retq
+; ALL-LABEL: fptrunc00:
+; ALL: # BB#0:
+; ALL-NEXT: vcvtpd2ps %zmm0, %ymm0
+; ALL-NEXT: vcvtpd2ps %zmm1, %ymm1
+; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
+; ALL-NEXT: retq
%a = fptrunc <16 x double> %b to <16 x float>
ret <16 x float> %a
}
@@ -876,21 +869,13 @@ define i32 @float_to_int(float %x) {
}
define <16 x double> @uitof64(<16 x i32> %a) nounwind {
-; NODQ-LABEL: uitof64:
-; NODQ: # BB#0:
-; NODQ-NEXT: vcvtudq2pd %ymm0, %zmm2
-; NODQ-NEXT: vextractf64x4 $1, %zmm0, %ymm0
-; NODQ-NEXT: vcvtudq2pd %ymm0, %zmm1
-; NODQ-NEXT: vmovaps %zmm2, %zmm0
-; NODQ-NEXT: retq
-;
-; DQ-LABEL: uitof64:
-; DQ: # BB#0:
-; DQ-NEXT: vcvtudq2pd %ymm0, %zmm2
-; DQ-NEXT: vextractf32x8 $1, %zmm0, %ymm0
-; DQ-NEXT: vcvtudq2pd %ymm0, %zmm1
-; DQ-NEXT: vmovaps %zmm2, %zmm0
-; DQ-NEXT: retq
+; ALL-LABEL: uitof64:
+; ALL: # BB#0:
+; ALL-NEXT: vcvtudq2pd %ymm0, %zmm2
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm0
+; ALL-NEXT: vcvtudq2pd %ymm0, %zmm1
+; ALL-NEXT: vmovaps %zmm2, %zmm0
+; ALL-NEXT: retq
%b = uitofp <16 x i32> %a to <16 x double>
ret <16 x double> %b
}
Modified: llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll?rev=311091&r1=311090&r2=311091&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll Thu Aug 17 08:40:25 2017
@@ -20,25 +20,15 @@ define <16 x float> @test1(<16 x float>
}
define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind {
-; KNL-LABEL: test2:
-; KNL: ## BB#0:
-; KNL-NEXT: vmovhpd {{.*#+}} xmm2 = xmm0[0],mem[0]
-; KNL-NEXT: vinsertf32x4 $0, %xmm2, %zmm0, %zmm2
-; KNL-NEXT: vextractf32x4 $3, %zmm0, %xmm0
-; KNL-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; KNL-NEXT: vinsertf32x4 $3, %xmm0, %zmm2, %zmm0
-; KNL-NEXT: retq
-; KNL-NEXT: ## -- End function
-;
-; SKX-LABEL: test2:
-; SKX: ## BB#0:
-; SKX-NEXT: vmovhpd {{.*#+}} xmm2 = xmm0[0],mem[0]
-; SKX-NEXT: vinsertf64x2 $0, %xmm2, %zmm0, %zmm2
-; SKX-NEXT: vextractf64x2 $3, %zmm0, %xmm0
-; SKX-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; SKX-NEXT: vinsertf64x2 $3, %xmm0, %zmm2, %zmm0
-; SKX-NEXT: retq
-; SKX-NEXT: ## -- End function
+; CHECK-LABEL: test2:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vmovhpd {{.*#+}} xmm2 = xmm0[0],mem[0]
+; CHECK-NEXT: vinsertf32x4 $0, %xmm2, %zmm0, %zmm2
+; CHECK-NEXT: vextractf32x4 $3, %zmm0, %xmm0
+; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
+; CHECK-NEXT: vinsertf32x4 $3, %xmm0, %zmm2, %zmm0
+; CHECK-NEXT: retq
+; CHECK-NEXT: ## -- End function
%rrr = load double, double* %br
%rrr2 = insertelement <8 x double> %x, double %rrr, i32 1
%rrr3 = insertelement <8 x double> %rrr2, double %y, i32 6
@@ -59,23 +49,14 @@ define <16 x float> @test3(<16 x float>
}
define <8 x i64> @test4(<8 x i64> %x) nounwind {
-; KNL-LABEL: test4:
-; KNL: ## BB#0:
-; KNL-NEXT: vextracti32x4 $2, %zmm0, %xmm1
-; KNL-NEXT: vmovq %xmm1, %rax
-; KNL-NEXT: vpinsrq $1, %rax, %xmm0, %xmm1
-; KNL-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
-; KNL-NEXT: retq
-; KNL-NEXT: ## -- End function
-;
-; SKX-LABEL: test4:
-; SKX: ## BB#0:
-; SKX-NEXT: vextracti64x2 $2, %zmm0, %xmm1
-; SKX-NEXT: vmovq %xmm1, %rax
-; SKX-NEXT: vpinsrq $1, %rax, %xmm0, %xmm1
-; SKX-NEXT: vinserti64x2 $0, %xmm1, %zmm0, %zmm0
-; SKX-NEXT: retq
-; SKX-NEXT: ## -- End function
+; CHECK-LABEL: test4:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm1
+; CHECK-NEXT: vmovq %xmm1, %rax
+; CHECK-NEXT: vpinsrq $1, %rax, %xmm0, %xmm1
+; CHECK-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
+; CHECK-NEXT: ## -- End function
%eee = extractelement <8 x i64> %x, i32 4
%rrr2 = insertelement <8 x i64> %x, i64 %eee, i32 1
ret <8 x i64> %rrr2
@@ -477,7 +458,7 @@ define i64 @extract_v8i64(<8 x i64> %x,
; SKX-LABEL: extract_v8i64:
; SKX: ## BB#0:
; SKX-NEXT: vpextrq $1, %xmm0, %rax
-; SKX-NEXT: vextracti64x2 $1, %zmm0, %xmm0
+; SKX-NEXT: vextracti32x4 $1, %zmm0, %xmm0
; SKX-NEXT: vpextrq $1, %xmm0, (%rdi)
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
@@ -693,23 +674,14 @@ define i8 @extract_v16i8(<16 x i8> %x, i
}
define <8 x i64> @insert_v8i64(<8 x i64> %x, i64 %y , i64* %ptr) {
-; KNL-LABEL: insert_v8i64:
-; KNL: ## BB#0:
-; KNL-NEXT: vpinsrq $1, (%rsi), %xmm0, %xmm1
-; KNL-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm1
-; KNL-NEXT: vextracti32x4 $1, %zmm0, %xmm0
-; KNL-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0
-; KNL-NEXT: vinserti32x4 $1, %xmm0, %zmm1, %zmm0
-; KNL-NEXT: retq
-;
-; SKX-LABEL: insert_v8i64:
-; SKX: ## BB#0:
-; SKX-NEXT: vpinsrq $1, (%rsi), %xmm0, %xmm1
-; SKX-NEXT: vinserti64x2 $0, %xmm1, %zmm0, %zmm1
-; SKX-NEXT: vextracti64x2 $1, %zmm0, %xmm0
-; SKX-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0
-; SKX-NEXT: vinserti64x2 $1, %xmm0, %zmm1, %zmm0
-; SKX-NEXT: retq
+; CHECK-LABEL: insert_v8i64:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpinsrq $1, (%rsi), %xmm0, %xmm1
+; CHECK-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm1
+; CHECK-NEXT: vextracti32x4 $1, %zmm0, %xmm0
+; CHECK-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0
+; CHECK-NEXT: vinserti32x4 $1, %xmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
%val = load i64, i64* %ptr
%r1 = insertelement <8 x i64> %x, i64 %val, i32 1
%r2 = insertelement <8 x i64> %r1, i64 %y, i32 3
@@ -888,17 +860,11 @@ define <16 x i8> @insert_v16i8(<16 x i8>
}
define <8 x i64> @test_insert_128_v8i64(<8 x i64> %x, i64 %y) {
-; KNL-LABEL: test_insert_128_v8i64:
-; KNL: ## BB#0:
-; KNL-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm1
-; KNL-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
-; KNL-NEXT: retq
-;
-; SKX-LABEL: test_insert_128_v8i64:
-; SKX: ## BB#0:
-; SKX-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm1
-; SKX-NEXT: vinserti64x2 $0, %xmm1, %zmm0, %zmm0
-; SKX-NEXT: retq
+; CHECK-LABEL: test_insert_128_v8i64:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm1
+; CHECK-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%r = insertelement <8 x i64> %x, i64 %y, i32 1
ret <8 x i64> %r
}
@@ -914,17 +880,11 @@ define <16 x i32> @test_insert_128_v16i3
}
define <8 x double> @test_insert_128_v8f64(<8 x double> %x, double %y) {
-; KNL-LABEL: test_insert_128_v8f64:
-; KNL: ## BB#0:
-; KNL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0]
-; KNL-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
-; KNL-NEXT: retq
-;
-; SKX-LABEL: test_insert_128_v8f64:
-; SKX: ## BB#0:
-; SKX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0]
-; SKX-NEXT: vinsertf64x2 $0, %xmm1, %zmm0, %zmm0
-; SKX-NEXT: retq
+; CHECK-LABEL: test_insert_128_v8f64:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0]
+; CHECK-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
+; CHECK-NEXT: retq
%r = insertelement <8 x double> %x, double %y, i32 1
ret <8 x double> %r
}
Modified: llvm/trunk/test/CodeGen/X86/avx512-trunc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-trunc.ll?rev=311091&r1=311090&r2=311091&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-trunc.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-trunc.ll Thu Aug 17 08:40:25 2017
@@ -726,27 +726,16 @@ define <8 x i8> @usat_trunc_wb_128(<8 x
}
define <16 x i16> @usat_trunc_qw_1024(<16 x i64> %i) {
-; KNL-LABEL: usat_trunc_qw_1024:
-; KNL: ## BB#0:
-; KNL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm2
-; KNL-NEXT: vpminuq %zmm2, %zmm1, %zmm1
-; KNL-NEXT: vpminuq %zmm2, %zmm0, %zmm0
-; KNL-NEXT: vpmovqd %zmm0, %ymm0
-; KNL-NEXT: vpmovqd %zmm1, %ymm1
-; KNL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; KNL-NEXT: vpmovdw %zmm0, %ymm0
-; KNL-NEXT: retq
-;
-; SKX-LABEL: usat_trunc_qw_1024:
-; SKX: ## BB#0:
-; SKX-NEXT: vpbroadcastq {{.*}}(%rip), %zmm2
-; SKX-NEXT: vpminuq %zmm2, %zmm1, %zmm1
-; SKX-NEXT: vpminuq %zmm2, %zmm0, %zmm0
-; SKX-NEXT: vpmovqd %zmm0, %ymm0
-; SKX-NEXT: vpmovqd %zmm1, %ymm1
-; SKX-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
-; SKX-NEXT: vpmovdw %zmm0, %ymm0
-; SKX-NEXT: retq
+; ALL-LABEL: usat_trunc_qw_1024:
+; ALL: ## BB#0:
+; ALL-NEXT: vpbroadcastq {{.*#+}} zmm2 = [65535,65535,65535,65535,65535,65535,65535,65535]
+; ALL-NEXT: vpminuq %zmm2, %zmm1, %zmm1
+; ALL-NEXT: vpminuq %zmm2, %zmm0, %zmm0
+; ALL-NEXT: vpmovqd %zmm0, %ymm0
+; ALL-NEXT: vpmovqd %zmm1, %ymm1
+; ALL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; ALL-NEXT: vpmovdw %zmm0, %ymm0
+; ALL-NEXT: retq
%x3 = icmp ult <16 x i64> %i, <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
%x5 = select <16 x i1> %x3, <16 x i64> %i, <16 x i64> <i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535, i64 65535>
%x6 = trunc <16 x i64> %x5 to <16 x i16>
Modified: llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll?rev=311091&r1=311090&r2=311091&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512dq-intrinsics-upgrade.ll Thu Aug 17 08:40:25 2017
@@ -6,7 +6,7 @@ declare <2 x double> @llvm.x86.avx512.ma
define <2 x double>@test_int_x86_avx512_mask_vextractf64x2_512(<8 x double> %x0, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vextractf64x2_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: vextractf64x2 $1, %zmm0, %xmm0
+; CHECK-NEXT: vextractf32x4 $1, %zmm0, %xmm0
; CHECK-NEXT: kmovw %edi, %k0
; CHECK-NEXT: kshiftlb $7, %k0, %k1
; CHECK-NEXT: kshiftrb $7, %k1, %k1
@@ -36,7 +36,7 @@ declare <8 x float> @llvm.x86.avx512.mas
define <8 x float>@test_int_x86_avx512_mask_vextractf32x8(<16 x float> %x0, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vextractf32x8:
; CHECK: ## BB#0:
-; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm2
+; CHECK-NEXT: vextractf64x4 $1, %zmm0, %ymm2
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vextractf32x8 $1, %zmm0, %ymm1 {%k1}
; CHECK-NEXT: vaddps %ymm2, %ymm1, %ymm1
@@ -56,7 +56,7 @@ declare <16 x float> @llvm.x86.avx512.ma
define <16 x float>@test_int_x86_avx512_mask_insertf32x8_512(<16 x float> %x0, <8 x float> %x1, <16 x float> %x3, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x8_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm3
+; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm3
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm0 {%k1} {z}
@@ -76,7 +76,7 @@ declare <8 x double> @llvm.x86.avx512.ma
define <8 x double>@test_int_x86_avx512_mask_insertf64x2_512(<8 x double> %x0, <2 x double> %x1,<8 x double> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x2_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %zmm0, %zmm3
+; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm3
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %zmm0, %zmm0 {%k1} {z}
@@ -96,7 +96,7 @@ declare <16 x i32> @llvm.x86.avx512.mask
define <16 x i32>@test_int_x86_avx512_mask_inserti32x8_512(<16 x i32> %x0, <8 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x8_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm3
+; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm3
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0 {%k1} {z}
@@ -116,7 +116,7 @@ declare <8 x i64> @llvm.x86.avx512.mask.
define <8 x i64>@test_int_x86_avx512_mask_inserti64x2_512(<8 x i64> %x0, <2 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x2_512:
; CHECK: ## BB#0:
-; CHECK-NEXT: vinserti64x2 $1, %xmm1, %zmm0, %zmm3
+; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm3
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vinserti64x2 $1, %xmm1, %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vinserti64x2 $1, %xmm1, %zmm0, %zmm0 {%k1} {z}
@@ -162,7 +162,7 @@ define <16 x float>@test_int_x86_avx512_
; CHECK-LABEL: test_int_x86_avx512_mask_broadcastf32x8_512:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; CHECK-NEXT: vinsertf32x8 $1, %ymm0, %zmm0, %zmm2
+; CHECK-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm2
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vinsertf32x8 $1, %ymm0, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vaddps %zmm1, %zmm2, %zmm1
@@ -231,7 +231,7 @@ define <16 x i32>@test_int_x86_avx512_ma
; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x8_512:
; CHECK: ## BB#0:
; CHECK-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; CHECK-NEXT: vinserti32x8 $1, %ymm0, %zmm0, %zmm2
+; CHECK-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm2
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vinserti32x8 $1, %ymm0, %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vpaddd %zmm1, %zmm2, %zmm1
Modified: llvm/trunk/test/CodeGen/X86/compress_expand.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/compress_expand.ll?rev=311091&r1=311090&r2=311091&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/compress_expand.ll (original)
+++ llvm/trunk/test/CodeGen/X86/compress_expand.ll Thu Aug 17 08:40:25 2017
@@ -334,7 +334,7 @@ define <32 x float> @test15(float* %base
define <16 x double> @test16(double* %base, <16 x double> %src0, <16 x i32> %trigger) {
; SKX-LABEL: test16:
; SKX: # BB#0:
-; SKX-NEXT: vextracti32x8 $1, %zmm2, %ymm3
+; SKX-NEXT: vextracti64x4 $1, %zmm2, %ymm3
; SKX-NEXT: vpxor %xmm4, %xmm4, %xmm4
; SKX-NEXT: vpcmpeqd %ymm4, %ymm3, %k1
; SKX-NEXT: vpcmpeqd %ymm4, %ymm2, %k2
Modified: llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll?rev=311091&r1=311090&r2=311091&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll Thu Aug 17 08:40:25 2017
@@ -422,10 +422,10 @@ define <16 x i32> @test8(<16 x i32*> %pt
; SKX-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k3}
; SKX-NEXT: kmovw %k1, %k3
; SKX-NEXT: vpgatherqd (,%zmm0), %ymm3 {%k3}
-; SKX-NEXT: vinserti32x8 $1, %ymm2, %zmm3, %zmm4
+; SKX-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm4
; SKX-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2}
; SKX-NEXT: vpgatherqd (,%zmm0), %ymm3 {%k1}
-; SKX-NEXT: vinserti32x8 $1, %ymm2, %zmm3, %zmm0
+; SKX-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm0
; SKX-NEXT: vpaddd %zmm0, %zmm4, %zmm0
; SKX-NEXT: retq
;
@@ -750,7 +750,7 @@ define <16 x float> @test14(float* %base
; SKX-NEXT: kshiftrw $8, %k1, %k2
; SKX-NEXT: vgatherqps (,%zmm0), %ymm1 {%k2}
; SKX-NEXT: vgatherqps (,%zmm0), %ymm2 {%k1}
-; SKX-NEXT: vinsertf32x8 $1, %ymm1, %zmm2, %zmm0
+; SKX-NEXT: vinsertf64x4 $1, %ymm1, %zmm2, %zmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test14:
@@ -1686,11 +1686,11 @@ define <16 x i32> @test_gather_16i32(<16
; SKX-NEXT: vpmovsxbd %xmm2, %zmm2
; SKX-NEXT: vpslld $31, %zmm2, %zmm2
; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1
-; SKX-NEXT: vextracti32x8 $1, %zmm3, %ymm2
+; SKX-NEXT: vextracti64x4 $1, %zmm3, %ymm2
; SKX-NEXT: kshiftrw $8, %k1, %k2
; SKX-NEXT: vpgatherqd (,%zmm1), %ymm2 {%k2}
; SKX-NEXT: vpgatherqd (,%zmm0), %ymm3 {%k1}
-; SKX-NEXT: vinserti32x8 $1, %ymm2, %zmm3, %zmm0
+; SKX-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test_gather_16i32:
@@ -1772,7 +1772,7 @@ define <16 x i64> @test_gather_16i64(<16
; SKX_32-NEXT: vmovdqa64 8(%ebp), %zmm1
; SKX_32-NEXT: kshiftrw $8, %k1, %k2
; SKX_32-NEXT: vpgatherdq (,%ymm0), %zmm2 {%k1}
-; SKX_32-NEXT: vextracti32x8 $1, %zmm0, %ymm0
+; SKX_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; SKX_32-NEXT: vpgatherdq (,%ymm0), %zmm1 {%k2}
; SKX_32-NEXT: vmovdqa64 %zmm2, %zmm0
; SKX_32-NEXT: movl %ebp, %esp
@@ -1809,11 +1809,11 @@ define <16 x float> @test_gather_16f32(<
; SKX-NEXT: vpmovsxbd %xmm2, %zmm2
; SKX-NEXT: vpslld $31, %zmm2, %zmm2
; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1
-; SKX-NEXT: vextractf32x8 $1, %zmm3, %ymm2
+; SKX-NEXT: vextractf64x4 $1, %zmm3, %ymm2
; SKX-NEXT: kshiftrw $8, %k1, %k2
; SKX-NEXT: vgatherqps (,%zmm1), %ymm2 {%k2}
; SKX-NEXT: vgatherqps (,%zmm0), %ymm3 {%k1}
-; SKX-NEXT: vinsertf32x8 $1, %ymm2, %zmm3, %zmm0
+; SKX-NEXT: vinsertf64x4 $1, %ymm2, %zmm3, %zmm0
; SKX-NEXT: retq
;
; SKX_32-LABEL: test_gather_16f32:
@@ -1895,7 +1895,7 @@ define <16 x double> @test_gather_16f64(
; SKX_32-NEXT: vmovapd 8(%ebp), %zmm1
; SKX_32-NEXT: kshiftrw $8, %k1, %k2
; SKX_32-NEXT: vgatherdpd (,%ymm0), %zmm2 {%k1}
-; SKX_32-NEXT: vextractf32x8 $1, %zmm0, %ymm0
+; SKX_32-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; SKX_32-NEXT: vgatherdpd (,%ymm0), %zmm1 {%k2}
; SKX_32-NEXT: vmovapd %zmm2, %zmm0
; SKX_32-NEXT: movl %ebp, %esp
@@ -1934,7 +1934,7 @@ define void @test_scatter_16i32(<16 x i3
; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1
; SKX-NEXT: kshiftrw $8, %k1, %k2
; SKX-NEXT: vpscatterqd %ymm3, (,%zmm0) {%k1}
-; SKX-NEXT: vextracti32x8 $1, %zmm3, %ymm0
+; SKX-NEXT: vextracti64x4 $1, %zmm3, %ymm0
; SKX-NEXT: vpscatterqd %ymm0, (,%zmm1) {%k2}
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
@@ -2016,7 +2016,7 @@ define void @test_scatter_16i64(<16 x i6
; SKX_32-NEXT: vmovdqa64 8(%ebp), %zmm1
; SKX_32-NEXT: kshiftrw $8, %k1, %k2
; SKX_32-NEXT: vpscatterdq %zmm2, (,%ymm0) {%k1}
-; SKX_32-NEXT: vextracti32x8 $1, %zmm0, %ymm0
+; SKX_32-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; SKX_32-NEXT: vpscatterdq %zmm1, (,%ymm0) {%k2}
; SKX_32-NEXT: movl %ebp, %esp
; SKX_32-NEXT: popl %ebp
@@ -2055,7 +2055,7 @@ define void @test_scatter_16f32(<16 x fl
; SKX-NEXT: vptestmd %zmm2, %zmm2, %k1
; SKX-NEXT: kshiftrw $8, %k1, %k2
; SKX-NEXT: vscatterqps %ymm3, (,%zmm0) {%k1}
-; SKX-NEXT: vextractf32x8 $1, %zmm3, %ymm0
+; SKX-NEXT: vextractf64x4 $1, %zmm3, %ymm0
; SKX-NEXT: vscatterqps %ymm0, (,%zmm1) {%k2}
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
@@ -2138,7 +2138,7 @@ define void @test_scatter_16f64(<16 x do
; SKX_32-NEXT: vmovapd 8(%ebp), %zmm1
; SKX_32-NEXT: kshiftrw $8, %k1, %k2
; SKX_32-NEXT: vscatterdpd %zmm2, (,%ymm0) {%k1}
-; SKX_32-NEXT: vextractf32x8 $1, %zmm0, %ymm0
+; SKX_32-NEXT: vextractf64x4 $1, %zmm0, %ymm0
; SKX_32-NEXT: vscatterdpd %zmm1, (,%ymm0) {%k2}
; SKX_32-NEXT: movl %ebp, %esp
; SKX_32-NEXT: popl %ebp
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512.ll?rev=311091&r1=311090&r2=311091&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-fp-avx512.ll Thu Aug 17 08:40:25 2017
@@ -458,7 +458,7 @@ define <4 x float> @stack_fold_extracti3
define <2 x double> @stack_fold_extractf64x2(<8 x double> %a0, <8 x double> %a1) {
;CHECK-LABEL: stack_fold_extractf64x2
- ;CHECK: vextractf64x2 $3, {{%zmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
+ ;CHECK: vextractf32x4 $3, {{%zmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
%1 = shufflevector <8 x double> %a0, <8 x double> %a1, <2 x i32> <i32 6, i32 7>
%2 = tail call <2 x double> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
ret <2 x double> %1
@@ -466,7 +466,7 @@ define <2 x double> @stack_fold_extractf
define <8 x float> @stack_fold_extracti32x8(<16 x float> %a0, <16 x float> %a1) {
;CHECK-LABEL: stack_fold_extracti32x8
- ;CHECK: vextractf32x8 $1, {{%zmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 32-byte Folded Spill
+ ;CHECK: vextractf64x4 $1, {{%zmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 32-byte Folded Spill
%1 = shufflevector <16 x float> %a0, <16 x float> %a1, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
ret <8 x float> %1
@@ -482,7 +482,7 @@ define <4 x double> @stack_fold_extractf
define <16 x float> @stack_fold_insertf32x8(<8 x float> %a0, <8 x float> %a1) {
;CHECK-LABEL: stack_fold_insertf32x8
- ;CHECK: vinsertf32x8 $1, {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ ;CHECK: vinsertf64x4 $1, {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = shufflevector <8 x float> %a0, <8 x float> %a1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x float> %2
Modified: llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll?rev=311091&r1=311090&r2=311091&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/stack-folding-int-avx512.ll Thu Aug 17 08:40:25 2017
@@ -130,7 +130,7 @@ define <4 x i32> @stack_fold_extracti32x
define <2 x i64> @stack_fold_extracti64x2(<8 x i64> %a0, <8 x i64> %a1) {
;CHECK-LABEL: stack_fold_extracti64x2
- ;CHECK: vextracti64x2 $3, {{%zmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
+ ;CHECK: vextracti32x4 $3, {{%zmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 16-byte Folded Spill
; add forces execution domain
%1 = add <8 x i64> %a0, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
%2 = shufflevector <8 x i64> %1, <8 x i64> %a1, <2 x i32> <i32 6, i32 7>
@@ -140,7 +140,7 @@ define <2 x i64> @stack_fold_extracti64x
define <8 x i32> @stack_fold_extracti32x8(<16 x i32> %a0, <16 x i32> %a1) {
;CHECK-LABEL: stack_fold_extracti32x8
- ;CHECK: vextracti32x8 $1, {{%zmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 32-byte Folded Spill
+ ;CHECK: vextracti64x4 $1, {{%zmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 32-byte Folded Spill
; add forces execution domain
%1 = add <16 x i32> %a0, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%2 = shufflevector <16 x i32> %1, <16 x i32> %a1, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -160,7 +160,7 @@ define <4 x i64> @stack_fold_extracti64x
define <16 x i32> @stack_fold_inserti32x8(<8 x i32> %a0, <8 x i32> %a1) {
;CHECK-LABEL: stack_fold_inserti32x8
- ;CHECK: vinserti32x8 $1, {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
+ ;CHECK: vinserti64x4 $1, {{-?[0-9]*}}(%rsp), {{%zmm[0-9][0-9]*}}, {{%zmm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = shufflevector <8 x i32> %a0, <8 x i32> %a1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; add forces execution domain
Modified: llvm/trunk/test/CodeGen/X86/subvector-broadcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/subvector-broadcast.ll?rev=311091&r1=311090&r2=311091&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/subvector-broadcast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/subvector-broadcast.ll Thu Aug 17 08:40:25 2017
@@ -1457,26 +1457,12 @@ define <16 x float> @reg_broadcast_4f32_
; X32-AVX-NEXT: vmovaps %ymm0, %ymm1
; X32-AVX-NEXT: retl
;
-; X32-AVX512F-LABEL: reg_broadcast_4f32_16f32:
-; X32-AVX512F: # BB#0:
-; X32-AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
-; X32-AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
-; X32-AVX512F-NEXT: retl
-;
-; X32-AVX512BW-LABEL: reg_broadcast_4f32_16f32:
-; X32-AVX512BW: # BB#0:
-; X32-AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
-; X32-AVX512BW-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X32-AVX512BW-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
-; X32-AVX512BW-NEXT: retl
-;
-; X32-AVX512DQ-LABEL: reg_broadcast_4f32_16f32:
-; X32-AVX512DQ: # BB#0:
-; X32-AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
-; X32-AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X32-AVX512DQ-NEXT: vinsertf32x8 $1, %ymm0, %zmm0, %zmm0
-; X32-AVX512DQ-NEXT: retl
+; X32-AVX512-LABEL: reg_broadcast_4f32_16f32:
+; X32-AVX512: # BB#0:
+; X32-AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; X32-AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
+; X32-AVX512-NEXT: retl
;
; X64-AVX-LABEL: reg_broadcast_4f32_16f32:
; X64-AVX: # BB#0:
@@ -1485,26 +1471,12 @@ define <16 x float> @reg_broadcast_4f32_
; X64-AVX-NEXT: vmovaps %ymm0, %ymm1
; X64-AVX-NEXT: retq
;
-; X64-AVX512F-LABEL: reg_broadcast_4f32_16f32:
-; X64-AVX512F: # BB#0:
-; X64-AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
-; X64-AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X64-AVX512F-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
-; X64-AVX512F-NEXT: retq
-;
-; X64-AVX512BW-LABEL: reg_broadcast_4f32_16f32:
-; X64-AVX512BW: # BB#0:
-; X64-AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
-; X64-AVX512BW-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X64-AVX512BW-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
-; X64-AVX512BW-NEXT: retq
-;
-; X64-AVX512DQ-LABEL: reg_broadcast_4f32_16f32:
-; X64-AVX512DQ: # BB#0:
-; X64-AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
-; X64-AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X64-AVX512DQ-NEXT: vinsertf32x8 $1, %ymm0, %zmm0, %zmm0
-; X64-AVX512DQ-NEXT: retq
+; X64-AVX512-LABEL: reg_broadcast_4f32_16f32:
+; X64-AVX512: # BB#0:
+; X64-AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; X64-AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
+; X64-AVX512-NEXT: retq
%1 = shufflevector <4 x float> %a0, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
ret <16 x float> %1
}
@@ -1515,46 +1487,22 @@ define <16 x float> @reg_broadcast_8f32_
; X32-AVX-NEXT: vmovaps %ymm0, %ymm1
; X32-AVX-NEXT: retl
;
-; X32-AVX512F-LABEL: reg_broadcast_8f32_16f32:
-; X32-AVX512F: # BB#0:
-; X32-AVX512F-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
-; X32-AVX512F-NEXT: retl
-;
-; X32-AVX512BW-LABEL: reg_broadcast_8f32_16f32:
-; X32-AVX512BW: # BB#0:
-; X32-AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; X32-AVX512BW-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
-; X32-AVX512BW-NEXT: retl
-;
-; X32-AVX512DQ-LABEL: reg_broadcast_8f32_16f32:
-; X32-AVX512DQ: # BB#0:
-; X32-AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; X32-AVX512DQ-NEXT: vinsertf32x8 $1, %ymm0, %zmm0, %zmm0
-; X32-AVX512DQ-NEXT: retl
+; X32-AVX512-LABEL: reg_broadcast_8f32_16f32:
+; X32-AVX512: # BB#0:
+; X32-AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; X32-AVX512-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
+; X32-AVX512-NEXT: retl
;
; X64-AVX-LABEL: reg_broadcast_8f32_16f32:
; X64-AVX: # BB#0:
; X64-AVX-NEXT: vmovaps %ymm0, %ymm1
; X64-AVX-NEXT: retq
;
-; X64-AVX512F-LABEL: reg_broadcast_8f32_16f32:
-; X64-AVX512F: # BB#0:
-; X64-AVX512F-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; X64-AVX512F-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
-; X64-AVX512F-NEXT: retq
-;
-; X64-AVX512BW-LABEL: reg_broadcast_8f32_16f32:
-; X64-AVX512BW: # BB#0:
-; X64-AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; X64-AVX512BW-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
-; X64-AVX512BW-NEXT: retq
-;
-; X64-AVX512DQ-LABEL: reg_broadcast_8f32_16f32:
-; X64-AVX512DQ: # BB#0:
-; X64-AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; X64-AVX512DQ-NEXT: vinsertf32x8 $1, %ymm0, %zmm0, %zmm0
-; X64-AVX512DQ-NEXT: retq
+; X64-AVX512-LABEL: reg_broadcast_8f32_16f32:
+; X64-AVX512: # BB#0:
+; X64-AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; X64-AVX512-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
+; X64-AVX512-NEXT: retq
%1 = shufflevector <8 x float> %a0, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <16 x float> %1
}
@@ -1583,26 +1531,12 @@ define <16 x i32> @reg_broadcast_4i32_16
; X32-AVX-NEXT: vmovaps %ymm0, %ymm1
; X32-AVX-NEXT: retl
;
-; X32-AVX512F-LABEL: reg_broadcast_4i32_16i32:
-; X32-AVX512F: # BB#0:
-; X32-AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
-; X32-AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
-; X32-AVX512F-NEXT: retl
-;
-; X32-AVX512BW-LABEL: reg_broadcast_4i32_16i32:
-; X32-AVX512BW: # BB#0:
-; X32-AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
-; X32-AVX512BW-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X32-AVX512BW-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
-; X32-AVX512BW-NEXT: retl
-;
-; X32-AVX512DQ-LABEL: reg_broadcast_4i32_16i32:
-; X32-AVX512DQ: # BB#0:
-; X32-AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
-; X32-AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X32-AVX512DQ-NEXT: vinsertf32x8 $1, %ymm0, %zmm0, %zmm0
-; X32-AVX512DQ-NEXT: retl
+; X32-AVX512-LABEL: reg_broadcast_4i32_16i32:
+; X32-AVX512: # BB#0:
+; X32-AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; X32-AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
+; X32-AVX512-NEXT: retl
;
; X64-AVX-LABEL: reg_broadcast_4i32_16i32:
; X64-AVX: # BB#0:
@@ -1611,26 +1545,12 @@ define <16 x i32> @reg_broadcast_4i32_16
; X64-AVX-NEXT: vmovaps %ymm0, %ymm1
; X64-AVX-NEXT: retq
;
-; X64-AVX512F-LABEL: reg_broadcast_4i32_16i32:
-; X64-AVX512F: # BB#0:
-; X64-AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
-; X64-AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X64-AVX512F-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
-; X64-AVX512F-NEXT: retq
-;
-; X64-AVX512BW-LABEL: reg_broadcast_4i32_16i32:
-; X64-AVX512BW: # BB#0:
-; X64-AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
-; X64-AVX512BW-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X64-AVX512BW-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
-; X64-AVX512BW-NEXT: retq
-;
-; X64-AVX512DQ-LABEL: reg_broadcast_4i32_16i32:
-; X64-AVX512DQ: # BB#0:
-; X64-AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
-; X64-AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; X64-AVX512DQ-NEXT: vinsertf32x8 $1, %ymm0, %zmm0, %zmm0
-; X64-AVX512DQ-NEXT: retq
+; X64-AVX512-LABEL: reg_broadcast_4i32_16i32:
+; X64-AVX512: # BB#0:
+; X64-AVX512-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; X64-AVX512-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-AVX512-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
+; X64-AVX512-NEXT: retq
%1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
ret <16 x i32> %1
}
@@ -1641,46 +1561,22 @@ define <16 x i32> @reg_broadcast_8i32_16
; X32-AVX-NEXT: vmovaps %ymm0, %ymm1
; X32-AVX-NEXT: retl
;
-; X32-AVX512F-LABEL: reg_broadcast_8i32_16i32:
-; X32-AVX512F: # BB#0:
-; X32-AVX512F-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; X32-AVX512F-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
-; X32-AVX512F-NEXT: retl
-;
-; X32-AVX512BW-LABEL: reg_broadcast_8i32_16i32:
-; X32-AVX512BW: # BB#0:
-; X32-AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; X32-AVX512BW-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
-; X32-AVX512BW-NEXT: retl
-;
-; X32-AVX512DQ-LABEL: reg_broadcast_8i32_16i32:
-; X32-AVX512DQ: # BB#0:
-; X32-AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; X32-AVX512DQ-NEXT: vinsertf32x8 $1, %ymm0, %zmm0, %zmm0
-; X32-AVX512DQ-NEXT: retl
+; X32-AVX512-LABEL: reg_broadcast_8i32_16i32:
+; X32-AVX512: # BB#0:
+; X32-AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; X32-AVX512-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
+; X32-AVX512-NEXT: retl
;
; X64-AVX-LABEL: reg_broadcast_8i32_16i32:
; X64-AVX: # BB#0:
; X64-AVX-NEXT: vmovaps %ymm0, %ymm1
; X64-AVX-NEXT: retq
;
-; X64-AVX512F-LABEL: reg_broadcast_8i32_16i32:
-; X64-AVX512F: # BB#0:
-; X64-AVX512F-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; X64-AVX512F-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
-; X64-AVX512F-NEXT: retq
-;
-; X64-AVX512BW-LABEL: reg_broadcast_8i32_16i32:
-; X64-AVX512BW: # BB#0:
-; X64-AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; X64-AVX512BW-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
-; X64-AVX512BW-NEXT: retq
-;
-; X64-AVX512DQ-LABEL: reg_broadcast_8i32_16i32:
-; X64-AVX512DQ: # BB#0:
-; X64-AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
-; X64-AVX512DQ-NEXT: vinsertf32x8 $1, %ymm0, %zmm0, %zmm0
-; X64-AVX512DQ-NEXT: retq
+; X64-AVX512-LABEL: reg_broadcast_8i32_16i32:
+; X64-AVX512: # BB#0:
+; X64-AVX512-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; X64-AVX512-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
+; X64-AVX512-NEXT: retq
%1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <16 x i32> %1
}
Modified: llvm/trunk/test/CodeGen/X86/vector-compare-results.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-compare-results.ll?rev=311091&r1=311090&r2=311091&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-compare-results.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-compare-results.ll Thu Aug 17 08:40:25 2017
@@ -2073,353 +2073,121 @@ define <16 x i1> @test_cmp_v16f64(<16 x
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: test_cmp_v16f64:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vextractf32x4 $3, %zmm2, %xmm4
-; AVX512F-NEXT: vextractf32x4 $3, %zmm0, %xmm5
-; AVX512F-NEXT: xorl %eax, %eax
-; AVX512F-NEXT: vucomisd %xmm4, %xmm5
-; AVX512F-NEXT: movq $-1, %rcx
-; AVX512F-NEXT: movl $0, %edx
-; AVX512F-NEXT: cmovaq %rcx, %rdx
-; AVX512F-NEXT: vmovq %rdx, %xmm6
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0]
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0]
-; AVX512F-NEXT: vucomisd %xmm4, %xmm5
-; AVX512F-NEXT: movl $0, %edx
-; AVX512F-NEXT: cmovaq %rcx, %rdx
-; AVX512F-NEXT: vmovq %rdx, %xmm4
-; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm6[0],xmm4[0]
-; AVX512F-NEXT: vextractf32x4 $2, %zmm2, %xmm5
-; AVX512F-NEXT: vextractf32x4 $2, %zmm0, %xmm6
-; AVX512F-NEXT: vucomisd %xmm5, %xmm6
-; AVX512F-NEXT: movl $0, %edx
-; AVX512F-NEXT: cmovaq %rcx, %rdx
-; AVX512F-NEXT: vmovq %rdx, %xmm7
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0]
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm6 = xmm6[1,0]
-; AVX512F-NEXT: vucomisd %xmm5, %xmm6
-; AVX512F-NEXT: movl $0, %edx
-; AVX512F-NEXT: cmovaq %rcx, %rdx
-; AVX512F-NEXT: vmovq %rdx, %xmm5
-; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm7[0],xmm5[0]
-; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4
-; AVX512F-NEXT: vextractf32x4 $1, %zmm2, %xmm5
-; AVX512F-NEXT: vextractf32x4 $1, %zmm0, %xmm6
-; AVX512F-NEXT: vucomisd %xmm5, %xmm6
-; AVX512F-NEXT: movl $0, %edx
-; AVX512F-NEXT: cmovaq %rcx, %rdx
-; AVX512F-NEXT: vmovq %rdx, %xmm7
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0]
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm6 = xmm6[1,0]
-; AVX512F-NEXT: vucomisd %xmm5, %xmm6
-; AVX512F-NEXT: movl $0, %edx
-; AVX512F-NEXT: cmovaq %rcx, %rdx
-; AVX512F-NEXT: vmovq %rdx, %xmm5
-; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm7[0],xmm5[0]
-; AVX512F-NEXT: vucomisd %xmm2, %xmm0
-; AVX512F-NEXT: movl $0, %edx
-; AVX512F-NEXT: cmovaq %rcx, %rdx
-; AVX512F-NEXT: vmovq %rdx, %xmm6
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512F-NEXT: vucomisd %xmm2, %xmm0
-; AVX512F-NEXT: movl $0, %edx
-; AVX512F-NEXT: cmovaq %rcx, %rdx
-; AVX512F-NEXT: vmovq %rdx, %xmm0
-; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm6[0],xmm0[0]
-; AVX512F-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512F-NEXT: vextractf32x4 $3, %zmm3, %xmm2
-; AVX512F-NEXT: vextractf32x4 $3, %zmm1, %xmm4
-; AVX512F-NEXT: vucomisd %xmm2, %xmm4
-; AVX512F-NEXT: movl $0, %edx
-; AVX512F-NEXT: cmovaq %rcx, %rdx
-; AVX512F-NEXT: vmovq %rdx, %xmm5
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0]
-; AVX512F-NEXT: vucomisd %xmm2, %xmm4
-; AVX512F-NEXT: movl $0, %edx
-; AVX512F-NEXT: cmovaq %rcx, %rdx
-; AVX512F-NEXT: vmovq %rdx, %xmm2
-; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm5[0],xmm2[0]
-; AVX512F-NEXT: vextractf32x4 $2, %zmm3, %xmm4
-; AVX512F-NEXT: vextractf32x4 $2, %zmm1, %xmm5
-; AVX512F-NEXT: vucomisd %xmm4, %xmm5
-; AVX512F-NEXT: movl $0, %edx
-; AVX512F-NEXT: cmovaq %rcx, %rdx
-; AVX512F-NEXT: vmovq %rdx, %xmm6
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0]
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0]
-; AVX512F-NEXT: vucomisd %xmm4, %xmm5
-; AVX512F-NEXT: movl $0, %edx
-; AVX512F-NEXT: cmovaq %rcx, %rdx
-; AVX512F-NEXT: vmovq %rdx, %xmm4
-; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm6[0],xmm4[0]
-; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2
-; AVX512F-NEXT: vextractf32x4 $1, %zmm3, %xmm4
-; AVX512F-NEXT: vextractf32x4 $1, %zmm1, %xmm5
-; AVX512F-NEXT: vucomisd %xmm4, %xmm5
-; AVX512F-NEXT: movl $0, %edx
-; AVX512F-NEXT: cmovaq %rcx, %rdx
-; AVX512F-NEXT: vmovq %rdx, %xmm6
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0]
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0]
-; AVX512F-NEXT: vucomisd %xmm4, %xmm5
-; AVX512F-NEXT: movl $0, %edx
-; AVX512F-NEXT: cmovaq %rcx, %rdx
-; AVX512F-NEXT: vmovq %rdx, %xmm4
-; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm6[0],xmm4[0]
-; AVX512F-NEXT: vucomisd %xmm3, %xmm1
-; AVX512F-NEXT: movl $0, %edx
-; AVX512F-NEXT: cmovaq %rcx, %rdx
-; AVX512F-NEXT: vmovq %rdx, %xmm5
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
-; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
-; AVX512F-NEXT: vucomisd %xmm3, %xmm1
-; AVX512F-NEXT: cmovaq %rcx, %rax
-; AVX512F-NEXT: vmovq %rax, %xmm1
-; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm5[0],xmm1[0]
-; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm1
-; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512DQ-LABEL: test_cmp_v16f64:
-; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: vextractf64x2 $3, %zmm2, %xmm4
-; AVX512DQ-NEXT: vextractf64x2 $3, %zmm0, %xmm5
-; AVX512DQ-NEXT: xorl %eax, %eax
-; AVX512DQ-NEXT: vucomisd %xmm4, %xmm5
-; AVX512DQ-NEXT: movq $-1, %rcx
-; AVX512DQ-NEXT: movl $0, %edx
-; AVX512DQ-NEXT: cmovaq %rcx, %rdx
-; AVX512DQ-NEXT: vmovq %rdx, %xmm6
-; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0]
-; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0]
-; AVX512DQ-NEXT: vucomisd %xmm4, %xmm5
-; AVX512DQ-NEXT: movl $0, %edx
-; AVX512DQ-NEXT: cmovaq %rcx, %rdx
-; AVX512DQ-NEXT: vmovq %rdx, %xmm4
-; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm6[0],xmm4[0]
-; AVX512DQ-NEXT: vextractf64x2 $2, %zmm2, %xmm5
-; AVX512DQ-NEXT: vextractf64x2 $2, %zmm0, %xmm6
-; AVX512DQ-NEXT: vucomisd %xmm5, %xmm6
-; AVX512DQ-NEXT: movl $0, %edx
-; AVX512DQ-NEXT: cmovaq %rcx, %rdx
-; AVX512DQ-NEXT: vmovq %rdx, %xmm7
-; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0]
-; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm6 = xmm6[1,0]
-; AVX512DQ-NEXT: vucomisd %xmm5, %xmm6
-; AVX512DQ-NEXT: movl $0, %edx
-; AVX512DQ-NEXT: cmovaq %rcx, %rdx
-; AVX512DQ-NEXT: vmovq %rdx, %xmm5
-; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm7[0],xmm5[0]
-; AVX512DQ-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4
-; AVX512DQ-NEXT: vextractf64x2 $1, %zmm2, %xmm5
-; AVX512DQ-NEXT: vextractf64x2 $1, %zmm0, %xmm6
-; AVX512DQ-NEXT: vucomisd %xmm5, %xmm6
-; AVX512DQ-NEXT: movl $0, %edx
-; AVX512DQ-NEXT: cmovaq %rcx, %rdx
-; AVX512DQ-NEXT: vmovq %rdx, %xmm7
-; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0]
-; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm6 = xmm6[1,0]
-; AVX512DQ-NEXT: vucomisd %xmm5, %xmm6
-; AVX512DQ-NEXT: movl $0, %edx
-; AVX512DQ-NEXT: cmovaq %rcx, %rdx
-; AVX512DQ-NEXT: vmovq %rdx, %xmm5
-; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm7[0],xmm5[0]
-; AVX512DQ-NEXT: vucomisd %xmm2, %xmm0
-; AVX512DQ-NEXT: movl $0, %edx
-; AVX512DQ-NEXT: cmovaq %rcx, %rdx
-; AVX512DQ-NEXT: vmovq %rdx, %xmm6
-; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
-; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512DQ-NEXT: vucomisd %xmm2, %xmm0
-; AVX512DQ-NEXT: movl $0, %edx
-; AVX512DQ-NEXT: cmovaq %rcx, %rdx
-; AVX512DQ-NEXT: vmovq %rdx, %xmm0
-; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm6[0],xmm0[0]
-; AVX512DQ-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512DQ-NEXT: vextractf64x2 $3, %zmm3, %xmm2
-; AVX512DQ-NEXT: vextractf64x2 $3, %zmm1, %xmm4
-; AVX512DQ-NEXT: vucomisd %xmm2, %xmm4
-; AVX512DQ-NEXT: movl $0, %edx
-; AVX512DQ-NEXT: cmovaq %rcx, %rdx
-; AVX512DQ-NEXT: vmovq %rdx, %xmm5
-; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
-; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0]
-; AVX512DQ-NEXT: vucomisd %xmm2, %xmm4
-; AVX512DQ-NEXT: movl $0, %edx
-; AVX512DQ-NEXT: cmovaq %rcx, %rdx
-; AVX512DQ-NEXT: vmovq %rdx, %xmm2
-; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm5[0],xmm2[0]
-; AVX512DQ-NEXT: vextractf64x2 $2, %zmm3, %xmm4
-; AVX512DQ-NEXT: vextractf64x2 $2, %zmm1, %xmm5
-; AVX512DQ-NEXT: vucomisd %xmm4, %xmm5
-; AVX512DQ-NEXT: movl $0, %edx
-; AVX512DQ-NEXT: cmovaq %rcx, %rdx
-; AVX512DQ-NEXT: vmovq %rdx, %xmm6
-; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0]
-; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0]
-; AVX512DQ-NEXT: vucomisd %xmm4, %xmm5
-; AVX512DQ-NEXT: movl $0, %edx
-; AVX512DQ-NEXT: cmovaq %rcx, %rdx
-; AVX512DQ-NEXT: vmovq %rdx, %xmm4
-; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm6[0],xmm4[0]
-; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2
-; AVX512DQ-NEXT: vextractf64x2 $1, %zmm3, %xmm4
-; AVX512DQ-NEXT: vextractf64x2 $1, %zmm1, %xmm5
-; AVX512DQ-NEXT: vucomisd %xmm4, %xmm5
-; AVX512DQ-NEXT: movl $0, %edx
-; AVX512DQ-NEXT: cmovaq %rcx, %rdx
-; AVX512DQ-NEXT: vmovq %rdx, %xmm6
-; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0]
-; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0]
-; AVX512DQ-NEXT: vucomisd %xmm4, %xmm5
-; AVX512DQ-NEXT: movl $0, %edx
-; AVX512DQ-NEXT: cmovaq %rcx, %rdx
-; AVX512DQ-NEXT: vmovq %rdx, %xmm4
-; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm6[0],xmm4[0]
-; AVX512DQ-NEXT: vucomisd %xmm3, %xmm1
-; AVX512DQ-NEXT: movl $0, %edx
-; AVX512DQ-NEXT: cmovaq %rcx, %rdx
-; AVX512DQ-NEXT: vmovq %rdx, %xmm5
-; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
-; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
-; AVX512DQ-NEXT: vucomisd %xmm3, %xmm1
-; AVX512DQ-NEXT: cmovaq %rcx, %rax
-; AVX512DQ-NEXT: vmovq %rax, %xmm1
-; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm5[0],xmm1[0]
-; AVX512DQ-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm1
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
-; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512DQ-NEXT: vzeroupper
-; AVX512DQ-NEXT: retq
-;
-; AVX512BW-LABEL: test_cmp_v16f64:
-; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vextractf32x4 $3, %zmm2, %xmm4
-; AVX512BW-NEXT: vextractf32x4 $3, %zmm0, %xmm5
-; AVX512BW-NEXT: xorl %eax, %eax
-; AVX512BW-NEXT: vucomisd %xmm4, %xmm5
-; AVX512BW-NEXT: movq $-1, %rcx
-; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmovaq %rcx, %rdx
-; AVX512BW-NEXT: vmovq %rdx, %xmm6
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0]
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0]
-; AVX512BW-NEXT: vucomisd %xmm4, %xmm5
-; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmovaq %rcx, %rdx
-; AVX512BW-NEXT: vmovq %rdx, %xmm4
-; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm6[0],xmm4[0]
-; AVX512BW-NEXT: vextractf32x4 $2, %zmm2, %xmm5
-; AVX512BW-NEXT: vextractf32x4 $2, %zmm0, %xmm6
-; AVX512BW-NEXT: vucomisd %xmm5, %xmm6
-; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmovaq %rcx, %rdx
-; AVX512BW-NEXT: vmovq %rdx, %xmm7
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0]
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm6 = xmm6[1,0]
-; AVX512BW-NEXT: vucomisd %xmm5, %xmm6
-; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmovaq %rcx, %rdx
-; AVX512BW-NEXT: vmovq %rdx, %xmm5
-; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm7[0],xmm5[0]
-; AVX512BW-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4
-; AVX512BW-NEXT: vextractf32x4 $1, %zmm2, %xmm5
-; AVX512BW-NEXT: vextractf32x4 $1, %zmm0, %xmm6
-; AVX512BW-NEXT: vucomisd %xmm5, %xmm6
-; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmovaq %rcx, %rdx
-; AVX512BW-NEXT: vmovq %rdx, %xmm7
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0]
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm6 = xmm6[1,0]
-; AVX512BW-NEXT: vucomisd %xmm5, %xmm6
-; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmovaq %rcx, %rdx
-; AVX512BW-NEXT: vmovq %rdx, %xmm5
-; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm7[0],xmm5[0]
-; AVX512BW-NEXT: vucomisd %xmm2, %xmm0
-; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmovaq %rcx, %rdx
-; AVX512BW-NEXT: vmovq %rdx, %xmm6
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512BW-NEXT: vucomisd %xmm2, %xmm0
-; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmovaq %rcx, %rdx
-; AVX512BW-NEXT: vmovq %rdx, %xmm0
-; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm6[0],xmm0[0]
-; AVX512BW-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512BW-NEXT: vextractf32x4 $3, %zmm3, %xmm2
-; AVX512BW-NEXT: vextractf32x4 $3, %zmm1, %xmm4
-; AVX512BW-NEXT: vucomisd %xmm2, %xmm4
-; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmovaq %rcx, %rdx
-; AVX512BW-NEXT: vmovq %rdx, %xmm5
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0]
-; AVX512BW-NEXT: vucomisd %xmm2, %xmm4
-; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmovaq %rcx, %rdx
-; AVX512BW-NEXT: vmovq %rdx, %xmm2
-; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm5[0],xmm2[0]
-; AVX512BW-NEXT: vextractf32x4 $2, %zmm3, %xmm4
-; AVX512BW-NEXT: vextractf32x4 $2, %zmm1, %xmm5
-; AVX512BW-NEXT: vucomisd %xmm4, %xmm5
-; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmovaq %rcx, %rdx
-; AVX512BW-NEXT: vmovq %rdx, %xmm6
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0]
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0]
-; AVX512BW-NEXT: vucomisd %xmm4, %xmm5
-; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmovaq %rcx, %rdx
-; AVX512BW-NEXT: vmovq %rdx, %xmm4
-; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm6[0],xmm4[0]
-; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2
-; AVX512BW-NEXT: vextractf32x4 $1, %zmm3, %xmm4
-; AVX512BW-NEXT: vextractf32x4 $1, %zmm1, %xmm5
-; AVX512BW-NEXT: vucomisd %xmm4, %xmm5
-; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmovaq %rcx, %rdx
-; AVX512BW-NEXT: vmovq %rdx, %xmm6
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0]
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0]
-; AVX512BW-NEXT: vucomisd %xmm4, %xmm5
-; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmovaq %rcx, %rdx
-; AVX512BW-NEXT: vmovq %rdx, %xmm4
-; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm6[0],xmm4[0]
-; AVX512BW-NEXT: vucomisd %xmm3, %xmm1
-; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmovaq %rcx, %rdx
-; AVX512BW-NEXT: vmovq %rdx, %xmm5
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
-; AVX512BW-NEXT: vucomisd %xmm3, %xmm1
-; AVX512BW-NEXT: cmovaq %rcx, %rax
-; AVX512BW-NEXT: vmovq %rax, %xmm1
-; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm5[0],xmm1[0]
-; AVX512BW-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm1
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
-; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
+; AVX512-LABEL: test_cmp_v16f64:
+; AVX512: # BB#0:
+; AVX512-NEXT: vextractf32x4 $3, %zmm2, %xmm4
+; AVX512-NEXT: vextractf32x4 $3, %zmm0, %xmm5
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: vucomisd %xmm4, %xmm5
+; AVX512-NEXT: movq $-1, %rcx
+; AVX512-NEXT: movl $0, %edx
+; AVX512-NEXT: cmovaq %rcx, %rdx
+; AVX512-NEXT: vmovq %rdx, %xmm6
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0]
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0]
+; AVX512-NEXT: vucomisd %xmm4, %xmm5
+; AVX512-NEXT: movl $0, %edx
+; AVX512-NEXT: cmovaq %rcx, %rdx
+; AVX512-NEXT: vmovq %rdx, %xmm4
+; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm6[0],xmm4[0]
+; AVX512-NEXT: vextractf32x4 $2, %zmm2, %xmm5
+; AVX512-NEXT: vextractf32x4 $2, %zmm0, %xmm6
+; AVX512-NEXT: vucomisd %xmm5, %xmm6
+; AVX512-NEXT: movl $0, %edx
+; AVX512-NEXT: cmovaq %rcx, %rdx
+; AVX512-NEXT: vmovq %rdx, %xmm7
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0]
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm6 = xmm6[1,0]
+; AVX512-NEXT: vucomisd %xmm5, %xmm6
+; AVX512-NEXT: movl $0, %edx
+; AVX512-NEXT: cmovaq %rcx, %rdx
+; AVX512-NEXT: vmovq %rdx, %xmm5
+; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm7[0],xmm5[0]
+; AVX512-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4
+; AVX512-NEXT: vextractf32x4 $1, %zmm2, %xmm5
+; AVX512-NEXT: vextractf32x4 $1, %zmm0, %xmm6
+; AVX512-NEXT: vucomisd %xmm5, %xmm6
+; AVX512-NEXT: movl $0, %edx
+; AVX512-NEXT: cmovaq %rcx, %rdx
+; AVX512-NEXT: vmovq %rdx, %xmm7
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0]
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm6 = xmm6[1,0]
+; AVX512-NEXT: vucomisd %xmm5, %xmm6
+; AVX512-NEXT: movl $0, %edx
+; AVX512-NEXT: cmovaq %rcx, %rdx
+; AVX512-NEXT: vmovq %rdx, %xmm5
+; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm7[0],xmm5[0]
+; AVX512-NEXT: vucomisd %xmm2, %xmm0
+; AVX512-NEXT: movl $0, %edx
+; AVX512-NEXT: cmovaq %rcx, %rdx
+; AVX512-NEXT: vmovq %rdx, %xmm6
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
+; AVX512-NEXT: vucomisd %xmm2, %xmm0
+; AVX512-NEXT: movl $0, %edx
+; AVX512-NEXT: cmovaq %rcx, %rdx
+; AVX512-NEXT: vmovq %rdx, %xmm0
+; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm6[0],xmm0[0]
+; AVX512-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0
+; AVX512-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0
+; AVX512-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512-NEXT: vextractf32x4 $3, %zmm3, %xmm2
+; AVX512-NEXT: vextractf32x4 $3, %zmm1, %xmm4
+; AVX512-NEXT: vucomisd %xmm2, %xmm4
+; AVX512-NEXT: movl $0, %edx
+; AVX512-NEXT: cmovaq %rcx, %rdx
+; AVX512-NEXT: vmovq %rdx, %xmm5
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0]
+; AVX512-NEXT: vucomisd %xmm2, %xmm4
+; AVX512-NEXT: movl $0, %edx
+; AVX512-NEXT: cmovaq %rcx, %rdx
+; AVX512-NEXT: vmovq %rdx, %xmm2
+; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm5[0],xmm2[0]
+; AVX512-NEXT: vextractf32x4 $2, %zmm3, %xmm4
+; AVX512-NEXT: vextractf32x4 $2, %zmm1, %xmm5
+; AVX512-NEXT: vucomisd %xmm4, %xmm5
+; AVX512-NEXT: movl $0, %edx
+; AVX512-NEXT: cmovaq %rcx, %rdx
+; AVX512-NEXT: vmovq %rdx, %xmm6
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0]
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0]
+; AVX512-NEXT: vucomisd %xmm4, %xmm5
+; AVX512-NEXT: movl $0, %edx
+; AVX512-NEXT: cmovaq %rcx, %rdx
+; AVX512-NEXT: vmovq %rdx, %xmm4
+; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm6[0],xmm4[0]
+; AVX512-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2
+; AVX512-NEXT: vextractf32x4 $1, %zmm3, %xmm4
+; AVX512-NEXT: vextractf32x4 $1, %zmm1, %xmm5
+; AVX512-NEXT: vucomisd %xmm4, %xmm5
+; AVX512-NEXT: movl $0, %edx
+; AVX512-NEXT: cmovaq %rcx, %rdx
+; AVX512-NEXT: vmovq %rdx, %xmm6
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm4 = xmm4[1,0]
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm5 = xmm5[1,0]
+; AVX512-NEXT: vucomisd %xmm4, %xmm5
+; AVX512-NEXT: movl $0, %edx
+; AVX512-NEXT: cmovaq %rcx, %rdx
+; AVX512-NEXT: vmovq %rdx, %xmm4
+; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm6[0],xmm4[0]
+; AVX512-NEXT: vucomisd %xmm3, %xmm1
+; AVX512-NEXT: movl $0, %edx
+; AVX512-NEXT: cmovaq %rcx, %rdx
+; AVX512-NEXT: vmovq %rdx, %xmm5
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm3 = xmm3[1,0]
+; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
+; AVX512-NEXT: vucomisd %xmm3, %xmm1
+; AVX512-NEXT: cmovaq %rcx, %rax
+; AVX512-NEXT: vmovq %rax, %xmm1
+; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm5[0],xmm1[0]
+; AVX512-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm1
+; AVX512-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
+; AVX512-NEXT: vpmovqd %zmm1, %ymm1
+; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%1 = fcmp ogt <16 x double> %a0, %a1
ret <16 x i1> %1
}
@@ -3060,7 +2828,7 @@ define <32 x i1> @test_cmp_v32f32(<32 x
; AVX512DQ-NEXT: cmoval %ecx, %edx
; AVX512DQ-NEXT: vpinsrd $3, %edx, %xmm5, %xmm0
; AVX512DQ-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm0
-; AVX512DQ-NEXT: vinserti32x8 $1, %ymm8, %zmm0, %zmm0
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm8, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm8
; AVX512DQ-NEXT: vextractf32x4 $3, %zmm3, %xmm2
; AVX512DQ-NEXT: vmovshdup {{.*#+}} xmm4 = xmm2[1,1,3,3]
@@ -3157,7 +2925,7 @@ define <32 x i1> @test_cmp_v32f32(<32 x
; AVX512DQ-NEXT: cmoval %ecx, %eax
; AVX512DQ-NEXT: vpinsrd $3, %eax, %xmm4, %xmm1
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
-; AVX512DQ-NEXT: vinserti32x8 $1, %ymm2, %zmm0, %zmm0
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm8, %ymm0
; AVX512DQ-NEXT: retq
@@ -3295,638 +3063,374 @@ define <32 x i1> @test_cmp_v32f32(<32 x
; AVX512BW-NEXT: vucomiss %xmm5, %xmm7
; AVX512BW-NEXT: movl $0, %edx
; AVX512BW-NEXT: cmoval %ecx, %edx
-; AVX512BW-NEXT: vucomiss %xmm4, %xmm6
-; AVX512BW-NEXT: movl $0, %esi
-; AVX512BW-NEXT: cmoval %ecx, %esi
-; AVX512BW-NEXT: vmovd %esi, %xmm5
-; AVX512BW-NEXT: vpinsrd $1, %edx, %xmm5, %xmm5
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm7 = xmm4[1,0]
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm6[1,0]
-; AVX512BW-NEXT: vucomiss %xmm7, %xmm0
-; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmoval %ecx, %edx
-; AVX512BW-NEXT: vpinsrd $2, %edx, %xmm5, %xmm0
-; AVX512BW-NEXT: vpermilps {{.*#+}} xmm4 = xmm4[3,1,2,3]
-; AVX512BW-NEXT: vpermilps {{.*#+}} xmm5 = xmm6[3,1,2,3]
-; AVX512BW-NEXT: vucomiss %xmm4, %xmm5
-; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmoval %ecx, %edx
-; AVX512BW-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
-; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; AVX512BW-NEXT: vextractf32x4 $1, %zmm3, %xmm0
-; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
-; AVX512BW-NEXT: vextractf32x4 $1, %zmm1, %xmm5
-; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm6 = xmm5[1,1,3,3]
-; AVX512BW-NEXT: vucomiss %xmm4, %xmm6
-; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmoval %ecx, %edx
-; AVX512BW-NEXT: vucomiss %xmm0, %xmm5
-; AVX512BW-NEXT: movl $0, %esi
-; AVX512BW-NEXT: cmoval %ecx, %esi
-; AVX512BW-NEXT: vmovd %esi, %xmm4
-; AVX512BW-NEXT: vpinsrd $1, %edx, %xmm4, %xmm4
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm6 = xmm0[1,0]
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm7 = xmm5[1,0]
-; AVX512BW-NEXT: vucomiss %xmm6, %xmm7
-; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmoval %ecx, %edx
-; AVX512BW-NEXT: vpinsrd $2, %edx, %xmm4, %xmm4
-; AVX512BW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
-; AVX512BW-NEXT: vpermilps {{.*#+}} xmm5 = xmm5[3,1,2,3]
-; AVX512BW-NEXT: vucomiss %xmm0, %xmm5
-; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmoval %ecx, %edx
-; AVX512BW-NEXT: vpinsrd $3, %edx, %xmm4, %xmm0
-; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm4 = xmm3[1,1,3,3]
-; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm5 = xmm1[1,1,3,3]
-; AVX512BW-NEXT: vucomiss %xmm4, %xmm5
-; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmoval %ecx, %edx
-; AVX512BW-NEXT: vucomiss %xmm3, %xmm1
-; AVX512BW-NEXT: movl $0, %esi
-; AVX512BW-NEXT: cmoval %ecx, %esi
-; AVX512BW-NEXT: vmovd %esi, %xmm4
-; AVX512BW-NEXT: vpinsrd $1, %edx, %xmm4, %xmm4
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm5 = xmm3[1,0]
-; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm6 = xmm1[1,0]
-; AVX512BW-NEXT: vucomiss %xmm5, %xmm6
-; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmoval %ecx, %edx
-; AVX512BW-NEXT: vpinsrd $2, %edx, %xmm4, %xmm4
-; AVX512BW-NEXT: vpermilps {{.*#+}} xmm3 = xmm3[3,1,2,3]
-; AVX512BW-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
-; AVX512BW-NEXT: vucomiss %xmm3, %xmm1
-; AVX512BW-NEXT: cmoval %ecx, %eax
-; AVX512BW-NEXT: vpinsrd $3, %eax, %xmm4, %xmm1
-; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm8, %zmm0
-; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
-; AVX512BW-NEXT: retq
- %1 = fcmp ogt <32 x float> %a0, %a1
- ret <32 x i1> %1
-}
-
-define <16 x i1> @test_cmp_v16i64(<16 x i64> %a0, <16 x i64> %a1) nounwind {
-; SSE2-LABEL: test_cmp_v16i64:
-; SSE2: # BB#0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [2147483648,0,2147483648,0]
-; SSE2-NEXT: pxor %xmm8, %xmm7
-; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
-; SSE2-NEXT: pxor %xmm8, %xmm9
-; SSE2-NEXT: movdqa %xmm7, %xmm10
-; SSE2-NEXT: pcmpgtd %xmm9, %xmm10
-; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2]
-; SSE2-NEXT: pcmpeqd %xmm7, %xmm9
-; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm9[1,1,3,3]
-; SSE2-NEXT: pand %xmm11, %xmm7
-; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm10[1,1,3,3]
-; SSE2-NEXT: por %xmm7, %xmm9
-; SSE2-NEXT: pxor %xmm8, %xmm6
-; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm7
-; SSE2-NEXT: pxor %xmm8, %xmm7
-; SSE2-NEXT: movdqa %xmm6, %xmm10
-; SSE2-NEXT: pcmpgtd %xmm7, %xmm10
-; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2]
-; SSE2-NEXT: pcmpeqd %xmm6, %xmm7
-; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
-; SSE2-NEXT: pand %xmm11, %xmm7
-; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm10[1,1,3,3]
-; SSE2-NEXT: por %xmm7, %xmm10
-; SSE2-NEXT: packsswb %xmm9, %xmm10
-; SSE2-NEXT: pxor %xmm8, %xmm5
-; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm7
-; SSE2-NEXT: pxor %xmm8, %xmm7
-; SSE2-NEXT: movdqa %xmm5, %xmm6
-; SSE2-NEXT: pcmpgtd %xmm7, %xmm6
-; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm6[0,0,2,2]
-; SSE2-NEXT: pcmpeqd %xmm5, %xmm7
-; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
-; SSE2-NEXT: pand %xmm9, %xmm5
-; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
-; SSE2-NEXT: por %xmm5, %xmm6
-; SSE2-NEXT: pxor %xmm8, %xmm4
-; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm5
-; SSE2-NEXT: pxor %xmm8, %xmm5
-; SSE2-NEXT: movdqa %xmm4, %xmm7
-; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
-; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm7[0,0,2,2]
-; SSE2-NEXT: pcmpeqd %xmm4, %xmm5
-; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
-; SSE2-NEXT: pand %xmm9, %xmm5
-; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm7[1,1,3,3]
-; SSE2-NEXT: por %xmm5, %xmm4
-; SSE2-NEXT: packsswb %xmm6, %xmm4
-; SSE2-NEXT: packsswb %xmm10, %xmm4
-; SSE2-NEXT: pxor %xmm8, %xmm3
-; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm5
-; SSE2-NEXT: pxor %xmm8, %xmm5
-; SSE2-NEXT: movdqa %xmm3, %xmm6
-; SSE2-NEXT: pcmpgtd %xmm5, %xmm6
-; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
-; SSE2-NEXT: pcmpeqd %xmm3, %xmm5
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3]
-; SSE2-NEXT: pand %xmm7, %xmm3
-; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
-; SSE2-NEXT: por %xmm3, %xmm5
-; SSE2-NEXT: pxor %xmm8, %xmm2
-; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm3
-; SSE2-NEXT: pxor %xmm8, %xmm3
-; SSE2-NEXT: movdqa %xmm2, %xmm6
-; SSE2-NEXT: pcmpgtd %xmm3, %xmm6
-; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
-; SSE2-NEXT: pcmpeqd %xmm2, %xmm3
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
-; SSE2-NEXT: pand %xmm7, %xmm3
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
-; SSE2-NEXT: por %xmm3, %xmm2
-; SSE2-NEXT: packsswb %xmm5, %xmm2
-; SSE2-NEXT: pxor %xmm8, %xmm1
-; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm3
-; SSE2-NEXT: pxor %xmm8, %xmm3
-; SSE2-NEXT: movdqa %xmm1, %xmm5
-; SSE2-NEXT: pcmpgtd %xmm3, %xmm5
-; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
-; SSE2-NEXT: pcmpeqd %xmm1, %xmm3
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
-; SSE2-NEXT: pand %xmm6, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3]
-; SSE2-NEXT: por %xmm1, %xmm3
-; SSE2-NEXT: pxor %xmm8, %xmm0
-; SSE2-NEXT: pxor {{[0-9]+}}(%rsp), %xmm8
-; SSE2-NEXT: movdqa %xmm0, %xmm1
-; SSE2-NEXT: pcmpgtd %xmm8, %xmm1
-; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2]
-; SSE2-NEXT: pcmpeqd %xmm0, %xmm8
-; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm8[1,1,3,3]
-; SSE2-NEXT: pand %xmm5, %xmm6
-; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
-; SSE2-NEXT: por %xmm6, %xmm0
-; SSE2-NEXT: packsswb %xmm3, %xmm0
-; SSE2-NEXT: packsswb %xmm2, %xmm0
-; SSE2-NEXT: packsswb %xmm4, %xmm0
-; SSE2-NEXT: retq
-;
-; SSE42-LABEL: test_cmp_v16i64:
-; SSE42: # BB#0:
-; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm7
-; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm6
-; SSE42-NEXT: packsswb %xmm7, %xmm6
-; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm5
-; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm4
-; SSE42-NEXT: packsswb %xmm5, %xmm4
-; SSE42-NEXT: packsswb %xmm6, %xmm4
-; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm3
-; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm2
-; SSE42-NEXT: packsswb %xmm3, %xmm2
-; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm1
-; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm0
-; SSE42-NEXT: packsswb %xmm1, %xmm0
-; SSE42-NEXT: packsswb %xmm2, %xmm0
-; SSE42-NEXT: packsswb %xmm4, %xmm0
-; SSE42-NEXT: retq
-;
-; AVX1-LABEL: test_cmp_v16i64:
-; AVX1: # BB#0:
-; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm8
-; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm9
-; AVX1-NEXT: vpcmpgtq %xmm8, %xmm9, %xmm8
-; AVX1-NEXT: vpcmpgtq %xmm7, %xmm3, %xmm3
-; AVX1-NEXT: vpacksswb %xmm8, %xmm3, %xmm8
-; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm7
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm7, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm6, %xmm2, %xmm2
-; AVX1-NEXT: vpacksswb %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpacksswb %xmm8, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm3
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm6
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm6, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm5, %xmm1, %xmm1
-; AVX1-NEXT: vpacksswb %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
-; AVX1-NEXT: vpcmpgtq %xmm3, %xmm5, %xmm3
-; AVX1-NEXT: vpcmpgtq %xmm4, %xmm0, %xmm0
-; AVX1-NEXT: vpacksswb %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vzeroupper
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: test_cmp_v16i64:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpcmpgtq %ymm7, %ymm3, %ymm3
-; AVX2-NEXT: vpcmpgtq %ymm6, %ymm2, %ymm2
-; AVX2-NEXT: vpacksswb %ymm3, %ymm2, %ymm2
-; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
-; AVX2-NEXT: vpcmpgtq %ymm5, %ymm1, %ymm1
-; AVX2-NEXT: vpcmpgtq %ymm4, %ymm0, %ymm0
-; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
-; AVX2-NEXT: vpacksswb %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vzeroupper
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: test_cmp_v16i64:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vextracti32x4 $3, %zmm2, %xmm4
-; AVX512F-NEXT: vpextrq $1, %xmm4, %rcx
-; AVX512F-NEXT: vextracti32x4 $3, %zmm0, %xmm5
-; AVX512F-NEXT: vpextrq $1, %xmm5, %rdx
-; AVX512F-NEXT: xorl %eax, %eax
-; AVX512F-NEXT: cmpq %rcx, %rdx
-; AVX512F-NEXT: movq $-1, %rcx
-; AVX512F-NEXT: movl $0, %edx
-; AVX512F-NEXT: cmovgq %rcx, %rdx
-; AVX512F-NEXT: vmovq %rdx, %xmm6
-; AVX512F-NEXT: vmovq %xmm4, %rdx
-; AVX512F-NEXT: vmovq %xmm5, %rsi
-; AVX512F-NEXT: cmpq %rdx, %rsi
-; AVX512F-NEXT: movl $0, %edx
-; AVX512F-NEXT: cmovgq %rcx, %rdx
-; AVX512F-NEXT: vmovq %rdx, %xmm4
-; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm6[0]
-; AVX512F-NEXT: vextracti32x4 $2, %zmm2, %xmm5
-; AVX512F-NEXT: vpextrq $1, %xmm5, %rdx
-; AVX512F-NEXT: vextracti32x4 $2, %zmm0, %xmm6
-; AVX512F-NEXT: vpextrq $1, %xmm6, %rsi
-; AVX512F-NEXT: cmpq %rdx, %rsi
-; AVX512F-NEXT: movl $0, %edx
-; AVX512F-NEXT: cmovgq %rcx, %rdx
-; AVX512F-NEXT: vmovq %rdx, %xmm7
-; AVX512F-NEXT: vmovq %xmm5, %rdx
-; AVX512F-NEXT: vmovq %xmm6, %rsi
-; AVX512F-NEXT: cmpq %rdx, %rsi
-; AVX512F-NEXT: movl $0, %edx
-; AVX512F-NEXT: cmovgq %rcx, %rdx
-; AVX512F-NEXT: vmovq %rdx, %xmm5
-; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm7[0]
-; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4
-; AVX512F-NEXT: vextracti32x4 $1, %zmm2, %xmm5
-; AVX512F-NEXT: vpextrq $1, %xmm5, %rdx
-; AVX512F-NEXT: vextracti32x4 $1, %zmm0, %xmm6
-; AVX512F-NEXT: vpextrq $1, %xmm6, %rsi
-; AVX512F-NEXT: cmpq %rdx, %rsi
-; AVX512F-NEXT: movl $0, %edx
-; AVX512F-NEXT: cmovgq %rcx, %rdx
-; AVX512F-NEXT: vmovq %rdx, %xmm7
-; AVX512F-NEXT: vmovq %xmm5, %rdx
-; AVX512F-NEXT: vmovq %xmm6, %rsi
-; AVX512F-NEXT: cmpq %rdx, %rsi
-; AVX512F-NEXT: movl $0, %edx
-; AVX512F-NEXT: cmovgq %rcx, %rdx
-; AVX512F-NEXT: vmovq %rdx, %xmm5
-; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm7[0]
-; AVX512F-NEXT: vpextrq $1, %xmm2, %rdx
-; AVX512F-NEXT: vpextrq $1, %xmm0, %rsi
-; AVX512F-NEXT: cmpq %rdx, %rsi
-; AVX512F-NEXT: movl $0, %edx
-; AVX512F-NEXT: cmovgq %rcx, %rdx
-; AVX512F-NEXT: vmovq %rdx, %xmm6
-; AVX512F-NEXT: vmovq %xmm2, %rdx
-; AVX512F-NEXT: vmovq %xmm0, %rsi
-; AVX512F-NEXT: cmpq %rdx, %rsi
-; AVX512F-NEXT: movl $0, %edx
-; AVX512F-NEXT: cmovgq %rcx, %rdx
-; AVX512F-NEXT: vmovq %rdx, %xmm0
-; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm6[0]
-; AVX512F-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512F-NEXT: vextracti32x4 $3, %zmm3, %xmm2
-; AVX512F-NEXT: vpextrq $1, %xmm2, %rdx
-; AVX512F-NEXT: vextracti32x4 $3, %zmm1, %xmm4
-; AVX512F-NEXT: vpextrq $1, %xmm4, %rsi
-; AVX512F-NEXT: cmpq %rdx, %rsi
-; AVX512F-NEXT: movl $0, %edx
-; AVX512F-NEXT: cmovgq %rcx, %rdx
-; AVX512F-NEXT: vmovq %rdx, %xmm5
-; AVX512F-NEXT: vmovq %xmm2, %rdx
-; AVX512F-NEXT: vmovq %xmm4, %rsi
-; AVX512F-NEXT: cmpq %rdx, %rsi
-; AVX512F-NEXT: movl $0, %edx
-; AVX512F-NEXT: cmovgq %rcx, %rdx
-; AVX512F-NEXT: vmovq %rdx, %xmm2
-; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm5[0]
-; AVX512F-NEXT: vextracti32x4 $2, %zmm3, %xmm4
-; AVX512F-NEXT: vpextrq $1, %xmm4, %rdx
-; AVX512F-NEXT: vextracti32x4 $2, %zmm1, %xmm5
-; AVX512F-NEXT: vpextrq $1, %xmm5, %rsi
-; AVX512F-NEXT: cmpq %rdx, %rsi
-; AVX512F-NEXT: movl $0, %edx
-; AVX512F-NEXT: cmovgq %rcx, %rdx
-; AVX512F-NEXT: vmovq %rdx, %xmm6
-; AVX512F-NEXT: vmovq %xmm4, %rdx
-; AVX512F-NEXT: vmovq %xmm5, %rsi
-; AVX512F-NEXT: cmpq %rdx, %rsi
-; AVX512F-NEXT: movl $0, %edx
-; AVX512F-NEXT: cmovgq %rcx, %rdx
-; AVX512F-NEXT: vmovq %rdx, %xmm4
-; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm6[0]
-; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2
-; AVX512F-NEXT: vextracti32x4 $1, %zmm3, %xmm4
-; AVX512F-NEXT: vpextrq $1, %xmm4, %rdx
-; AVX512F-NEXT: vextracti32x4 $1, %zmm1, %xmm5
-; AVX512F-NEXT: vpextrq $1, %xmm5, %rsi
-; AVX512F-NEXT: cmpq %rdx, %rsi
-; AVX512F-NEXT: movl $0, %edx
-; AVX512F-NEXT: cmovgq %rcx, %rdx
-; AVX512F-NEXT: vmovq %rdx, %xmm6
-; AVX512F-NEXT: vmovq %xmm4, %rdx
-; AVX512F-NEXT: vmovq %xmm5, %rsi
-; AVX512F-NEXT: cmpq %rdx, %rsi
-; AVX512F-NEXT: movl $0, %edx
-; AVX512F-NEXT: cmovgq %rcx, %rdx
-; AVX512F-NEXT: vmovq %rdx, %xmm4
-; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm6[0]
-; AVX512F-NEXT: vpextrq $1, %xmm3, %rdx
-; AVX512F-NEXT: vpextrq $1, %xmm1, %rsi
-; AVX512F-NEXT: cmpq %rdx, %rsi
-; AVX512F-NEXT: movl $0, %edx
-; AVX512F-NEXT: cmovgq %rcx, %rdx
-; AVX512F-NEXT: vmovq %rdx, %xmm5
-; AVX512F-NEXT: vmovq %xmm3, %rdx
-; AVX512F-NEXT: vmovq %xmm1, %rsi
-; AVX512F-NEXT: cmpq %rdx, %rsi
-; AVX512F-NEXT: cmovgq %rcx, %rax
-; AVX512F-NEXT: vmovq %rax, %xmm1
-; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0]
-; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm1
-; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
-; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512DQ-LABEL: test_cmp_v16i64:
-; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: vextracti64x2 $3, %zmm2, %xmm4
-; AVX512DQ-NEXT: vpextrq $1, %xmm4, %rcx
-; AVX512DQ-NEXT: vextracti64x2 $3, %zmm0, %xmm5
-; AVX512DQ-NEXT: vpextrq $1, %xmm5, %rdx
-; AVX512DQ-NEXT: xorl %eax, %eax
-; AVX512DQ-NEXT: cmpq %rcx, %rdx
-; AVX512DQ-NEXT: movq $-1, %rcx
-; AVX512DQ-NEXT: movl $0, %edx
-; AVX512DQ-NEXT: cmovgq %rcx, %rdx
-; AVX512DQ-NEXT: vmovq %rdx, %xmm6
-; AVX512DQ-NEXT: vmovq %xmm4, %rdx
-; AVX512DQ-NEXT: vmovq %xmm5, %rsi
-; AVX512DQ-NEXT: cmpq %rdx, %rsi
-; AVX512DQ-NEXT: movl $0, %edx
-; AVX512DQ-NEXT: cmovgq %rcx, %rdx
-; AVX512DQ-NEXT: vmovq %rdx, %xmm4
-; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm6[0]
-; AVX512DQ-NEXT: vextracti64x2 $2, %zmm2, %xmm5
-; AVX512DQ-NEXT: vpextrq $1, %xmm5, %rdx
-; AVX512DQ-NEXT: vextracti64x2 $2, %zmm0, %xmm6
-; AVX512DQ-NEXT: vpextrq $1, %xmm6, %rsi
-; AVX512DQ-NEXT: cmpq %rdx, %rsi
-; AVX512DQ-NEXT: movl $0, %edx
-; AVX512DQ-NEXT: cmovgq %rcx, %rdx
-; AVX512DQ-NEXT: vmovq %rdx, %xmm7
-; AVX512DQ-NEXT: vmovq %xmm5, %rdx
-; AVX512DQ-NEXT: vmovq %xmm6, %rsi
-; AVX512DQ-NEXT: cmpq %rdx, %rsi
-; AVX512DQ-NEXT: movl $0, %edx
-; AVX512DQ-NEXT: cmovgq %rcx, %rdx
-; AVX512DQ-NEXT: vmovq %rdx, %xmm5
-; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm7[0]
-; AVX512DQ-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4
-; AVX512DQ-NEXT: vextracti64x2 $1, %zmm2, %xmm5
-; AVX512DQ-NEXT: vpextrq $1, %xmm5, %rdx
-; AVX512DQ-NEXT: vextracti64x2 $1, %zmm0, %xmm6
-; AVX512DQ-NEXT: vpextrq $1, %xmm6, %rsi
-; AVX512DQ-NEXT: cmpq %rdx, %rsi
-; AVX512DQ-NEXT: movl $0, %edx
-; AVX512DQ-NEXT: cmovgq %rcx, %rdx
-; AVX512DQ-NEXT: vmovq %rdx, %xmm7
-; AVX512DQ-NEXT: vmovq %xmm5, %rdx
-; AVX512DQ-NEXT: vmovq %xmm6, %rsi
-; AVX512DQ-NEXT: cmpq %rdx, %rsi
-; AVX512DQ-NEXT: movl $0, %edx
-; AVX512DQ-NEXT: cmovgq %rcx, %rdx
-; AVX512DQ-NEXT: vmovq %rdx, %xmm5
-; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm7[0]
-; AVX512DQ-NEXT: vpextrq $1, %xmm2, %rdx
-; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rsi
-; AVX512DQ-NEXT: cmpq %rdx, %rsi
-; AVX512DQ-NEXT: movl $0, %edx
-; AVX512DQ-NEXT: cmovgq %rcx, %rdx
-; AVX512DQ-NEXT: vmovq %rdx, %xmm6
-; AVX512DQ-NEXT: vmovq %xmm2, %rdx
-; AVX512DQ-NEXT: vmovq %xmm0, %rsi
-; AVX512DQ-NEXT: cmpq %rdx, %rsi
-; AVX512DQ-NEXT: movl $0, %edx
-; AVX512DQ-NEXT: cmovgq %rcx, %rdx
-; AVX512DQ-NEXT: vmovq %rdx, %xmm0
-; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm6[0]
-; AVX512DQ-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512DQ-NEXT: vextracti64x2 $3, %zmm3, %xmm2
-; AVX512DQ-NEXT: vpextrq $1, %xmm2, %rdx
-; AVX512DQ-NEXT: vextracti64x2 $3, %zmm1, %xmm4
-; AVX512DQ-NEXT: vpextrq $1, %xmm4, %rsi
-; AVX512DQ-NEXT: cmpq %rdx, %rsi
-; AVX512DQ-NEXT: movl $0, %edx
-; AVX512DQ-NEXT: cmovgq %rcx, %rdx
-; AVX512DQ-NEXT: vmovq %rdx, %xmm5
-; AVX512DQ-NEXT: vmovq %xmm2, %rdx
-; AVX512DQ-NEXT: vmovq %xmm4, %rsi
-; AVX512DQ-NEXT: cmpq %rdx, %rsi
-; AVX512DQ-NEXT: movl $0, %edx
-; AVX512DQ-NEXT: cmovgq %rcx, %rdx
-; AVX512DQ-NEXT: vmovq %rdx, %xmm2
-; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm5[0]
-; AVX512DQ-NEXT: vextracti64x2 $2, %zmm3, %xmm4
-; AVX512DQ-NEXT: vpextrq $1, %xmm4, %rdx
-; AVX512DQ-NEXT: vextracti64x2 $2, %zmm1, %xmm5
-; AVX512DQ-NEXT: vpextrq $1, %xmm5, %rsi
-; AVX512DQ-NEXT: cmpq %rdx, %rsi
-; AVX512DQ-NEXT: movl $0, %edx
-; AVX512DQ-NEXT: cmovgq %rcx, %rdx
-; AVX512DQ-NEXT: vmovq %rdx, %xmm6
-; AVX512DQ-NEXT: vmovq %xmm4, %rdx
-; AVX512DQ-NEXT: vmovq %xmm5, %rsi
-; AVX512DQ-NEXT: cmpq %rdx, %rsi
-; AVX512DQ-NEXT: movl $0, %edx
-; AVX512DQ-NEXT: cmovgq %rcx, %rdx
-; AVX512DQ-NEXT: vmovq %rdx, %xmm4
-; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm6[0]
-; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2
-; AVX512DQ-NEXT: vextracti64x2 $1, %zmm3, %xmm4
-; AVX512DQ-NEXT: vpextrq $1, %xmm4, %rdx
-; AVX512DQ-NEXT: vextracti64x2 $1, %zmm1, %xmm5
-; AVX512DQ-NEXT: vpextrq $1, %xmm5, %rsi
-; AVX512DQ-NEXT: cmpq %rdx, %rsi
-; AVX512DQ-NEXT: movl $0, %edx
-; AVX512DQ-NEXT: cmovgq %rcx, %rdx
-; AVX512DQ-NEXT: vmovq %rdx, %xmm6
-; AVX512DQ-NEXT: vmovq %xmm4, %rdx
-; AVX512DQ-NEXT: vmovq %xmm5, %rsi
-; AVX512DQ-NEXT: cmpq %rdx, %rsi
-; AVX512DQ-NEXT: movl $0, %edx
-; AVX512DQ-NEXT: cmovgq %rcx, %rdx
-; AVX512DQ-NEXT: vmovq %rdx, %xmm4
-; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm6[0]
-; AVX512DQ-NEXT: vpextrq $1, %xmm3, %rdx
-; AVX512DQ-NEXT: vpextrq $1, %xmm1, %rsi
-; AVX512DQ-NEXT: cmpq %rdx, %rsi
-; AVX512DQ-NEXT: movl $0, %edx
-; AVX512DQ-NEXT: cmovgq %rcx, %rdx
-; AVX512DQ-NEXT: vmovq %rdx, %xmm5
-; AVX512DQ-NEXT: vmovq %xmm3, %rdx
-; AVX512DQ-NEXT: vmovq %xmm1, %rsi
-; AVX512DQ-NEXT: cmpq %rdx, %rsi
-; AVX512DQ-NEXT: cmovgq %rcx, %rax
-; AVX512DQ-NEXT: vmovq %rax, %xmm1
-; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0]
-; AVX512DQ-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm1
-; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
-; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512DQ-NEXT: vzeroupper
-; AVX512DQ-NEXT: retq
-;
-; AVX512BW-LABEL: test_cmp_v16i64:
-; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vextracti32x4 $3, %zmm2, %xmm4
-; AVX512BW-NEXT: vpextrq $1, %xmm4, %rcx
-; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm5
-; AVX512BW-NEXT: vpextrq $1, %xmm5, %rdx
-; AVX512BW-NEXT: xorl %eax, %eax
-; AVX512BW-NEXT: cmpq %rcx, %rdx
-; AVX512BW-NEXT: movq $-1, %rcx
-; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmovgq %rcx, %rdx
-; AVX512BW-NEXT: vmovq %rdx, %xmm6
-; AVX512BW-NEXT: vmovq %xmm4, %rdx
-; AVX512BW-NEXT: vmovq %xmm5, %rsi
-; AVX512BW-NEXT: cmpq %rdx, %rsi
-; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmovgq %rcx, %rdx
-; AVX512BW-NEXT: vmovq %rdx, %xmm4
-; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm6[0]
-; AVX512BW-NEXT: vextracti32x4 $2, %zmm2, %xmm5
-; AVX512BW-NEXT: vpextrq $1, %xmm5, %rdx
-; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm6
-; AVX512BW-NEXT: vpextrq $1, %xmm6, %rsi
-; AVX512BW-NEXT: cmpq %rdx, %rsi
-; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmovgq %rcx, %rdx
-; AVX512BW-NEXT: vmovq %rdx, %xmm7
-; AVX512BW-NEXT: vmovq %xmm5, %rdx
-; AVX512BW-NEXT: vmovq %xmm6, %rsi
-; AVX512BW-NEXT: cmpq %rdx, %rsi
-; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmovgq %rcx, %rdx
-; AVX512BW-NEXT: vmovq %rdx, %xmm5
-; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm7[0]
-; AVX512BW-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4
-; AVX512BW-NEXT: vextracti32x4 $1, %zmm2, %xmm5
-; AVX512BW-NEXT: vpextrq $1, %xmm5, %rdx
-; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm6
-; AVX512BW-NEXT: vpextrq $1, %xmm6, %rsi
-; AVX512BW-NEXT: cmpq %rdx, %rsi
-; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmovgq %rcx, %rdx
-; AVX512BW-NEXT: vmovq %rdx, %xmm7
-; AVX512BW-NEXT: vmovq %xmm5, %rdx
-; AVX512BW-NEXT: vmovq %xmm6, %rsi
-; AVX512BW-NEXT: cmpq %rdx, %rsi
-; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmovgq %rcx, %rdx
-; AVX512BW-NEXT: vmovq %rdx, %xmm5
-; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm7[0]
-; AVX512BW-NEXT: vpextrq $1, %xmm2, %rdx
-; AVX512BW-NEXT: vpextrq $1, %xmm0, %rsi
-; AVX512BW-NEXT: cmpq %rdx, %rsi
-; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmovgq %rcx, %rdx
-; AVX512BW-NEXT: vmovq %rdx, %xmm6
-; AVX512BW-NEXT: vmovq %xmm2, %rdx
-; AVX512BW-NEXT: vmovq %xmm0, %rsi
-; AVX512BW-NEXT: cmpq %rdx, %rsi
-; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmovgq %rcx, %rdx
-; AVX512BW-NEXT: vmovq %rdx, %xmm0
-; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm6[0]
-; AVX512BW-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512BW-NEXT: vextracti32x4 $3, %zmm3, %xmm2
-; AVX512BW-NEXT: vpextrq $1, %xmm2, %rdx
-; AVX512BW-NEXT: vextracti32x4 $3, %zmm1, %xmm4
-; AVX512BW-NEXT: vpextrq $1, %xmm4, %rsi
-; AVX512BW-NEXT: cmpq %rdx, %rsi
+; AVX512BW-NEXT: vucomiss %xmm4, %xmm6
+; AVX512BW-NEXT: movl $0, %esi
+; AVX512BW-NEXT: cmoval %ecx, %esi
+; AVX512BW-NEXT: vmovd %esi, %xmm5
+; AVX512BW-NEXT: vpinsrd $1, %edx, %xmm5, %xmm5
+; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm7 = xmm4[1,0]
+; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm0 = xmm6[1,0]
+; AVX512BW-NEXT: vucomiss %xmm7, %xmm0
; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmovgq %rcx, %rdx
-; AVX512BW-NEXT: vmovq %rdx, %xmm5
-; AVX512BW-NEXT: vmovq %xmm2, %rdx
-; AVX512BW-NEXT: vmovq %xmm4, %rsi
-; AVX512BW-NEXT: cmpq %rdx, %rsi
+; AVX512BW-NEXT: cmoval %ecx, %edx
+; AVX512BW-NEXT: vpinsrd $2, %edx, %xmm5, %xmm0
+; AVX512BW-NEXT: vpermilps {{.*#+}} xmm4 = xmm4[3,1,2,3]
+; AVX512BW-NEXT: vpermilps {{.*#+}} xmm5 = xmm6[3,1,2,3]
+; AVX512BW-NEXT: vucomiss %xmm4, %xmm5
; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmovgq %rcx, %rdx
-; AVX512BW-NEXT: vmovq %rdx, %xmm2
-; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm5[0]
-; AVX512BW-NEXT: vextracti32x4 $2, %zmm3, %xmm4
-; AVX512BW-NEXT: vpextrq $1, %xmm4, %rdx
-; AVX512BW-NEXT: vextracti32x4 $2, %zmm1, %xmm5
-; AVX512BW-NEXT: vpextrq $1, %xmm5, %rsi
-; AVX512BW-NEXT: cmpq %rdx, %rsi
+; AVX512BW-NEXT: cmoval %ecx, %edx
+; AVX512BW-NEXT: vpinsrd $3, %edx, %xmm0, %xmm0
+; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
+; AVX512BW-NEXT: vextractf32x4 $1, %zmm3, %xmm0
+; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
+; AVX512BW-NEXT: vextractf32x4 $1, %zmm1, %xmm5
+; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm6 = xmm5[1,1,3,3]
+; AVX512BW-NEXT: vucomiss %xmm4, %xmm6
; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmovgq %rcx, %rdx
-; AVX512BW-NEXT: vmovq %rdx, %xmm6
-; AVX512BW-NEXT: vmovq %xmm4, %rdx
-; AVX512BW-NEXT: vmovq %xmm5, %rsi
-; AVX512BW-NEXT: cmpq %rdx, %rsi
+; AVX512BW-NEXT: cmoval %ecx, %edx
+; AVX512BW-NEXT: vucomiss %xmm0, %xmm5
+; AVX512BW-NEXT: movl $0, %esi
+; AVX512BW-NEXT: cmoval %ecx, %esi
+; AVX512BW-NEXT: vmovd %esi, %xmm4
+; AVX512BW-NEXT: vpinsrd $1, %edx, %xmm4, %xmm4
+; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm6 = xmm0[1,0]
+; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm7 = xmm5[1,0]
+; AVX512BW-NEXT: vucomiss %xmm6, %xmm7
; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmovgq %rcx, %rdx
-; AVX512BW-NEXT: vmovq %rdx, %xmm4
-; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm6[0]
-; AVX512BW-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2
-; AVX512BW-NEXT: vextracti32x4 $1, %zmm3, %xmm4
-; AVX512BW-NEXT: vpextrq $1, %xmm4, %rdx
-; AVX512BW-NEXT: vextracti32x4 $1, %zmm1, %xmm5
-; AVX512BW-NEXT: vpextrq $1, %xmm5, %rsi
-; AVX512BW-NEXT: cmpq %rdx, %rsi
+; AVX512BW-NEXT: cmoval %ecx, %edx
+; AVX512BW-NEXT: vpinsrd $2, %edx, %xmm4, %xmm4
+; AVX512BW-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
+; AVX512BW-NEXT: vpermilps {{.*#+}} xmm5 = xmm5[3,1,2,3]
+; AVX512BW-NEXT: vucomiss %xmm0, %xmm5
; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmovgq %rcx, %rdx
-; AVX512BW-NEXT: vmovq %rdx, %xmm6
-; AVX512BW-NEXT: vmovq %xmm4, %rdx
-; AVX512BW-NEXT: vmovq %xmm5, %rsi
-; AVX512BW-NEXT: cmpq %rdx, %rsi
+; AVX512BW-NEXT: cmoval %ecx, %edx
+; AVX512BW-NEXT: vpinsrd $3, %edx, %xmm4, %xmm0
+; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm4 = xmm3[1,1,3,3]
+; AVX512BW-NEXT: vmovshdup {{.*#+}} xmm5 = xmm1[1,1,3,3]
+; AVX512BW-NEXT: vucomiss %xmm4, %xmm5
; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmovgq %rcx, %rdx
-; AVX512BW-NEXT: vmovq %rdx, %xmm4
-; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm6[0]
-; AVX512BW-NEXT: vpextrq $1, %xmm3, %rdx
-; AVX512BW-NEXT: vpextrq $1, %xmm1, %rsi
-; AVX512BW-NEXT: cmpq %rdx, %rsi
+; AVX512BW-NEXT: cmoval %ecx, %edx
+; AVX512BW-NEXT: vucomiss %xmm3, %xmm1
+; AVX512BW-NEXT: movl $0, %esi
+; AVX512BW-NEXT: cmoval %ecx, %esi
+; AVX512BW-NEXT: vmovd %esi, %xmm4
+; AVX512BW-NEXT: vpinsrd $1, %edx, %xmm4, %xmm4
+; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm5 = xmm3[1,0]
+; AVX512BW-NEXT: vpermilpd {{.*#+}} xmm6 = xmm1[1,0]
+; AVX512BW-NEXT: vucomiss %xmm5, %xmm6
; AVX512BW-NEXT: movl $0, %edx
-; AVX512BW-NEXT: cmovgq %rcx, %rdx
-; AVX512BW-NEXT: vmovq %rdx, %xmm5
-; AVX512BW-NEXT: vmovq %xmm3, %rdx
-; AVX512BW-NEXT: vmovq %xmm1, %rsi
-; AVX512BW-NEXT: cmpq %rdx, %rsi
-; AVX512BW-NEXT: cmovgq %rcx, %rax
-; AVX512BW-NEXT: vmovq %rax, %xmm1
-; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0]
-; AVX512BW-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm1
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
-; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: cmoval %ecx, %edx
+; AVX512BW-NEXT: vpinsrd $2, %edx, %xmm4, %xmm4
+; AVX512BW-NEXT: vpermilps {{.*#+}} xmm3 = xmm3[3,1,2,3]
+; AVX512BW-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3]
+; AVX512BW-NEXT: vucomiss %xmm3, %xmm1
+; AVX512BW-NEXT: cmoval %ecx, %eax
+; AVX512BW-NEXT: vpinsrd $3, %eax, %xmm4, %xmm1
+; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm8, %zmm0
+; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: retq
+ %1 = fcmp ogt <32 x float> %a0, %a1
+ ret <32 x i1> %1
+}
+
+define <16 x i1> @test_cmp_v16i64(<16 x i64> %a0, <16 x i64> %a1) nounwind {
+; SSE2-LABEL: test_cmp_v16i64:
+; SSE2: # BB#0:
+; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [2147483648,0,2147483648,0]
+; SSE2-NEXT: pxor %xmm8, %xmm7
+; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm9
+; SSE2-NEXT: pxor %xmm8, %xmm9
+; SSE2-NEXT: movdqa %xmm7, %xmm10
+; SSE2-NEXT: pcmpgtd %xmm9, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm7, %xmm9
+; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm9[1,1,3,3]
+; SSE2-NEXT: pand %xmm11, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm10[1,1,3,3]
+; SSE2-NEXT: por %xmm7, %xmm9
+; SSE2-NEXT: pxor %xmm8, %xmm6
+; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm7
+; SSE2-NEXT: pxor %xmm8, %xmm7
+; SSE2-NEXT: movdqa %xmm6, %xmm10
+; SSE2-NEXT: pcmpgtd %xmm7, %xmm10
+; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm6, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm7[1,1,3,3]
+; SSE2-NEXT: pand %xmm11, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm10[1,1,3,3]
+; SSE2-NEXT: por %xmm7, %xmm10
+; SSE2-NEXT: packsswb %xmm9, %xmm10
+; SSE2-NEXT: pxor %xmm8, %xmm5
+; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm7
+; SSE2-NEXT: pxor %xmm8, %xmm7
+; SSE2-NEXT: movdqa %xmm5, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm7, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm6[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm5, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
+; SSE2-NEXT: pand %xmm9, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
+; SSE2-NEXT: por %xmm5, %xmm6
+; SSE2-NEXT: pxor %xmm8, %xmm4
+; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm5
+; SSE2-NEXT: pxor %xmm8, %xmm5
+; SSE2-NEXT: movdqa %xmm4, %xmm7
+; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
+; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm7[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
+; SSE2-NEXT: pand %xmm9, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm7[1,1,3,3]
+; SSE2-NEXT: por %xmm5, %xmm4
+; SSE2-NEXT: packsswb %xmm6, %xmm4
+; SSE2-NEXT: packsswb %xmm10, %xmm4
+; SSE2-NEXT: pxor %xmm8, %xmm3
+; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm5
+; SSE2-NEXT: pxor %xmm8, %xmm5
+; SSE2-NEXT: movdqa %xmm3, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm5, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3]
+; SSE2-NEXT: pand %xmm7, %xmm3
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,1,3,3]
+; SSE2-NEXT: por %xmm3, %xmm5
+; SSE2-NEXT: pxor %xmm8, %xmm2
+; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm3
+; SSE2-NEXT: pxor %xmm8, %xmm3
+; SSE2-NEXT: movdqa %xmm2, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm3
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
+; SSE2-NEXT: pand %xmm7, %xmm3
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
+; SSE2-NEXT: por %xmm3, %xmm2
+; SSE2-NEXT: packsswb %xmm5, %xmm2
+; SSE2-NEXT: pxor %xmm8, %xmm1
+; SSE2-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm3
+; SSE2-NEXT: pxor %xmm8, %xmm3
+; SSE2-NEXT: movdqa %xmm1, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm5
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm3
+; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
+; SSE2-NEXT: pand %xmm6, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3]
+; SSE2-NEXT: por %xmm1, %xmm3
+; SSE2-NEXT: pxor %xmm8, %xmm0
+; SSE2-NEXT: pxor {{[0-9]+}}(%rsp), %xmm8
+; SSE2-NEXT: movdqa %xmm0, %xmm1
+; SSE2-NEXT: pcmpgtd %xmm8, %xmm1
+; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm1[0,0,2,2]
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm8
+; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm8[1,1,3,3]
+; SSE2-NEXT: pand %xmm5, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
+; SSE2-NEXT: por %xmm6, %xmm0
+; SSE2-NEXT: packsswb %xmm3, %xmm0
+; SSE2-NEXT: packsswb %xmm2, %xmm0
+; SSE2-NEXT: packsswb %xmm4, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE42-LABEL: test_cmp_v16i64:
+; SSE42: # BB#0:
+; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm7
+; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm6
+; SSE42-NEXT: packsswb %xmm7, %xmm6
+; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm5
+; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm4
+; SSE42-NEXT: packsswb %xmm5, %xmm4
+; SSE42-NEXT: packsswb %xmm6, %xmm4
+; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm3
+; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm2
+; SSE42-NEXT: packsswb %xmm3, %xmm2
+; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm1
+; SSE42-NEXT: pcmpgtq {{[0-9]+}}(%rsp), %xmm0
+; SSE42-NEXT: packsswb %xmm1, %xmm0
+; SSE42-NEXT: packsswb %xmm2, %xmm0
+; SSE42-NEXT: packsswb %xmm4, %xmm0
+; SSE42-NEXT: retq
+;
+; AVX1-LABEL: test_cmp_v16i64:
+; AVX1: # BB#0:
+; AVX1-NEXT: vextractf128 $1, %ymm7, %xmm8
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm9
+; AVX1-NEXT: vpcmpgtq %xmm8, %xmm9, %xmm8
+; AVX1-NEXT: vpcmpgtq %xmm7, %xmm3, %xmm3
+; AVX1-NEXT: vpacksswb %xmm8, %xmm3, %xmm8
+; AVX1-NEXT: vextractf128 $1, %ymm6, %xmm7
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm7, %xmm3, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm6, %xmm2, %xmm2
+; AVX1-NEXT: vpacksswb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpacksswb %xmm8, %xmm2, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm5, %xmm3
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm6
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm6, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpacksswb %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vextractf128 $1, %ymm4, %xmm3
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5
+; AVX1-NEXT: vpcmpgtq %xmm3, %xmm5, %xmm3
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm0, %xmm0
+; AVX1-NEXT: vpacksswb %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpacksswb %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: test_cmp_v16i64:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpcmpgtq %ymm7, %ymm3, %ymm3
+; AVX2-NEXT: vpcmpgtq %ymm6, %ymm2, %ymm2
+; AVX2-NEXT: vpacksswb %ymm3, %ymm2, %ymm2
+; AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,1,3]
+; AVX2-NEXT: vpcmpgtq %ymm5, %ymm1, %ymm1
+; AVX2-NEXT: vpcmpgtq %ymm4, %ymm0, %ymm0
+; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT: vpacksswb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
+;
+; AVX512-LABEL: test_cmp_v16i64:
+; AVX512: # BB#0:
+; AVX512-NEXT: vextracti32x4 $3, %zmm2, %xmm4
+; AVX512-NEXT: vpextrq $1, %xmm4, %rcx
+; AVX512-NEXT: vextracti32x4 $3, %zmm0, %xmm5
+; AVX512-NEXT: vpextrq $1, %xmm5, %rdx
+; AVX512-NEXT: xorl %eax, %eax
+; AVX512-NEXT: cmpq %rcx, %rdx
+; AVX512-NEXT: movq $-1, %rcx
+; AVX512-NEXT: movl $0, %edx
+; AVX512-NEXT: cmovgq %rcx, %rdx
+; AVX512-NEXT: vmovq %rdx, %xmm6
+; AVX512-NEXT: vmovq %xmm4, %rdx
+; AVX512-NEXT: vmovq %xmm5, %rsi
+; AVX512-NEXT: cmpq %rdx, %rsi
+; AVX512-NEXT: movl $0, %edx
+; AVX512-NEXT: cmovgq %rcx, %rdx
+; AVX512-NEXT: vmovq %rdx, %xmm4
+; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm6[0]
+; AVX512-NEXT: vextracti32x4 $2, %zmm2, %xmm5
+; AVX512-NEXT: vpextrq $1, %xmm5, %rdx
+; AVX512-NEXT: vextracti32x4 $2, %zmm0, %xmm6
+; AVX512-NEXT: vpextrq $1, %xmm6, %rsi
+; AVX512-NEXT: cmpq %rdx, %rsi
+; AVX512-NEXT: movl $0, %edx
+; AVX512-NEXT: cmovgq %rcx, %rdx
+; AVX512-NEXT: vmovq %rdx, %xmm7
+; AVX512-NEXT: vmovq %xmm5, %rdx
+; AVX512-NEXT: vmovq %xmm6, %rsi
+; AVX512-NEXT: cmpq %rdx, %rsi
+; AVX512-NEXT: movl $0, %edx
+; AVX512-NEXT: cmovgq %rcx, %rdx
+; AVX512-NEXT: vmovq %rdx, %xmm5
+; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm7[0]
+; AVX512-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4
+; AVX512-NEXT: vextracti32x4 $1, %zmm2, %xmm5
+; AVX512-NEXT: vpextrq $1, %xmm5, %rdx
+; AVX512-NEXT: vextracti32x4 $1, %zmm0, %xmm6
+; AVX512-NEXT: vpextrq $1, %xmm6, %rsi
+; AVX512-NEXT: cmpq %rdx, %rsi
+; AVX512-NEXT: movl $0, %edx
+; AVX512-NEXT: cmovgq %rcx, %rdx
+; AVX512-NEXT: vmovq %rdx, %xmm7
+; AVX512-NEXT: vmovq %xmm5, %rdx
+; AVX512-NEXT: vmovq %xmm6, %rsi
+; AVX512-NEXT: cmpq %rdx, %rsi
+; AVX512-NEXT: movl $0, %edx
+; AVX512-NEXT: cmovgq %rcx, %rdx
+; AVX512-NEXT: vmovq %rdx, %xmm5
+; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm7[0]
+; AVX512-NEXT: vpextrq $1, %xmm2, %rdx
+; AVX512-NEXT: vpextrq $1, %xmm0, %rsi
+; AVX512-NEXT: cmpq %rdx, %rsi
+; AVX512-NEXT: movl $0, %edx
+; AVX512-NEXT: cmovgq %rcx, %rdx
+; AVX512-NEXT: vmovq %rdx, %xmm6
+; AVX512-NEXT: vmovq %xmm2, %rdx
+; AVX512-NEXT: vmovq %xmm0, %rsi
+; AVX512-NEXT: cmpq %rdx, %rsi
+; AVX512-NEXT: movl $0, %edx
+; AVX512-NEXT: cmovgq %rcx, %rdx
+; AVX512-NEXT: vmovq %rdx, %xmm0
+; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm6[0]
+; AVX512-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0
+; AVX512-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0
+; AVX512-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512-NEXT: vextracti32x4 $3, %zmm3, %xmm2
+; AVX512-NEXT: vpextrq $1, %xmm2, %rdx
+; AVX512-NEXT: vextracti32x4 $3, %zmm1, %xmm4
+; AVX512-NEXT: vpextrq $1, %xmm4, %rsi
+; AVX512-NEXT: cmpq %rdx, %rsi
+; AVX512-NEXT: movl $0, %edx
+; AVX512-NEXT: cmovgq %rcx, %rdx
+; AVX512-NEXT: vmovq %rdx, %xmm5
+; AVX512-NEXT: vmovq %xmm2, %rdx
+; AVX512-NEXT: vmovq %xmm4, %rsi
+; AVX512-NEXT: cmpq %rdx, %rsi
+; AVX512-NEXT: movl $0, %edx
+; AVX512-NEXT: cmovgq %rcx, %rdx
+; AVX512-NEXT: vmovq %rdx, %xmm2
+; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm5[0]
+; AVX512-NEXT: vextracti32x4 $2, %zmm3, %xmm4
+; AVX512-NEXT: vpextrq $1, %xmm4, %rdx
+; AVX512-NEXT: vextracti32x4 $2, %zmm1, %xmm5
+; AVX512-NEXT: vpextrq $1, %xmm5, %rsi
+; AVX512-NEXT: cmpq %rdx, %rsi
+; AVX512-NEXT: movl $0, %edx
+; AVX512-NEXT: cmovgq %rcx, %rdx
+; AVX512-NEXT: vmovq %rdx, %xmm6
+; AVX512-NEXT: vmovq %xmm4, %rdx
+; AVX512-NEXT: vmovq %xmm5, %rsi
+; AVX512-NEXT: cmpq %rdx, %rsi
+; AVX512-NEXT: movl $0, %edx
+; AVX512-NEXT: cmovgq %rcx, %rdx
+; AVX512-NEXT: vmovq %rdx, %xmm4
+; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm6[0]
+; AVX512-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2
+; AVX512-NEXT: vextracti32x4 $1, %zmm3, %xmm4
+; AVX512-NEXT: vpextrq $1, %xmm4, %rdx
+; AVX512-NEXT: vextracti32x4 $1, %zmm1, %xmm5
+; AVX512-NEXT: vpextrq $1, %xmm5, %rsi
+; AVX512-NEXT: cmpq %rdx, %rsi
+; AVX512-NEXT: movl $0, %edx
+; AVX512-NEXT: cmovgq %rcx, %rdx
+; AVX512-NEXT: vmovq %rdx, %xmm6
+; AVX512-NEXT: vmovq %xmm4, %rdx
+; AVX512-NEXT: vmovq %xmm5, %rsi
+; AVX512-NEXT: cmpq %rdx, %rsi
+; AVX512-NEXT: movl $0, %edx
+; AVX512-NEXT: cmovgq %rcx, %rdx
+; AVX512-NEXT: vmovq %rdx, %xmm4
+; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm6[0]
+; AVX512-NEXT: vpextrq $1, %xmm3, %rdx
+; AVX512-NEXT: vpextrq $1, %xmm1, %rsi
+; AVX512-NEXT: cmpq %rdx, %rsi
+; AVX512-NEXT: movl $0, %edx
+; AVX512-NEXT: cmovgq %rcx, %rdx
+; AVX512-NEXT: vmovq %rdx, %xmm5
+; AVX512-NEXT: vmovq %xmm3, %rdx
+; AVX512-NEXT: vmovq %xmm1, %rsi
+; AVX512-NEXT: cmpq %rdx, %rsi
+; AVX512-NEXT: cmovgq %rcx, %rax
+; AVX512-NEXT: vmovq %rax, %xmm1
+; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0]
+; AVX512-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm1
+; AVX512-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
+; AVX512-NEXT: vpmovqd %zmm1, %ymm1
+; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%1 = icmp sgt <16 x i64> %a0, %a1
ret <16 x i1> %1
}
@@ -4583,7 +4087,7 @@ define <32 x i1> @test_cmp_v32i32(<32 x
; AVX512DQ-NEXT: cmovgl %ecx, %edx
; AVX512DQ-NEXT: vpinsrd $3, %edx, %xmm6, %xmm0
; AVX512DQ-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0
-; AVX512DQ-NEXT: vinserti32x8 $1, %ymm4, %zmm0, %zmm0
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vextracti32x4 $3, %zmm3, %xmm2
; AVX512DQ-NEXT: vpextrd $1, %xmm2, %edx
@@ -4688,7 +4192,7 @@ define <32 x i1> @test_cmp_v32i32(<32 x
; AVX512DQ-NEXT: cmovgl %ecx, %eax
; AVX512DQ-NEXT: vpinsrd $3, %eax, %xmm5, %xmm1
; AVX512DQ-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm1
-; AVX512DQ-NEXT: vinserti32x8 $1, %ymm2, %zmm1, %zmm1
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512DQ-NEXT: retq
@@ -9176,8 +8680,8 @@ define <32 x i1> @test_cmp_v32f64(<32 x
;
; AVX512DQ-LABEL: test_cmp_v32f64:
; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: vextractf64x2 $3, %zmm4, %xmm8
-; AVX512DQ-NEXT: vextractf64x2 $3, %zmm0, %xmm9
+; AVX512DQ-NEXT: vextractf32x4 $3, %zmm4, %xmm8
+; AVX512DQ-NEXT: vextractf32x4 $3, %zmm0, %xmm9
; AVX512DQ-NEXT: xorl %eax, %eax
; AVX512DQ-NEXT: vucomisd %xmm8, %xmm9
; AVX512DQ-NEXT: movq $-1, %rcx
@@ -9191,8 +8695,8 @@ define <32 x i1> @test_cmp_v32f64(<32 x
; AVX512DQ-NEXT: cmovaq %rcx, %rdx
; AVX512DQ-NEXT: vmovq %rdx, %xmm8
; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm8 = xmm10[0],xmm8[0]
-; AVX512DQ-NEXT: vextractf64x2 $2, %zmm4, %xmm9
-; AVX512DQ-NEXT: vextractf64x2 $2, %zmm0, %xmm10
+; AVX512DQ-NEXT: vextractf32x4 $2, %zmm4, %xmm9
+; AVX512DQ-NEXT: vextractf32x4 $2, %zmm0, %xmm10
; AVX512DQ-NEXT: vucomisd %xmm9, %xmm10
; AVX512DQ-NEXT: movl $0, %edx
; AVX512DQ-NEXT: cmovaq %rcx, %rdx
@@ -9205,8 +8709,8 @@ define <32 x i1> @test_cmp_v32f64(<32 x
; AVX512DQ-NEXT: vmovq %rdx, %xmm9
; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm9 = xmm11[0],xmm9[0]
; AVX512DQ-NEXT: vinserti128 $1, %xmm8, %ymm9, %ymm8
-; AVX512DQ-NEXT: vextractf64x2 $1, %zmm4, %xmm9
-; AVX512DQ-NEXT: vextractf64x2 $1, %zmm0, %xmm10
+; AVX512DQ-NEXT: vextractf32x4 $1, %zmm4, %xmm9
+; AVX512DQ-NEXT: vextractf32x4 $1, %zmm0, %xmm10
; AVX512DQ-NEXT: vucomisd %xmm9, %xmm10
; AVX512DQ-NEXT: movl $0, %edx
; AVX512DQ-NEXT: cmovaq %rcx, %rdx
@@ -9232,8 +8736,8 @@ define <32 x i1> @test_cmp_v32f64(<32 x
; AVX512DQ-NEXT: vinserti128 $1, %xmm9, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm8, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm8
-; AVX512DQ-NEXT: vextractf64x2 $3, %zmm5, %xmm4
-; AVX512DQ-NEXT: vextractf64x2 $3, %zmm1, %xmm0
+; AVX512DQ-NEXT: vextractf32x4 $3, %zmm5, %xmm4
+; AVX512DQ-NEXT: vextractf32x4 $3, %zmm1, %xmm0
; AVX512DQ-NEXT: vucomisd %xmm4, %xmm0
; AVX512DQ-NEXT: movl $0, %edx
; AVX512DQ-NEXT: cmovaq %rcx, %rdx
@@ -9245,8 +8749,8 @@ define <32 x i1> @test_cmp_v32f64(<32 x
; AVX512DQ-NEXT: cmovaq %rcx, %rdx
; AVX512DQ-NEXT: vmovq %rdx, %xmm0
; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm9 = xmm9[0],xmm0[0]
-; AVX512DQ-NEXT: vextractf64x2 $2, %zmm5, %xmm4
-; AVX512DQ-NEXT: vextractf64x2 $2, %zmm1, %xmm0
+; AVX512DQ-NEXT: vextractf32x4 $2, %zmm5, %xmm4
+; AVX512DQ-NEXT: vextractf32x4 $2, %zmm1, %xmm0
; AVX512DQ-NEXT: vucomisd %xmm4, %xmm0
; AVX512DQ-NEXT: movl $0, %edx
; AVX512DQ-NEXT: cmovaq %rcx, %rdx
@@ -9259,8 +8763,8 @@ define <32 x i1> @test_cmp_v32f64(<32 x
; AVX512DQ-NEXT: vmovq %rdx, %xmm0
; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm10[0],xmm0[0]
; AVX512DQ-NEXT: vinserti128 $1, %xmm9, %ymm0, %ymm9
-; AVX512DQ-NEXT: vextractf64x2 $1, %zmm5, %xmm4
-; AVX512DQ-NEXT: vextractf64x2 $1, %zmm1, %xmm0
+; AVX512DQ-NEXT: vextractf32x4 $1, %zmm5, %xmm4
+; AVX512DQ-NEXT: vextractf32x4 $1, %zmm1, %xmm0
; AVX512DQ-NEXT: vucomisd %xmm4, %xmm0
; AVX512DQ-NEXT: movl $0, %edx
; AVX512DQ-NEXT: cmovaq %rcx, %rdx
@@ -9286,10 +8790,10 @@ define <32 x i1> @test_cmp_v32f64(<32 x
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm9, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512DQ-NEXT: vinserti32x8 $1, %ymm0, %zmm8, %zmm0
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm8, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm8
-; AVX512DQ-NEXT: vextractf64x2 $3, %zmm6, %xmm1
-; AVX512DQ-NEXT: vextractf64x2 $3, %zmm2, %xmm4
+; AVX512DQ-NEXT: vextractf32x4 $3, %zmm6, %xmm1
+; AVX512DQ-NEXT: vextractf32x4 $3, %zmm2, %xmm4
; AVX512DQ-NEXT: vucomisd %xmm1, %xmm4
; AVX512DQ-NEXT: movl $0, %edx
; AVX512DQ-NEXT: cmovaq %rcx, %rdx
@@ -9301,8 +8805,8 @@ define <32 x i1> @test_cmp_v32f64(<32 x
; AVX512DQ-NEXT: cmovaq %rcx, %rdx
; AVX512DQ-NEXT: vmovq %rdx, %xmm1
; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm5[0],xmm1[0]
-; AVX512DQ-NEXT: vextractf64x2 $2, %zmm6, %xmm4
-; AVX512DQ-NEXT: vextractf64x2 $2, %zmm2, %xmm5
+; AVX512DQ-NEXT: vextractf32x4 $2, %zmm6, %xmm4
+; AVX512DQ-NEXT: vextractf32x4 $2, %zmm2, %xmm5
; AVX512DQ-NEXT: vucomisd %xmm4, %xmm5
; AVX512DQ-NEXT: movl $0, %edx
; AVX512DQ-NEXT: cmovaq %rcx, %rdx
@@ -9315,8 +8819,8 @@ define <32 x i1> @test_cmp_v32f64(<32 x
; AVX512DQ-NEXT: vmovq %rdx, %xmm4
; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0]
; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512DQ-NEXT: vextractf64x2 $1, %zmm6, %xmm1
-; AVX512DQ-NEXT: vextractf64x2 $1, %zmm2, %xmm4
+; AVX512DQ-NEXT: vextractf32x4 $1, %zmm6, %xmm1
+; AVX512DQ-NEXT: vextractf32x4 $1, %zmm2, %xmm4
; AVX512DQ-NEXT: vucomisd %xmm1, %xmm4
; AVX512DQ-NEXT: movl $0, %edx
; AVX512DQ-NEXT: cmovaq %rcx, %rdx
@@ -9342,8 +8846,8 @@ define <32 x i1> @test_cmp_v32f64(<32 x
; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512DQ-NEXT: vextractf64x2 $3, %zmm7, %xmm1
-; AVX512DQ-NEXT: vextractf64x2 $3, %zmm3, %xmm2
+; AVX512DQ-NEXT: vextractf32x4 $3, %zmm7, %xmm1
+; AVX512DQ-NEXT: vextractf32x4 $3, %zmm3, %xmm2
; AVX512DQ-NEXT: vucomisd %xmm1, %xmm2
; AVX512DQ-NEXT: movl $0, %edx
; AVX512DQ-NEXT: cmovaq %rcx, %rdx
@@ -9355,8 +8859,8 @@ define <32 x i1> @test_cmp_v32f64(<32 x
; AVX512DQ-NEXT: cmovaq %rcx, %rdx
; AVX512DQ-NEXT: vmovq %rdx, %xmm1
; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm4[0],xmm1[0]
-; AVX512DQ-NEXT: vextractf64x2 $2, %zmm7, %xmm2
-; AVX512DQ-NEXT: vextractf64x2 $2, %zmm3, %xmm4
+; AVX512DQ-NEXT: vextractf32x4 $2, %zmm7, %xmm2
+; AVX512DQ-NEXT: vextractf32x4 $2, %zmm3, %xmm4
; AVX512DQ-NEXT: vucomisd %xmm2, %xmm4
; AVX512DQ-NEXT: movl $0, %edx
; AVX512DQ-NEXT: cmovaq %rcx, %rdx
@@ -9369,8 +8873,8 @@ define <32 x i1> @test_cmp_v32f64(<32 x
; AVX512DQ-NEXT: vmovq %rdx, %xmm2
; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm5[0],xmm2[0]
; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
-; AVX512DQ-NEXT: vextractf64x2 $1, %zmm7, %xmm2
-; AVX512DQ-NEXT: vextractf64x2 $1, %zmm3, %xmm4
+; AVX512DQ-NEXT: vextractf32x4 $1, %zmm7, %xmm2
+; AVX512DQ-NEXT: vextractf32x4 $1, %zmm3, %xmm4
; AVX512DQ-NEXT: vucomisd %xmm2, %xmm4
; AVX512DQ-NEXT: movl $0, %edx
; AVX512DQ-NEXT: cmovaq %rcx, %rdx
@@ -9395,7 +8899,7 @@ define <32 x i1> @test_cmp_v32f64(<32 x
; AVX512DQ-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm2, %zmm1
; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm8, %ymm0
; AVX512DQ-NEXT: retq
@@ -10512,9 +10016,9 @@ define <32 x i1> @test_cmp_v32i64(<32 x
;
; AVX512DQ-LABEL: test_cmp_v32i64:
; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: vextracti64x2 $3, %zmm4, %xmm8
+; AVX512DQ-NEXT: vextracti32x4 $3, %zmm4, %xmm8
; AVX512DQ-NEXT: vpextrq $1, %xmm8, %rcx
-; AVX512DQ-NEXT: vextracti64x2 $3, %zmm0, %xmm9
+; AVX512DQ-NEXT: vextracti32x4 $3, %zmm0, %xmm9
; AVX512DQ-NEXT: vpextrq $1, %xmm9, %rdx
; AVX512DQ-NEXT: xorl %eax, %eax
; AVX512DQ-NEXT: cmpq %rcx, %rdx
@@ -10529,9 +10033,9 @@ define <32 x i1> @test_cmp_v32i64(<32 x
; AVX512DQ-NEXT: cmovgq %rcx, %rdx
; AVX512DQ-NEXT: vmovq %rdx, %xmm8
; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm8 = xmm8[0],xmm10[0]
-; AVX512DQ-NEXT: vextracti64x2 $2, %zmm4, %xmm9
+; AVX512DQ-NEXT: vextracti32x4 $2, %zmm4, %xmm9
; AVX512DQ-NEXT: vpextrq $1, %xmm9, %rdx
-; AVX512DQ-NEXT: vextracti64x2 $2, %zmm0, %xmm10
+; AVX512DQ-NEXT: vextracti32x4 $2, %zmm0, %xmm10
; AVX512DQ-NEXT: vpextrq $1, %xmm10, %rsi
; AVX512DQ-NEXT: cmpq %rdx, %rsi
; AVX512DQ-NEXT: movl $0, %edx
@@ -10545,9 +10049,9 @@ define <32 x i1> @test_cmp_v32i64(<32 x
; AVX512DQ-NEXT: vmovq %rdx, %xmm9
; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm9 = xmm9[0],xmm11[0]
; AVX512DQ-NEXT: vinserti128 $1, %xmm8, %ymm9, %ymm8
-; AVX512DQ-NEXT: vextracti64x2 $1, %zmm4, %xmm9
+; AVX512DQ-NEXT: vextracti32x4 $1, %zmm4, %xmm9
; AVX512DQ-NEXT: vpextrq $1, %xmm9, %rdx
-; AVX512DQ-NEXT: vextracti64x2 $1, %zmm0, %xmm10
+; AVX512DQ-NEXT: vextracti32x4 $1, %zmm0, %xmm10
; AVX512DQ-NEXT: vpextrq $1, %xmm10, %rsi
; AVX512DQ-NEXT: cmpq %rdx, %rsi
; AVX512DQ-NEXT: movl $0, %edx
@@ -10576,9 +10080,9 @@ define <32 x i1> @test_cmp_v32i64(<32 x
; AVX512DQ-NEXT: vinserti128 $1, %xmm9, %ymm0, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm8, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm8
-; AVX512DQ-NEXT: vextracti64x2 $3, %zmm5, %xmm4
+; AVX512DQ-NEXT: vextracti32x4 $3, %zmm5, %xmm4
; AVX512DQ-NEXT: vpextrq $1, %xmm4, %rdx
-; AVX512DQ-NEXT: vextracti64x2 $3, %zmm1, %xmm0
+; AVX512DQ-NEXT: vextracti32x4 $3, %zmm1, %xmm0
; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rsi
; AVX512DQ-NEXT: cmpq %rdx, %rsi
; AVX512DQ-NEXT: movl $0, %edx
@@ -10591,9 +10095,9 @@ define <32 x i1> @test_cmp_v32i64(<32 x
; AVX512DQ-NEXT: cmovgq %rcx, %rdx
; AVX512DQ-NEXT: vmovq %rdx, %xmm0
; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm9 = xmm0[0],xmm9[0]
-; AVX512DQ-NEXT: vextracti64x2 $2, %zmm5, %xmm4
+; AVX512DQ-NEXT: vextracti32x4 $2, %zmm5, %xmm4
; AVX512DQ-NEXT: vpextrq $1, %xmm4, %rdx
-; AVX512DQ-NEXT: vextracti64x2 $2, %zmm1, %xmm0
+; AVX512DQ-NEXT: vextracti32x4 $2, %zmm1, %xmm0
; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rsi
; AVX512DQ-NEXT: cmpq %rdx, %rsi
; AVX512DQ-NEXT: movl $0, %edx
@@ -10607,9 +10111,9 @@ define <32 x i1> @test_cmp_v32i64(<32 x
; AVX512DQ-NEXT: vmovq %rdx, %xmm0
; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm10[0]
; AVX512DQ-NEXT: vinserti128 $1, %xmm9, %ymm0, %ymm9
-; AVX512DQ-NEXT: vextracti64x2 $1, %zmm5, %xmm0
+; AVX512DQ-NEXT: vextracti32x4 $1, %zmm5, %xmm0
; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rdx
-; AVX512DQ-NEXT: vextracti64x2 $1, %zmm1, %xmm4
+; AVX512DQ-NEXT: vextracti32x4 $1, %zmm1, %xmm4
; AVX512DQ-NEXT: vpextrq $1, %xmm4, %rsi
; AVX512DQ-NEXT: cmpq %rdx, %rsi
; AVX512DQ-NEXT: movl $0, %edx
@@ -10638,11 +10142,11 @@ define <32 x i1> @test_cmp_v32i64(<32 x
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm9, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512DQ-NEXT: vinserti32x8 $1, %ymm0, %zmm8, %zmm0
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm8, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm8
-; AVX512DQ-NEXT: vextracti64x2 $3, %zmm6, %xmm1
+; AVX512DQ-NEXT: vextracti32x4 $3, %zmm6, %xmm1
; AVX512DQ-NEXT: vpextrq $1, %xmm1, %rdx
-; AVX512DQ-NEXT: vextracti64x2 $3, %zmm2, %xmm4
+; AVX512DQ-NEXT: vextracti32x4 $3, %zmm2, %xmm4
; AVX512DQ-NEXT: vpextrq $1, %xmm4, %rsi
; AVX512DQ-NEXT: cmpq %rdx, %rsi
; AVX512DQ-NEXT: movl $0, %edx
@@ -10655,9 +10159,9 @@ define <32 x i1> @test_cmp_v32i64(<32 x
; AVX512DQ-NEXT: cmovgq %rcx, %rdx
; AVX512DQ-NEXT: vmovq %rdx, %xmm1
; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm5[0]
-; AVX512DQ-NEXT: vextracti64x2 $2, %zmm6, %xmm4
+; AVX512DQ-NEXT: vextracti32x4 $2, %zmm6, %xmm4
; AVX512DQ-NEXT: vpextrq $1, %xmm4, %rdx
-; AVX512DQ-NEXT: vextracti64x2 $2, %zmm2, %xmm5
+; AVX512DQ-NEXT: vextracti32x4 $2, %zmm2, %xmm5
; AVX512DQ-NEXT: vpextrq $1, %xmm5, %rsi
; AVX512DQ-NEXT: cmpq %rdx, %rsi
; AVX512DQ-NEXT: movl $0, %edx
@@ -10671,9 +10175,9 @@ define <32 x i1> @test_cmp_v32i64(<32 x
; AVX512DQ-NEXT: vmovq %rdx, %xmm4
; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm4[0],xmm0[0]
; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
-; AVX512DQ-NEXT: vextracti64x2 $1, %zmm6, %xmm0
+; AVX512DQ-NEXT: vextracti32x4 $1, %zmm6, %xmm0
; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rdx
-; AVX512DQ-NEXT: vextracti64x2 $1, %zmm2, %xmm4
+; AVX512DQ-NEXT: vextracti32x4 $1, %zmm2, %xmm4
; AVX512DQ-NEXT: vpextrq $1, %xmm4, %rsi
; AVX512DQ-NEXT: cmpq %rdx, %rsi
; AVX512DQ-NEXT: movl $0, %edx
@@ -10702,9 +10206,9 @@ define <32 x i1> @test_cmp_v32i64(<32 x
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm1
-; AVX512DQ-NEXT: vextracti64x2 $3, %zmm7, %xmm0
+; AVX512DQ-NEXT: vextracti32x4 $3, %zmm7, %xmm0
; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rdx
-; AVX512DQ-NEXT: vextracti64x2 $3, %zmm3, %xmm2
+; AVX512DQ-NEXT: vextracti32x4 $3, %zmm3, %xmm2
; AVX512DQ-NEXT: vpextrq $1, %xmm2, %rsi
; AVX512DQ-NEXT: cmpq %rdx, %rsi
; AVX512DQ-NEXT: movl $0, %edx
@@ -10717,9 +10221,9 @@ define <32 x i1> @test_cmp_v32i64(<32 x
; AVX512DQ-NEXT: cmovgq %rcx, %rdx
; AVX512DQ-NEXT: vmovq %rdx, %xmm0
; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0]
-; AVX512DQ-NEXT: vextracti64x2 $2, %zmm7, %xmm2
+; AVX512DQ-NEXT: vextracti32x4 $2, %zmm7, %xmm2
; AVX512DQ-NEXT: vpextrq $1, %xmm2, %rdx
-; AVX512DQ-NEXT: vextracti64x2 $2, %zmm3, %xmm4
+; AVX512DQ-NEXT: vextracti32x4 $2, %zmm3, %xmm4
; AVX512DQ-NEXT: vpextrq $1, %xmm4, %rsi
; AVX512DQ-NEXT: cmpq %rdx, %rsi
; AVX512DQ-NEXT: movl $0, %edx
@@ -10733,9 +10237,9 @@ define <32 x i1> @test_cmp_v32i64(<32 x
; AVX512DQ-NEXT: vmovq %rdx, %xmm2
; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm5[0]
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm2
-; AVX512DQ-NEXT: vextracti64x2 $1, %zmm7, %xmm0
+; AVX512DQ-NEXT: vextracti32x4 $1, %zmm7, %xmm0
; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rdx
-; AVX512DQ-NEXT: vextracti64x2 $1, %zmm3, %xmm4
+; AVX512DQ-NEXT: vextracti32x4 $1, %zmm3, %xmm4
; AVX512DQ-NEXT: vpextrq $1, %xmm4, %rsi
; AVX512DQ-NEXT: cmpq %rdx, %rsi
; AVX512DQ-NEXT: movl $0, %edx
@@ -10763,7 +10267,7 @@ define <32 x i1> @test_cmp_v32i64(<32 x
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm3, %ymm0
; AVX512DQ-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512DQ-NEXT: vinserti32x8 $1, %ymm0, %zmm1, %zmm0
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm8, %ymm0
; AVX512DQ-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll?rev=311091&r1=311090&r2=311091&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll Thu Aug 17 08:40:25 2017
@@ -274,7 +274,7 @@ define <16 x i32> @shuffle_v16i32_0_1_2_
define <8 x i32> @test_v16i32_1_3_5_7_9_11_13_15(<16 x i32> %v) {
; ALL-LABEL: test_v16i32_1_3_5_7_9_11_13_15:
; ALL: # BB#0:
-; ALL-NEXT: vextractf32x8 $1, %zmm0, %ymm1
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; ALL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
; ALL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; ALL-NEXT: retq
@@ -314,7 +314,7 @@ define <8 x float> @shuffle_v16f32_extra
define <8 x float> @test_v16f32_0_1_2_3_4_6_7_10 (<16 x float> %v) {
; ALL-LABEL: test_v16f32_0_1_2_3_4_6_7_10:
; ALL: # BB#0:
-; ALL-NEXT: vextractf32x8 $1, %zmm0, %ymm1
+; ALL-NEXT: vextractf64x4 $1, %zmm0, %ymm1
; ALL-NEXT: vmovsldup {{.*#+}} xmm1 = xmm1[0,0,2,2]
; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,6,7,u]
@@ -684,7 +684,7 @@ define <16 x i32> @mask_shuffle_v4i32_v1
; ALL: # BB#0:
; ALL-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; ALL-NEXT: vinsertf32x8 $1, %ymm0, %zmm0, %zmm0
+; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
; ALL-NEXT: retq
%res = shufflevector <4 x i32> %a, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
ret <16 x i32> %res
@@ -695,7 +695,7 @@ define <16 x float> @mask_shuffle_v4f32_
; ALL: # BB#0:
; ALL-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; ALL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; ALL-NEXT: vinsertf32x8 $1, %ymm0, %zmm0, %zmm0
+; ALL-NEXT: vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
; ALL-NEXT: retq
%res = shufflevector <4 x float> %a, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
ret <16 x float> %res
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-avx512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-avx512.ll?rev=311091&r1=311090&r2=311091&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-avx512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-avx512.ll Thu Aug 17 08:40:25 2017
@@ -474,7 +474,7 @@ define <16 x float> @expand13(<8 x float
; SKX64-LABEL: expand13:
; SKX64: # BB#0:
; SKX64-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; SKX64-NEXT: vinsertf32x8 $1, %ymm0, %zmm1, %zmm0
+; SKX64-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; SKX64-NEXT: retq
;
; KNL64-LABEL: expand13:
@@ -486,7 +486,7 @@ define <16 x float> @expand13(<8 x float
; SKX32-LABEL: expand13:
; SKX32: # BB#0:
; SKX32-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; SKX32-NEXT: vinsertf32x8 $1, %ymm0, %zmm1, %zmm0
+; SKX32-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
; SKX32-NEXT: retl
;
; KNL32-LABEL: expand13:
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll?rev=311091&r1=311090&r2=311091&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-v1.ll Thu Aug 17 08:40:25 2017
@@ -171,7 +171,7 @@ define <8 x i1> @shuf8i1_u_2_u_u_2_u_2_u
; VL_BW_DQ: # BB#0:
; VL_BW_DQ-NEXT: kmovd %edi, %k0
; VL_BW_DQ-NEXT: vpmovm2q %k0, %zmm0
-; VL_BW_DQ-NEXT: vextracti64x2 $1, %zmm0, %xmm0
+; VL_BW_DQ-NEXT: vextracti32x4 $1, %zmm0, %xmm0
; VL_BW_DQ-NEXT: vpbroadcastq %xmm0, %zmm0
; VL_BW_DQ-NEXT: vpmovq2m %zmm0, %k0
; VL_BW_DQ-NEXT: vpmovm2w %k0, %xmm0
Modified: llvm/trunk/test/CodeGen/X86/vector-trunc-math.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-trunc-math.ll?rev=311091&r1=311090&r2=311091&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-trunc-math.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-trunc-math.ll Thu Aug 17 08:40:25 2017
@@ -257,38 +257,16 @@ define <16 x i8> @trunc_add_v16i64_v16i8
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: trunc_add_v16i64_v16i8:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vpaddq %zmm3, %zmm1, %zmm1
-; AVX512F-NEXT: vpaddq %zmm2, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: trunc_add_v16i64_v16i8:
-; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vpaddq %zmm3, %zmm1, %zmm1
-; AVX512BW-NEXT: vpaddq %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: trunc_add_v16i64_v16i8:
-; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: vpaddq %zmm3, %zmm1, %zmm1
-; AVX512DQ-NEXT: vpaddq %zmm2, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512DQ-NEXT: vzeroupper
-; AVX512DQ-NEXT: retq
+; AVX512-LABEL: trunc_add_v16i64_v16i8:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpaddq %zmm3, %zmm1, %zmm1
+; AVX512-NEXT: vpaddq %zmm2, %zmm0, %zmm0
+; AVX512-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512-NEXT: vpmovqd %zmm1, %ymm1
+; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%1 = add <16 x i64> %a0, %a1
%2 = trunc <16 x i64> %1 to <16 x i8>
ret <16 x i8> %2
@@ -683,35 +661,15 @@ define <16 x i8> @trunc_add_const_v16i64
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: trunc_add_const_v16i64_v16i8:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512F-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: trunc_add_const_v16i64_v16i8:
-; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512BW-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: trunc_add_const_v16i64_v16i8:
-; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512DQ-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0
-; AVX512DQ-NEXT: vzeroupper
-; AVX512DQ-NEXT: retq
+; AVX512-LABEL: trunc_add_const_v16i64_v16i8:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512-NEXT: vpmovqd %zmm1, %ymm1
+; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512-NEXT: vpaddb {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%1 = add <16 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
%2 = trunc <16 x i64> %1 to <16 x i8>
ret <16 x i8> %2
@@ -1084,38 +1042,16 @@ define <16 x i8> @trunc_sub_v16i64_v16i8
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: trunc_sub_v16i64_v16i8:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vpsubq %zmm3, %zmm1, %zmm1
-; AVX512F-NEXT: vpsubq %zmm2, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: trunc_sub_v16i64_v16i8:
-; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vpsubq %zmm3, %zmm1, %zmm1
-; AVX512BW-NEXT: vpsubq %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: trunc_sub_v16i64_v16i8:
-; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: vpsubq %zmm3, %zmm1, %zmm1
-; AVX512DQ-NEXT: vpsubq %zmm2, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512DQ-NEXT: vzeroupper
-; AVX512DQ-NEXT: retq
+; AVX512-LABEL: trunc_sub_v16i64_v16i8:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpsubq %zmm3, %zmm1, %zmm1
+; AVX512-NEXT: vpsubq %zmm2, %zmm0, %zmm0
+; AVX512-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512-NEXT: vpmovqd %zmm1, %ymm1
+; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%1 = sub <16 x i64> %a0, %a1
%2 = trunc <16 x i64> %1 to <16 x i8>
ret <16 x i8> %2
@@ -1510,38 +1446,16 @@ define <16 x i8> @trunc_sub_const_v16i64
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: trunc_sub_const_v16i64_v16i8:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vpsubq {{.*}}(%rip), %zmm1, %zmm1
-; AVX512F-NEXT: vpsubq {{.*}}(%rip), %zmm0, %zmm0
-; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: trunc_sub_const_v16i64_v16i8:
-; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vpsubq {{.*}}(%rip), %zmm1, %zmm1
-; AVX512BW-NEXT: vpsubq {{.*}}(%rip), %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: trunc_sub_const_v16i64_v16i8:
-; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: vpsubq {{.*}}(%rip), %zmm1, %zmm1
-; AVX512DQ-NEXT: vpsubq {{.*}}(%rip), %zmm0, %zmm0
-; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512DQ-NEXT: vzeroupper
-; AVX512DQ-NEXT: retq
+; AVX512-LABEL: trunc_sub_const_v16i64_v16i8:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpsubq {{.*}}(%rip), %zmm1, %zmm1
+; AVX512-NEXT: vpsubq {{.*}}(%rip), %zmm0, %zmm0
+; AVX512-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512-NEXT: vpmovqd %zmm1, %ymm1
+; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%1 = sub <16 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
%2 = trunc <16 x i64> %1 to <16 x i8>
ret <16 x i8> %2
@@ -2186,7 +2100,7 @@ define <16 x i8> @trunc_mul_v16i64_v16i8
; AVX512DQ-NEXT: vpmullq %zmm2, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
+; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
@@ -2732,38 +2646,16 @@ define <16 x i8> @trunc_mul_const_v16i64
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: trunc_mul_const_v16i64_v16i8:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512F-NEXT: vpmulld {{.*}}(%rip), %ymm0, %ymm0
-; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512F-NEXT: vpmulld {{.*}}(%rip), %ymm1, %ymm1
-; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: trunc_mul_const_v16i64_v16i8:
-; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512BW-NEXT: vpmulld {{.*}}(%rip), %ymm0, %ymm0
-; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512BW-NEXT: vpmulld {{.*}}(%rip), %ymm1, %ymm1
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: trunc_mul_const_v16i64_v16i8:
-; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512DQ-NEXT: vpmulld {{.*}}(%rip), %ymm0, %ymm0
-; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512DQ-NEXT: vpmulld {{.*}}(%rip), %ymm1, %ymm1
-; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512DQ-NEXT: vzeroupper
-; AVX512DQ-NEXT: retq
+; AVX512-LABEL: trunc_mul_const_v16i64_v16i8:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512-NEXT: vpmulld {{.*}}(%rip), %ymm0, %ymm0
+; AVX512-NEXT: vpmovqd %zmm1, %ymm1
+; AVX512-NEXT: vpmulld {{.*}}(%rip), %ymm1, %ymm1
+; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%1 = mul <16 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
%2 = trunc <16 x i64> %1 to <16 x i8>
ret <16 x i8> %2
@@ -3157,38 +3049,16 @@ define <16 x i8> @trunc_and_v16i64_v16i8
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: trunc_and_v16i64_v16i8:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vpandq %zmm3, %zmm1, %zmm1
-; AVX512F-NEXT: vpandq %zmm2, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: trunc_and_v16i64_v16i8:
-; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vpandq %zmm3, %zmm1, %zmm1
-; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: trunc_and_v16i64_v16i8:
-; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: vpandq %zmm3, %zmm1, %zmm1
-; AVX512DQ-NEXT: vpandq %zmm2, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512DQ-NEXT: vzeroupper
-; AVX512DQ-NEXT: retq
+; AVX512-LABEL: trunc_and_v16i64_v16i8:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpandq %zmm3, %zmm1, %zmm1
+; AVX512-NEXT: vpandq %zmm2, %zmm0, %zmm0
+; AVX512-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512-NEXT: vpmovqd %zmm1, %ymm1
+; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%1 = and <16 x i64> %a0, %a1
%2 = trunc <16 x i64> %1 to <16 x i8>
ret <16 x i8> %2
@@ -3528,35 +3398,15 @@ define <16 x i8> @trunc_and_const_v16i64
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: trunc_and_const_v16i64_v16i8:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: trunc_and_const_v16i64_v16i8:
-; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: trunc_and_const_v16i64_v16i8:
-; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512DQ-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; AVX512DQ-NEXT: vzeroupper
-; AVX512DQ-NEXT: retq
+; AVX512-LABEL: trunc_and_const_v16i64_v16i8:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512-NEXT: vpmovqd %zmm1, %ymm1
+; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%1 = and <16 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
%2 = trunc <16 x i64> %1 to <16 x i8>
ret <16 x i8> %2
@@ -3913,38 +3763,16 @@ define <16 x i8> @trunc_xor_v16i64_v16i8
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: trunc_xor_v16i64_v16i8:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vpxorq %zmm3, %zmm1, %zmm1
-; AVX512F-NEXT: vpxorq %zmm2, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: trunc_xor_v16i64_v16i8:
-; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vpxorq %zmm3, %zmm1, %zmm1
-; AVX512BW-NEXT: vpxorq %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: trunc_xor_v16i64_v16i8:
-; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: vpxorq %zmm3, %zmm1, %zmm1
-; AVX512DQ-NEXT: vpxorq %zmm2, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512DQ-NEXT: vzeroupper
-; AVX512DQ-NEXT: retq
+; AVX512-LABEL: trunc_xor_v16i64_v16i8:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpxorq %zmm3, %zmm1, %zmm1
+; AVX512-NEXT: vpxorq %zmm2, %zmm0, %zmm0
+; AVX512-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512-NEXT: vpmovqd %zmm1, %ymm1
+; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%1 = xor <16 x i64> %a0, %a1
%2 = trunc <16 x i64> %1 to <16 x i8>
ret <16 x i8> %2
@@ -4284,35 +4112,15 @@ define <16 x i8> @trunc_xor_const_v16i64
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: trunc_xor_const_v16i64_v16i8:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512F-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: trunc_xor_const_v16i64_v16i8:
-; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512BW-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: trunc_xor_const_v16i64_v16i8:
-; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512DQ-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
-; AVX512DQ-NEXT: vzeroupper
-; AVX512DQ-NEXT: retq
+; AVX512-LABEL: trunc_xor_const_v16i64_v16i8:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512-NEXT: vpmovqd %zmm1, %ymm1
+; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%1 = xor <16 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
%2 = trunc <16 x i64> %1 to <16 x i8>
ret <16 x i8> %2
@@ -4669,38 +4477,16 @@ define <16 x i8> @trunc_or_v16i64_v16i8(
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: trunc_or_v16i64_v16i8:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vporq %zmm3, %zmm1, %zmm1
-; AVX512F-NEXT: vporq %zmm2, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: trunc_or_v16i64_v16i8:
-; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vporq %zmm3, %zmm1, %zmm1
-; AVX512BW-NEXT: vporq %zmm2, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: trunc_or_v16i64_v16i8:
-; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: vporq %zmm3, %zmm1, %zmm1
-; AVX512DQ-NEXT: vporq %zmm2, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512DQ-NEXT: vzeroupper
-; AVX512DQ-NEXT: retq
+; AVX512-LABEL: trunc_or_v16i64_v16i8:
+; AVX512: # BB#0:
+; AVX512-NEXT: vporq %zmm3, %zmm1, %zmm1
+; AVX512-NEXT: vporq %zmm2, %zmm0, %zmm0
+; AVX512-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512-NEXT: vpmovqd %zmm1, %ymm1
+; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%1 = or <16 x i64> %a0, %a1
%2 = trunc <16 x i64> %1 to <16 x i8>
ret <16 x i8> %2
@@ -5040,35 +4826,15 @@ define <16 x i8> @trunc_or_const_v16i64_
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
-; AVX512F-LABEL: trunc_or_const_v16i64_v16i8:
-; AVX512F: # BB#0:
-; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512F-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512F-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
-; AVX512F-NEXT: vzeroupper
-; AVX512F-NEXT: retq
-;
-; AVX512BW-LABEL: trunc_or_const_v16i64_v16i8:
-; AVX512BW: # BB#0:
-; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512BW-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512BW-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
-; AVX512BW-NEXT: vzeroupper
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: trunc_or_const_v16i64_v16i8:
-; AVX512DQ: # BB#0:
-; AVX512DQ-NEXT: vpmovqd %zmm0, %ymm0
-; AVX512DQ-NEXT: vpmovqd %zmm1, %ymm1
-; AVX512DQ-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0
-; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512DQ-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
-; AVX512DQ-NEXT: vzeroupper
-; AVX512DQ-NEXT: retq
+; AVX512-LABEL: trunc_or_const_v16i64_v16i8:
+; AVX512: # BB#0:
+; AVX512-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512-NEXT: vpmovqd %zmm1, %ymm1
+; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512-NEXT: vzeroupper
+; AVX512-NEXT: retq
%1 = or <16 x i64> %a0, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
%2 = trunc <16 x i64> %1 to <16 x i8>
ret <16 x i8> %2