[llvm] r319337 - [X86][AVX512] Tag 3OP (shuffles, double-shifts and GFNI) instructions with scheduler classes
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 29 10:52:20 PST 2017
Author: rksimon
Date: Wed Nov 29 10:52:20 2017
New Revision: 319337
URL: http://llvm.org/viewvc/llvm-project?rev=319337&view=rev
Log:
[X86][AVX512] Tag 3OP (shuffles, double-shifts and GFNI) instructions with scheduler classes
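
This threads an OpndItins operand-itinerary bundle through the AVX512
3-operand immediate multiclasses and attaches Sched<[...]> information to
the register, memory and broadcast forms, so both the old itinerary-based
models and the newer per-operand scheduler models see the cost of each
variant. For reference, a minimal sketch of the shapes involved, assuming
the X86Schedule.td / X86InstrSSE.td definitions at this revision:

  // X86Schedule.td: most writes come in reg-reg / folded-load pairs.
  class X86FoldableSchedWrite : SchedWrite {
    SchedWrite Folded; // variant used when a load is folded into the op
  }

  // X86InstrSSE.td: the bundle passed around below as 'itins'.
  class OpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> {
    InstrItinClass rr = arg_rr;              // reg-reg itinerary
    InstrItinClass rm = arg_rm;              // reg-mem itinerary
    X86FoldableSchedWrite Sched = WriteFAdd; // new-model scheduling class
  }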
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=319337&r1=319336&r2=319337&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Wed Nov 29 10:52:20 2017
@@ -8655,21 +8655,24 @@ multiclass avx512_fp_packed_imm<bits<8>
//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
// op(reg_vec2,mem_vec,imm)
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo>{
+ OpndItins itins, X86VectorVTInfo DestInfo,
+ X86VectorVTInfo SrcInfo>{
let ExeDomain = DestInfo.ExeDomain in {
defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
(ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
(SrcInfo.VT SrcInfo.RC:$src2),
- (i8 imm:$src3)))>;
+ (i8 imm:$src3))), itins.rr>,
+ Sched<[itins.Sched]>;
defm rmi : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst),
(ins SrcInfo.RC:$src1, SrcInfo.MemOp:$src2, u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1),
(SrcInfo.VT (bitconvert
(SrcInfo.LdFrag addr:$src2))),
- (i8 imm:$src3)))>;
+ (i8 imm:$src3))), itins.rm>,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
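
The recipe above repeats throughout the patch: the rri form takes itins.rr
plus the plain write, while the load-folding rmi form takes itins.rm plus
the .Folded write and ReadAfterLd, which tells the scheduler that the
register operands are not needed until the folded load completes. A
hypothetical stand-alone instruction following the same recipe (MYSHUF and
its records are illustrative only, not real definitions):

  let Sched = WriteFShuffle in
  def MYSHUF_ITINS : OpndItins<IIC_SSE_SHUFP, IIC_SSE_SHUFP>;

  def MYSHUFrri : I<0x00, MRMSrcReg, (outs VR128:$dst),
                    (ins VR128:$src1, VR128:$src2, u8imm:$src3),
                    "myshuf\t{$src3, $src2, $src1|$src1, $src2, $src3}", [],
                    MYSHUF_ITINS.rr>, Sched<[MYSHUF_ITINS.Sched]>;
  def MYSHUFrmi : I<0x00, MRMSrcMem, (outs VR128:$dst),
                    (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
                    "myshuf\t{$src3, $src2, $src1|$src1, $src2, $src3}", [],
                    MYSHUF_ITINS.rm>,
                  Sched<[MYSHUF_ITINS.Sched.Folded, ReadAfterLd]>;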
@@ -8677,8 +8680,8 @@ multiclass avx512_3Op_rm_imm8<bits<8> op
// op(reg_vec2,mem_vec,imm)
// op(reg_vec2,broadcast(eltVt),imm)
multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _>:
- avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, _, _>{
+ OpndItins itins, X86VectorVTInfo _>:
+ avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, itins, _, _>{
let ExeDomain = _.ExeDomain in
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
@@ -8687,7 +8690,8 @@ multiclass avx512_3Op_imm8<bits<8> opc,
"$src1, ${src2}"##_.BroadcastStr##", $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
- (i8 imm:$src3))>, EVEX_B;
+ (i8 imm:$src3)), itins.rm>, EVEX_B,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
//handle scalar instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
@@ -8756,29 +8760,29 @@ multiclass avx512_common_fp_sae_packed_i
}
multiclass avx512_common_3Op_rm_imm8<bits<8> opc, SDNode OpNode, string OpStr,
- AVX512VLVectorVTInfo DestInfo, AVX512VLVectorVTInfo SrcInfo,
- Predicate Pred = HasBWI> {
+ OpndItins itins, AVX512VLVectorVTInfo DestInfo,
+ AVX512VLVectorVTInfo SrcInfo, Predicate Pred = HasBWI> {
let Predicates = [Pred] in {
- defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info512,
+ defm Z : avx512_3Op_rm_imm8<opc, OpStr, OpNode, itins, DestInfo.info512,
SrcInfo.info512>, EVEX_V512, AVX512AIi8Base, EVEX_4V;
}
let Predicates = [Pred, HasVLX] in {
- defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info128,
+ defm Z128 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, itins, DestInfo.info128,
SrcInfo.info128>, EVEX_V128, AVX512AIi8Base, EVEX_4V;
- defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, DestInfo.info256,
+ defm Z256 : avx512_3Op_rm_imm8<opc, OpStr, OpNode, itins, DestInfo.info256,
SrcInfo.info256>, EVEX_V256, AVX512AIi8Base, EVEX_4V;
}
}
multiclass avx512_common_3Op_imm8<string OpcodeStr, AVX512VLVectorVTInfo _,
- bits<8> opc, SDNode OpNode,
+ bits<8> opc, SDNode OpNode, OpndItins itins,
Predicate Pred = HasAVX512> {
let Predicates = [Pred] in {
- defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info512>, EVEX_V512;
+ defm Z : avx512_3Op_imm8<opc, OpcodeStr, OpNode, itins, _.info512>, EVEX_V512;
}
let Predicates = [Pred, HasVLX] in {
- defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info128>, EVEX_V128;
- defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, _.info256>, EVEX_V256;
+ defm Z128 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, itins, _.info128>, EVEX_V128;
+ defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, OpNode, itins, _.info256>, EVEX_V256;
}
}
@@ -8918,25 +8922,25 @@ def : Pat<(v4f64 (ftrunc VR256X:$src)),
(VRNDSCALEPDZ256rri VR256X:$src, (i32 0xB))>;
}
-multiclass avx512_shuff_packed_128<string OpcodeStr, AVX512VLVectorVTInfo _,
- bits<8> opc>{
+multiclass avx512_shuff_packed_128<string OpcodeStr, OpndItins itins,
+ AVX512VLVectorVTInfo _, bits<8> opc>{
let Predicates = [HasAVX512] in {
- defm Z : avx512_3Op_imm8<opc, OpcodeStr, X86Shuf128, _.info512>, EVEX_V512;
+ defm Z : avx512_3Op_imm8<opc, OpcodeStr, X86Shuf128, itins, _.info512>, EVEX_V512;
}
let Predicates = [HasAVX512, HasVLX] in {
- defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, X86Shuf128, _.info256>, EVEX_V256;
+ defm Z256 : avx512_3Op_imm8<opc, OpcodeStr, X86Shuf128, itins, _.info256>, EVEX_V256;
}
}
-defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4",avx512vl_f32_info, 0x23>,
- AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
-defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2",avx512vl_f64_info, 0x23>,
- AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
-defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4",avx512vl_i32_info, 0x43>,
- AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
-defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2",avx512vl_i64_info, 0x43>,
- AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
+defm VSHUFF32X4 : avx512_shuff_packed_128<"vshuff32x4", SSE_SHUFP,
+ avx512vl_f32_info, 0x23>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
+defm VSHUFF64X2 : avx512_shuff_packed_128<"vshuff64x2", SSE_SHUFP,
+ avx512vl_f64_info, 0x23>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
+defm VSHUFI32X4 : avx512_shuff_packed_128<"vshufi32x4", SSE_SHUFP,
+ avx512vl_i32_info, 0x43>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
+defm VSHUFI64X2 : avx512_shuff_packed_128<"vshufi64x2", SSE_SHUFP,
+ avx512vl_i64_info, 0x43>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
let Predicates = [HasAVX512] in {
// Provide fallback in case the load node that is used in the broadcast
@@ -8971,17 +8975,18 @@ def : Pat<(v64i8 (X86SubVBroadcast (v16i
0)>;
}
-multiclass avx512_valign<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I> {
- defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_I, 0x03, X86VAlign>,
+multiclass avx512_valign<string OpcodeStr, OpndItins itins,
+ AVX512VLVectorVTInfo VTInfo_I> {
+ defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_I, 0x03, X86VAlign, itins>,
AVX512AIi8Base, EVEX_4V;
}
-defm VALIGND: avx512_valign<"valignd", avx512vl_i32_info>,
+defm VALIGND: avx512_valign<"valignd", SSE_PALIGN, avx512vl_i32_info>,
EVEX_CD8<32, CD8VF>;
-defm VALIGNQ: avx512_valign<"valignq", avx512vl_i64_info>,
+defm VALIGNQ: avx512_valign<"valignq", SSE_PALIGN, avx512vl_i64_info>,
EVEX_CD8<64, CD8VF>, VEX_W;
-defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr" ,
+defm VPALIGNR: avx512_common_3Op_rm_imm8<0x0F, X86PAlignr, "vpalignr", SSE_PALIGN,
avx512vl_i8_info, avx512vl_i8_info>,
EVEX_CD8<8, CD8VF>;
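
The 128-bit lane shuffles and the align instructions above reuse existing
SSE bundles rather than growing dedicated ones. Sketches of the two
bundles, assuming their X86InstrSSE.td definitions at this revision:

  let Sched = WriteFShuffle in
  def SSE_SHUFP : OpndItins<IIC_SSE_SHUFP, IIC_SSE_SHUFP>;

  let Sched = WriteShuffle in
  def SSE_PALIGN : OpndItins<IIC_SSE_PALIGNRR, IIC_SSE_PALIGNRM>;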
@@ -9102,8 +9107,9 @@ let Predicates = [HasVLX, HasBWI] in {
v16i8x_info, ValigndImm8XForm>;
}
-defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw" ,
- avx512vl_i16_info, avx512vl_i8_info>, EVEX_CD8<8, CD8VF>;
+defm VDBPSADBW: avx512_common_3Op_rm_imm8<0x42, X86dbpsadbw, "vdbpsadbw",
+ SSE_INTMUL_ITINS_P, avx512vl_i16_info, avx512vl_i8_info>,
+ EVEX_CD8<8, CD8VF>;
multiclass avx512_unary_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpndItins itins, X86VectorVTInfo _> {
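
VDBPSADBW has no dedicated itinerary yet, so it borrows the packed integer
multiply bundle as an approximation (the VBMI2 and GFNI instructions below
do the same). A sketch of that bundle, again assuming the X86InstrSSE.td
definition at this revision:

  let Sched = WriteVecIMul in
  def SSE_INTMUL_ITINS_P : OpndItins<IIC_SSE_INTMUL_P_RR, IIC_SSE_INTMUL_P_RM>;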
@@ -9509,18 +9515,21 @@ defm VPINSRWZ : avx512_insert_elt_bw<0xC
extloadi16>, PD, VEX_WIG;
defm VPINSRDZ : avx512_insert_elt_dq<0x22, "vpinsrd", v4i32x_info, GR32>;
defm VPINSRQZ : avx512_insert_elt_dq<0x22, "vpinsrq", v2i64x_info, GR64>, VEX_W;
+
//===----------------------------------------------------------------------===//
// VSHUFPS - VSHUFPD Operations
//===----------------------------------------------------------------------===//
+
multiclass avx512_shufp<string OpcodeStr, AVX512VLVectorVTInfo VTInfo_I,
AVX512VLVectorVTInfo VTInfo_FP>{
- defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp>,
- EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
- AVX512AIi8Base, EVEX_4V;
+ defm NAME: avx512_common_3Op_imm8<OpcodeStr, VTInfo_FP, 0xC6, X86Shufp,
+ SSE_SHUFP>, EVEX_CD8<VTInfo_FP.info512.EltSize, CD8VF>,
+ AVX512AIi8Base, EVEX_4V;
}
defm VSHUFPS: avx512_shufp<"vshufps", avx512vl_i32_info, avx512vl_f32_info>, PS;
defm VSHUFPD: avx512_shufp<"vshufpd", avx512vl_i64_info, avx512vl_f64_info>, PD, VEX_W;
+
//===----------------------------------------------------------------------===//
// AVX-512 - Byte shift Left/Right
//===----------------------------------------------------------------------===//
@@ -10101,26 +10110,27 @@ defm : vpclmulqdq_aliases<"VPCLMULQDQZ25
//===----------------------------------------------------------------------===//
multiclass VBMI2_shift_var_rm<bits<8> Op, string OpStr, SDNode OpNode,
- X86VectorVTInfo VTI> {
+ OpndItins itins, X86VectorVTInfo VTI> {
let Constraints = "$src1 = $dst",
ExeDomain = VTI.ExeDomain in {
defm r: AVX512_maskable_3src<Op, MRMSrcReg, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.RC:$src3), OpStr,
"$src3, $src2", "$src2, $src3",
- (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3))>,
- AVX512FMA3Base;
+ (VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2, VTI.RC:$src3)),
+ itins.rr>, AVX512FMA3Base, Sched<[itins.Sched]>;
defm m: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src2, VTI.MemOp:$src3), OpStr,
"$src3, $src2", "$src2, $src3",
(VTI.VT (OpNode VTI.RC:$src1, VTI.RC:$src2,
- (VTI.VT (bitconvert (VTI.LdFrag addr:$src3)))))>,
- AVX512FMA3Base;
+ (VTI.VT (bitconvert (VTI.LdFrag addr:$src3))))),
+ itins.rm>, AVX512FMA3Base,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
multiclass VBMI2_shift_var_rmb<bits<8> Op, string OpStr, SDNode OpNode,
- X86VectorVTInfo VTI>
- : VBMI2_shift_var_rm<Op, OpStr, OpNode, VTI> {
+ OpndItins itins, X86VectorVTInfo VTI>
+ : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI> {
let Constraints = "$src1 = $dst",
ExeDomain = VTI.ExeDomain in
defm mb: AVX512_maskable_3src<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
@@ -10128,54 +10138,57 @@ multiclass VBMI2_shift_var_rmb<bits<8> O
"${src3}"##VTI.BroadcastStr##", $src2",
"$src2, ${src3}"##VTI.BroadcastStr,
(OpNode VTI.RC:$src1, VTI.RC:$src2,
- (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3))))>,
- AVX512FMA3Base, EVEX_B;
+ (VTI.VT (X86VBroadcast (VTI.ScalarLdFrag addr:$src3)))),
+ itins.rm>, AVX512FMA3Base, EVEX_B,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
multiclass VBMI2_shift_var_rm_common<bits<8> Op, string OpStr, SDNode OpNode,
- AVX512VLVectorVTInfo VTI> {
+ OpndItins itins, AVX512VLVectorVTInfo VTI> {
let Predicates = [HasVBMI2] in
- defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, VTI.info512>, EVEX_V512;
+ defm Z : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI.info512>, EVEX_V512;
let Predicates = [HasVBMI2, HasVLX] in {
- defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, VTI.info256>, EVEX_V256;
- defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, VTI.info128>, EVEX_V128;
+ defm Z256 : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI.info256>, EVEX_V256;
+ defm Z128 : VBMI2_shift_var_rm<Op, OpStr, OpNode, itins, VTI.info128>, EVEX_V128;
}
}
multiclass VBMI2_shift_var_rmb_common<bits<8> Op, string OpStr, SDNode OpNode,
- AVX512VLVectorVTInfo VTI> {
+ OpndItins itins, AVX512VLVectorVTInfo VTI> {
let Predicates = [HasVBMI2] in
- defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, VTI.info512>, EVEX_V512;
+ defm Z : VBMI2_shift_var_rmb<Op, OpStr, OpNode, itins, VTI.info512>, EVEX_V512;
let Predicates = [HasVBMI2, HasVLX] in {
- defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, VTI.info256>, EVEX_V256;
- defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, VTI.info128>, EVEX_V128;
+ defm Z256 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, itins, VTI.info256>, EVEX_V256;
+ defm Z128 : VBMI2_shift_var_rmb<Op, OpStr, OpNode, itins, VTI.info128>, EVEX_V128;
}
}
multiclass VBMI2_shift_var<bits<8> wOp, bits<8> dqOp, string Prefix,
- SDNode OpNode> {
- defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode,
+ SDNode OpNode, OpndItins itins> {
+ defm W : VBMI2_shift_var_rm_common<wOp, Prefix##"w", OpNode, itins,
avx512vl_i16_info>, VEX_W, EVEX_CD8<16, CD8VF>;
- defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode,
+ defm D : VBMI2_shift_var_rmb_common<dqOp, Prefix##"d", OpNode, itins,
avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
- defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode,
+ defm Q : VBMI2_shift_var_rmb_common<dqOp, Prefix##"q", OpNode, itins,
avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
}
multiclass VBMI2_shift_imm<bits<8> wOp, bits<8> dqOp, string Prefix,
- SDNode OpNode> {
- defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", avx512vl_i16_info,
- avx512vl_i16_info, HasVBMI2>, VEX_W, EVEX_CD8<16, CD8VF>;
+ SDNode OpNode, OpndItins itins> {
+ defm W : avx512_common_3Op_rm_imm8<wOp, OpNode, Prefix##"w", itins,
+ avx512vl_i16_info, avx512vl_i16_info, HasVBMI2>,
+ VEX_W, EVEX_CD8<16, CD8VF>;
defm D : avx512_common_3Op_imm8<Prefix##"d", avx512vl_i32_info, dqOp,
- OpNode, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
+ OpNode, itins, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<32, CD8VF>;
defm Q : avx512_common_3Op_imm8<Prefix##"q", avx512vl_i64_info, dqOp, OpNode,
- HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
+ itins, HasVBMI2>, AVX512AIi8Base, EVEX_4V, EVEX_CD8<64, CD8VF>, VEX_W;
}
// Concat & Shift
-defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv>;
-defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv>;
-defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld>;
-defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd>;
+defm VPSHLDV : VBMI2_shift_var<0x70, 0x71, "vpshldv", X86VShldv, SSE_INTMUL_ITINS_P>;
+defm VPSHRDV : VBMI2_shift_var<0x72, 0x73, "vpshrdv", X86VShrdv, SSE_INTMUL_ITINS_P>;
+defm VPSHLD : VBMI2_shift_imm<0x70, 0x71, "vpshld", X86VShld, SSE_INTMUL_ITINS_P>;
+defm VPSHRD : VBMI2_shift_imm<0x72, 0x73, "vpshrd", X86VShrd, SSE_INTMUL_ITINS_P>;
+
// Compress
defm VPCOMPRESSB : compress_by_elt_width <0x63, "vpcompressb", avx512vl_i8_info,
HasVBMI2>, EVEX;
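
The VBMI2 funnel-shift multiclasses follow the same split for their
three-source forms: r takes itins.rr and the plain write, while m and the
broadcast form mb take itins.rm plus the Folded write and ReadAfterLd. As
a sketch of the net effect on one expanded variant (record names inferred
from the defm expansions above; field values assume the SSE_INTMUL_ITINS_P
bundle sketched earlier):

  // VPSHLDVWZ128r (reg-reg):      Itinerary = IIC_SSE_INTMUL_P_RR
  //                               SchedRW   = [WriteVecIMul]
  // VPSHLDVWZ128m (folded load):  Itinerary = IIC_SSE_INTMUL_P_RM
  //                               SchedRW   = [WriteVecIMulLd, ReadAfterLd]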
@@ -10290,9 +10303,9 @@ defm GF2P8MULB : GF2P8MULB_avx512_common
EVEX_CD8<8, CD8VF>, T8PD;
multiclass GF2P8AFFINE_avx512_rmb_imm<bits<8> Op, string OpStr, SDNode OpNode,
- X86VectorVTInfo VTI,
+ OpndItins itins, X86VectorVTInfo VTI,
X86VectorVTInfo BcstVTI>
- : avx512_3Op_rm_imm8<Op, OpStr, OpNode, VTI, VTI> {
+ : avx512_3Op_rm_imm8<Op, OpStr, OpNode, itins, VTI, VTI> {
let ExeDomain = VTI.ExeDomain in
defm rmbi : AVX512_maskable<Op, MRMSrcMem, VTI, (outs VTI.RC:$dst),
(ins VTI.RC:$src1, VTI.ScalarMemOp:$src2, u8imm:$src3),
@@ -10300,25 +10313,27 @@ multiclass GF2P8AFFINE_avx512_rmb_imm<bi
"$src1, ${src2}"##BcstVTI.BroadcastStr##", $src3",
(OpNode (VTI.VT VTI.RC:$src1),
(bitconvert (BcstVTI.VT (X86VBroadcast (loadi64 addr:$src2)))),
- (i8 imm:$src3))>, EVEX_B;
+ (i8 imm:$src3)), itins.rm>, EVEX_B,
+ Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
-multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode> {
+multiclass GF2P8AFFINE_avx512_common<bits<8> Op, string OpStr, SDNode OpNode,
+ OpndItins itins> {
let Predicates = [HasGFNI, HasAVX512, HasBWI] in
- defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, v64i8_info,
+ defm Z : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, itins, v64i8_info,
v8i64_info>, EVEX_V512;
let Predicates = [HasGFNI, HasVLX, HasBWI] in {
- defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, v32i8x_info,
+ defm Z256 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, itins, v32i8x_info,
v4i64x_info>, EVEX_V256;
- defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, v16i8x_info,
+ defm Z128 : GF2P8AFFINE_avx512_rmb_imm<Op, OpStr, OpNode, itins, v16i8x_info,
v2i64x_info>, EVEX_V128;
}
}
defm GF2P8AFFINEINVQB : GF2P8AFFINE_avx512_common<0xCF, "vgf2p8affineinvqb",
- X86GF2P8affineinvqb>,
+ X86GF2P8affineinvqb, SSE_INTMUL_ITINS_P>,
EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
defm GF2P8AFFINEQB : GF2P8AFFINE_avx512_common<0xCE, "vgf2p8affineqb",
- X86GF2P8affineqb>,
+ X86GF2P8affineqb, SSE_INTMUL_ITINS_P>,
EVEX_4V, EVEX_CD8<8, CD8VF>, VEX_W, AVX512AIi8Base;
Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=319337&r1=319336&r2=319337&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Wed Nov 29 10:52:20 2017
@@ -7331,7 +7331,7 @@ define <64 x i8> @vmov_test16(i64 %x) {
; GENERIC-NEXT: movl $32, %eax # sched: [1:0.33]
; GENERIC-NEXT: kmovd %eax, %k1
; GENERIC-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1}
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %zmm0, %k0
; GENERIC-NEXT: vpmovm2b %k0, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
@@ -7371,7 +7371,7 @@ define <64 x i8> @vmov_test17(i64 %x, i3
; GENERIC-NEXT: movl $32, %eax # sched: [1:0.33]
; GENERIC-NEXT: kmovd %eax, %k1
; GENERIC-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1}
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] sched: [1:1.00]
; GENERIC-NEXT: vpmovb2m %zmm0, %k0
; GENERIC-NEXT: vpmovm2b %k0, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
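
In these scheduling tests, -print-schedule annotates each instruction with
sched: [latency:reciprocal throughput] for the model in use; with this
patch the vshuff/vshufi instructions are annotated instead of being left
bare. On the GENERIC model the register forms report [1:1.00] and the
memory-folded forms [5:1.00], the folded load accounting for the extra
latency. The RUN lines are not part of this diff, but they look something
like:

  ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=x86-64 -mattr=+avx512f,+avx512bw,+avx512vl -print-schedule | FileCheck %s --check-prefix=GENERIC
  ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -print-schedule | FileCheck %s --check-prefix=SKX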
Modified: llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll?rev=319337&r1=319336&r2=319337&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-shuffle-schedule.ll Wed Nov 29 10:52:20 2017
@@ -9535,7 +9535,7 @@ define <8 x float> @test2_8xfloat_masked
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -9557,7 +9557,7 @@ define <8 x float> @test2_8xfloat_zero_m
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test2_8xfloat_zero_masked_shuff_mask0:
@@ -9576,7 +9576,7 @@ define <8 x float> @test2_8xfloat_masked
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -9598,7 +9598,7 @@ define <8 x float> @test2_8xfloat_zero_m
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test2_8xfloat_zero_masked_shuff_mask1:
@@ -9617,7 +9617,7 @@ define <8 x float> @test2_8xfloat_masked
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00]
; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -9639,7 +9639,7 @@ define <8 x float> @test2_8xfloat_zero_m
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test2_8xfloat_zero_masked_shuff_mask2:
@@ -9671,7 +9671,7 @@ define <8 x float> @test2_8xfloat_masked
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -9693,7 +9693,7 @@ define <8 x float> @test_8xfloat_zero_ma
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_shuff_mask3:
@@ -9726,7 +9726,7 @@ define <8 x float> @test_8xfloat_masked_
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [5:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -9749,7 +9749,7 @@ define <8 x float> @test_8xfloat_zero_ma
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask0:
@@ -9770,7 +9770,7 @@ define <8 x float> @test_8xfloat_masked_
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [5:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -9793,7 +9793,7 @@ define <8 x float> @test_8xfloat_zero_ma
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask1:
@@ -9814,7 +9814,7 @@ define <8 x float> @test_8xfloat_masked_
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -9837,7 +9837,7 @@ define <8 x float> @test_8xfloat_zero_ma
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask2:
@@ -9872,7 +9872,7 @@ define <8 x float> @test_8xfloat_masked_
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
; GENERIC-NEXT: vmovaps %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -9895,7 +9895,7 @@ define <8 x float> @test_8xfloat_zero_ma
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xfloat_zero_masked_shuff_mem_mask3:
@@ -9914,7 +9914,7 @@ define <8 x float> @test_8xfloat_zero_ma
define <16 x float> @test_16xfloat_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x i32> %mask) {
; GENERIC-LABEL: test_16xfloat_shuff_mask0:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],zmm1[2,3,6,7]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],zmm1[2,3,6,7] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_shuff_mask0:
@@ -9929,7 +9929,7 @@ define <16 x float> @test_16xfloat_maske
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00]
; GENERIC-NEXT: vmovaps %zmm2, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -9951,7 +9951,7 @@ define <16 x float> @test_16xfloat_zero_
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_shuff_mask0:
@@ -9970,7 +9970,7 @@ define <16 x float> @test_16xfloat_maske
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] sched: [1:1.00]
; GENERIC-NEXT: vmovaps %zmm2, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -9992,7 +9992,7 @@ define <16 x float> @test_16xfloat_zero_
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_shuff_mask1:
@@ -10011,7 +10011,7 @@ define <16 x float> @test_16xfloat_maske
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] sched: [1:1.00]
; GENERIC-NEXT: vmovaps %zmm2, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -10033,7 +10033,7 @@ define <16 x float> @test_16xfloat_zero_
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_shuff_mask2:
@@ -10050,7 +10050,7 @@ define <16 x float> @test_16xfloat_zero_
define <16 x float> @test_16xfloat_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2) {
; GENERIC-LABEL: test_16xfloat_shuff_mask3:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,6,7],zmm1[0,1,4,5]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,6,7],zmm1[0,1,4,5] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_shuff_mask3:
@@ -10065,7 +10065,7 @@ define <16 x float> @test_16xfloat_maske
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [1:1.00]
; GENERIC-NEXT: vmovaps %zmm2, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -10087,7 +10087,7 @@ define <16 x float> @test_16xfloat_zero_
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_shuff_mask3:
@@ -10104,7 +10104,7 @@ define <16 x float> @test_16xfloat_zero_
define <16 x float> @test_16xfloat_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) {
; GENERIC-LABEL: test_16xfloat_shuff_mem_mask0:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,4,5],mem[4,5,2,3]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,4,5],mem[4,5,2,3] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_shuff_mem_mask0:
@@ -10120,7 +10120,7 @@ define <16 x float> @test_16xfloat_maske
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [5:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -10143,7 +10143,7 @@ define <16 x float> @test_16xfloat_zero_
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask0:
@@ -10164,7 +10164,7 @@ define <16 x float> @test_16xfloat_maske
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [5:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -10187,7 +10187,7 @@ define <16 x float> @test_16xfloat_zero_
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask1:
@@ -10208,7 +10208,7 @@ define <16 x float> @test_16xfloat_maske
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [5:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -10231,7 +10231,7 @@ define <16 x float> @test_16xfloat_zero_
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask2:
@@ -10250,7 +10250,7 @@ define <16 x float> @test_16xfloat_zero_
define <16 x float> @test_16xfloat_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) {
; GENERIC-LABEL: test_16xfloat_shuff_mem_mask3:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,6,7]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,6,7] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_shuff_mem_mask3:
@@ -10266,7 +10266,7 @@ define <16 x float> @test_16xfloat_maske
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [5:1.00]
; GENERIC-NEXT: vmovaps %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -10289,7 +10289,7 @@ define <16 x float> @test_16xfloat_zero_
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15]
+; GENERIC-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xfloat_zero_masked_shuff_mem_mask3:
@@ -10323,7 +10323,7 @@ define <4 x double> @test_4xdouble_maske
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -10345,7 +10345,7 @@ define <4 x double> @test_4xdouble_zero_
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_shuff_mask0:
@@ -10364,7 +10364,7 @@ define <4 x double> @test_4xdouble_maske
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -10386,7 +10386,7 @@ define <4 x double> @test_4xdouble_zero_
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_shuff_mask1:
@@ -10405,7 +10405,7 @@ define <4 x double> @test_4xdouble_maske
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -10427,7 +10427,7 @@ define <4 x double> @test_4xdouble_zero_
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_shuff_mask2:
@@ -10459,7 +10459,7 @@ define <4 x double> @test_4xdouble_maske
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -10481,7 +10481,7 @@ define <4 x double> @test_4xdouble_zero_
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_shuff_mask3:
@@ -10514,7 +10514,7 @@ define <4 x double> @test_4xdouble_maske
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [5:1.00]
; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -10537,7 +10537,7 @@ define <4 x double> @test_4xdouble_zero_
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask0:
@@ -10558,7 +10558,7 @@ define <4 x double> @test_4xdouble_maske
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [5:1.00]
; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -10581,7 +10581,7 @@ define <4 x double> @test_4xdouble_zero_
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask1:
@@ -10602,7 +10602,7 @@ define <4 x double> @test_4xdouble_maske
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [5:1.00]
; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -10625,7 +10625,7 @@ define <4 x double> @test_4xdouble_zero_
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask2:
@@ -10660,7 +10660,7 @@ define <4 x double> @test_4xdouble_maske
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [5:1.00]
; GENERIC-NEXT: vmovapd %ymm1, %ymm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -10683,7 +10683,7 @@ define <4 x double> @test_4xdouble_zero_
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xdouble_zero_masked_shuff_mem_mask3:
@@ -10702,7 +10702,7 @@ define <4 x double> @test_4xdouble_zero_
define <8 x double> @test_8xdouble_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2) {
; GENERIC-LABEL: test_8xdouble_shuff_mask0:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,2,3],zmm1[6,7,0,1]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_shuff_mask0:
@@ -10717,7 +10717,7 @@ define <8 x double> @test_8xdouble_maske
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,2,3],zmm1[6,7,0,1]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [1:1.00]
; GENERIC-NEXT: vmovapd %zmm2, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -10739,7 +10739,7 @@ define <8 x double> @test_8xdouble_zero_
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,2,3],zmm1[6,7,0,1]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,2,3],zmm1[6,7,0,1] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_shuff_mask0:
@@ -10758,7 +10758,7 @@ define <8 x double> @test_8xdouble_maske
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,4,5]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,4,5] sched: [1:1.00]
; GENERIC-NEXT: vmovapd %zmm2, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -10780,7 +10780,7 @@ define <8 x double> @test_8xdouble_zero_
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,4,5]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,4,5] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_shuff_mask1:
@@ -10799,7 +10799,7 @@ define <8 x double> @test_8xdouble_maske
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[4,5,0,1]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[4,5,0,1] sched: [1:1.00]
; GENERIC-NEXT: vmovapd %zmm2, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -10821,7 +10821,7 @@ define <8 x double> @test_8xdouble_zero_
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[4,5,0,1]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[4,5,0,1] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_shuff_mask2:
@@ -10838,7 +10838,7 @@ define <8 x double> @test_8xdouble_zero_
define <8 x double> @test_8xdouble_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2) {
; GENERIC-LABEL: test_8xdouble_shuff_mask3:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,2,3]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_shuff_mask3:
@@ -10853,7 +10853,7 @@ define <8 x double> @test_8xdouble_maske
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,2,3]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [1:1.00]
; GENERIC-NEXT: vmovapd %zmm2, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -10875,7 +10875,7 @@ define <8 x double> @test_8xdouble_zero_
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,2,3]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,2,3] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_shuff_mask3:
@@ -10892,7 +10892,7 @@ define <8 x double> @test_8xdouble_zero_
define <8 x double> @test_8xdouble_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) {
; GENERIC-LABEL: test_8xdouble_shuff_mem_mask0:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],mem[0,1,0,1]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],mem[0,1,0,1] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_shuff_mem_mask0:
@@ -10908,7 +10908,7 @@ define <8 x double> @test_8xdouble_maske
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,0,1],mem[0,1,0,1]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [5:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -10931,7 +10931,7 @@ define <8 x double> @test_8xdouble_zero_
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,0,1],mem[0,1,0,1]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,0,1],mem[0,1,0,1] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0:
@@ -10952,7 +10952,7 @@ define <8 x double> @test_8xdouble_maske
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,6,7],mem[0,1,2,3]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [5:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -10975,7 +10975,7 @@ define <8 x double> @test_8xdouble_zero_
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,6,7],mem[0,1,2,3]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,6,7],mem[0,1,2,3] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1:
@@ -10996,7 +10996,7 @@ define <8 x double> @test_8xdouble_maske
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3],mem[0,1,4,5]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [5:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -11019,7 +11019,7 @@ define <8 x double> @test_8xdouble_zero_
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],mem[0,1,4,5]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],mem[0,1,4,5] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2:
@@ -11038,7 +11038,7 @@ define <8 x double> @test_8xdouble_zero_
define <8 x double> @test_8xdouble_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) {
; GENERIC-LABEL: test_8xdouble_shuff_mem_mask3:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[4,5,0,1]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[4,5,0,1] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_shuff_mem_mask3:
@@ -11054,7 +11054,7 @@ define <8 x double> @test_8xdouble_maske
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[4,5,0,1]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [5:1.00]
; GENERIC-NEXT: vmovapd %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -11077,7 +11077,7 @@ define <8 x double> @test_8xdouble_zero_
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[4,5,0,1]
+; GENERIC-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[4,5,0,1] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xdouble_zero_masked_shuff_mem_mask3:
@@ -11111,7 +11111,7 @@ define <8 x i32> @test_8xi32_masked_shuf
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00]
; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -11133,7 +11133,7 @@ define <8 x i32> @test_8xi32_zero_masked
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi32_zero_masked_shuff_mask0:
@@ -11152,7 +11152,7 @@ define <8 x i32> @test_8xi32_masked_shuf
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -11174,7 +11174,7 @@ define <8 x i32> @test_8xi32_zero_masked
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi32_zero_masked_shuff_mask1:
@@ -11193,7 +11193,7 @@ define <8 x i32> @test_8xi32_masked_shuf
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00]
; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -11215,7 +11215,7 @@ define <8 x i32> @test_8xi32_zero_masked
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi32_zero_masked_shuff_mask2:
@@ -11247,7 +11247,7 @@ define <8 x i32> @test_8xi32_masked_shuf
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -11269,7 +11269,7 @@ define <8 x i32> @test_8xi32_zero_masked
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi32_zero_masked_shuff_mask3:
@@ -11302,7 +11302,7 @@ define <8 x i32> @test_8xi32_masked_shuf
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [5:1.00]
; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -11325,7 +11325,7 @@ define <8 x i32> @test_8xi32_zero_masked
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask0:
@@ -11346,7 +11346,7 @@ define <8 x i32> @test_8xi32_masked_shuf
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -11369,7 +11369,7 @@ define <8 x i32> @test_8xi32_zero_masked
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask1:
@@ -11390,7 +11390,7 @@ define <8 x i32> @test_8xi32_masked_shuf
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -11413,7 +11413,7 @@ define <8 x i32> @test_8xi32_zero_masked
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask2:
@@ -11448,7 +11448,7 @@ define <8 x i32> @test_8xi32_masked_shuf
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -11471,7 +11471,7 @@ define <8 x i32> @test_8xi32_zero_masked
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi32_zero_masked_shuff_mem_mask3:
@@ -11490,7 +11490,7 @@ define <8 x i32> @test_8xi32_zero_masked
define <16 x i32> @test_16xi32_shuff_mask0(<16 x i32> %vec1, <16 x i32> %vec2) {
; GENERIC-LABEL: test_16xi32_shuff_mask0:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],zmm1[2,3,6,7]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],zmm1[2,3,6,7] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xi32_shuff_mask0:
@@ -11505,7 +11505,7 @@ define <16 x i32> @test_16xi32_masked_sh
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00]
; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -11527,7 +11527,7 @@ define <16 x i32> @test_16xi32_zero_mask
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xi32_zero_masked_shuff_mask0:
@@ -11546,7 +11546,7 @@ define <16 x i32> @test_16xi32_masked_sh
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00]
; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -11568,7 +11568,7 @@ define <16 x i32> @test_16xi32_zero_mask
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xi32_zero_masked_shuff_mask1:
@@ -11587,7 +11587,7 @@ define <16 x i32> @test_16xi32_masked_sh
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] sched: [1:1.00]
; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -11609,7 +11609,7 @@ define <16 x i32> @test_16xi32_zero_mask
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xi32_zero_masked_shuff_mask2:
@@ -11626,7 +11626,7 @@ define <16 x i32> @test_16xi32_zero_mask
define <16 x i32> @test_16xi32_shuff_mask3(<16 x i32> %vec1, <16 x i32> %vec2) {
; GENERIC-LABEL: test_16xi32_shuff_mask3:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],zmm1[4,5,2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],zmm1[4,5,2,3] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xi32_shuff_mask3:
@@ -11641,7 +11641,7 @@ define <16 x i32> @test_16xi32_masked_sh
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00]
; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -11663,7 +11663,7 @@ define <16 x i32> @test_16xi32_zero_mask
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xi32_zero_masked_shuff_mask3:
@@ -11680,7 +11680,7 @@ define <16 x i32> @test_16xi32_zero_mask
define <16 x i32> @test_16xi32_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %vec2p) {
; GENERIC-LABEL: test_16xi32_shuff_mem_mask0:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],mem[4,5,0,1]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],mem[4,5,0,1] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xi32_shuff_mem_mask0:
@@ -11696,7 +11696,7 @@ define <16 x i32> @test_16xi32_masked_sh
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [5:1.00]
; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -11719,7 +11719,7 @@ define <16 x i32> @test_16xi32_zero_mask
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask0:
@@ -11740,7 +11740,7 @@ define <16 x i32> @test_16xi32_masked_sh
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [5:1.00]
; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -11763,7 +11763,7 @@ define <16 x i32> @test_16xi32_zero_mask
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask1:
@@ -11784,7 +11784,7 @@ define <16 x i32> @test_16xi32_masked_sh
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [5:1.00]
; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -11807,7 +11807,7 @@ define <16 x i32> @test_16xi32_zero_mask
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask2:
@@ -11826,7 +11826,7 @@ define <16 x i32> @test_16xi32_zero_mask
define <16 x i32> @test_16xi32_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %vec2p) {
; GENERIC-LABEL: test_16xi32_shuff_mem_mask3:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[2,3,6,7]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[2,3,6,7] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xi32_shuff_mem_mask3:
@@ -11842,7 +11842,7 @@ define <16 x i32> @test_16xi32_masked_sh
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [5:1.00]
; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -11865,7 +11865,7 @@ define <16 x i32> @test_16xi32_zero_mask
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqd %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15]
+; GENERIC-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_16xi32_zero_masked_shuff_mem_mask3:
@@ -11899,7 +11899,7 @@ define <4 x i64> @test_4xi64_masked_shuf
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -11921,7 +11921,7 @@ define <4 x i64> @test_4xi64_zero_masked
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xi64_zero_masked_shuff_mask0:
@@ -11940,7 +11940,7 @@ define <4 x i64> @test_4xi64_masked_shuf
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -11962,7 +11962,7 @@ define <4 x i64> @test_4xi64_zero_masked
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xi64_zero_masked_shuff_mask1:
@@ -11981,7 +11981,7 @@ define <4 x i64> @test_4xi64_masked_shuf
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -12003,7 +12003,7 @@ define <4 x i64> @test_4xi64_zero_masked
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xi64_zero_masked_shuff_mask2:
@@ -12035,7 +12035,7 @@ define <4 x i64> @test_4xi64_masked_shuf
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm4, %ymm3, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
; GENERIC-NEXT: vmovdqa %ymm2, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -12057,7 +12057,7 @@ define <4 x i64> @test_4xi64_zero_masked
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xi64_zero_masked_shuff_mask3:
@@ -12090,7 +12090,7 @@ define <4 x i64> @test_4xi64_masked_shuf
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [5:1.00]
; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -12113,7 +12113,7 @@ define <4 x i64> @test_4xi64_zero_masked
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask0:
@@ -12134,7 +12134,7 @@ define <4 x i64> @test_4xi64_masked_shuf
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [5:1.00]
; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -12157,7 +12157,7 @@ define <4 x i64> @test_4xi64_zero_masked
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask1:
@@ -12178,7 +12178,7 @@ define <4 x i64> @test_4xi64_masked_shuf
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1] sched: [5:1.00]
; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -12201,7 +12201,7 @@ define <4 x i64> @test_4xi64_zero_masked
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask2:
@@ -12236,7 +12236,7 @@ define <4 x i64> @test_4xi64_masked_shuf
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm3, %ymm2, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3] sched: [5:1.00]
; GENERIC-NEXT: vmovdqa %ymm1, %ymm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -12259,7 +12259,7 @@ define <4 x i64> @test_4xi64_zero_masked
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %ymm2, %ymm1, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_4xi64_zero_masked_shuff_mem_mask3:
@@ -12278,7 +12278,7 @@ define <4 x i64> @test_4xi64_zero_masked
define <8 x i64> @test_8xi64_shuff_mask0(<8 x i64> %vec1, <8 x i64> %vec2) {
; GENERIC-LABEL: test_8xi64_shuff_mask0:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,4,5]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi64_shuff_mask0:
@@ -12293,7 +12293,7 @@ define <8 x i64> @test_8xi64_masked_shuf
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,4,5]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [1:1.00]
; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -12315,7 +12315,7 @@ define <8 x i64> @test_8xi64_zero_masked
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,4,5]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,4,5] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi64_zero_masked_shuff_mask0:
@@ -12334,7 +12334,7 @@ define <8 x i64> @test_8xi64_masked_shuf
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[2,3,4,5]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[2,3,4,5] sched: [1:1.00]
; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -12356,7 +12356,7 @@ define <8 x i64> @test_8xi64_zero_masked
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[2,3,4,5]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[2,3,4,5] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi64_zero_masked_shuff_mask1:
@@ -12375,7 +12375,7 @@ define <8 x i64> @test_8xi64_masked_shuf
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,0,1]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,0,1] sched: [1:1.00]
; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -12397,7 +12397,7 @@ define <8 x i64> @test_8xi64_zero_masked
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,0,1]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,0,1] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi64_zero_masked_shuff_mask2:
@@ -12414,7 +12414,7 @@ define <8 x i64> @test_8xi64_zero_masked
define <8 x i64> @test_8xi64_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2) {
; GENERIC-LABEL: test_8xi64_shuff_mask3:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,6,7],zmm1[4,5,2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi64_shuff_mask3:
@@ -12429,7 +12429,7 @@ define <8 x i64> @test_8xi64_masked_shuf
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm4, %xmm4, %xmm4 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm4, %zmm3, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[2,3,6,7],zmm1[4,5,2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [1:1.00]
; GENERIC-NEXT: vmovdqa64 %zmm2, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -12451,7 +12451,7 @@ define <8 x i64> @test_8xi64_zero_masked
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,6,7],zmm1[4,5,2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,6,7],zmm1[4,5,2,3] sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi64_zero_masked_shuff_mask3:
@@ -12468,7 +12468,7 @@ define <8 x i64> @test_8xi64_zero_masked
define <8 x i64> @test_8xi64_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p) {
; GENERIC-LABEL: test_8xi64_shuff_mem_mask0:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[4,5,2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[4,5,2,3] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi64_shuff_mem_mask0:
@@ -12484,7 +12484,7 @@ define <8 x i64> @test_8xi64_masked_shuf
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,2,3],mem[4,5,2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [5:1.00]
; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -12507,7 +12507,7 @@ define <8 x i64> @test_8xi64_zero_masked
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,2,3],mem[4,5,2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,2,3],mem[4,5,2,3] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask0:
@@ -12528,7 +12528,7 @@ define <8 x i64> @test_8xi64_masked_shuf
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[0,1,0,1]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [5:1.00]
; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -12551,7 +12551,7 @@ define <8 x i64> @test_8xi64_zero_masked
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[0,1,0,1]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[0,1,0,1] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask1:
@@ -12572,7 +12572,7 @@ define <8 x i64> @test_8xi64_masked_shuf
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[4,5,0,1],mem[2,3,2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [5:1.00]
; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -12595,7 +12595,7 @@ define <8 x i64> @test_8xi64_zero_masked
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,0,1],mem[2,3,2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,0,1],mem[2,3,2,3] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask2:
@@ -12614,7 +12614,7 @@ define <8 x i64> @test_8xi64_zero_masked
define <8 x i64> @test_8xi64_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p) {
; GENERIC-LABEL: test_8xi64_shuff_mem_mask3:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,2,3] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi64_shuff_mem_mask3:
@@ -12630,7 +12630,7 @@ define <8 x i64> @test_8xi64_masked_shuf
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm3, %xmm3, %xmm3 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm3, %zmm2, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[6,7,2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [5:1.00]
; GENERIC-NEXT: vmovdqa64 %zmm1, %zmm0
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -12653,7 +12653,7 @@ define <8 x i64> @test_8xi64_zero_masked
; GENERIC: # BB#0:
; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
; GENERIC-NEXT: vpcmpeqq %zmm2, %zmm1, %k1
-; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[6,7,2,3]
+; GENERIC-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[6,7,2,3] sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: test_8xi64_zero_masked_shuff_mem_mask3:
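
A note on the annotations being added throughout this patch: the trailing sched: [N:M] comment gives the scheduling model's latency (N cycles) and reciprocal throughput (M) for the instruction on that line, so the register forms of vshufi32x4/vshufi64x2 are now modelled at [1:1.00] under the GENERIC prefix, while folding the second operand from memory picks up the load latency and yields [5:1.00]. Below is a minimal sketch of a test in the same shape as the ones above; it is not part of this commit. The function name is invented, and the RUN line (triple, -mcpu=skx, -print-schedule) is an assumption about how these schedule tests are typically driven, not something taken from this patch.

; Hypothetical reduced test, assuming the usual schedule-test driver:
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx -print-schedule | FileCheck %s

define <8 x i32> @shuff_sched_example(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) {
; CHECK-LABEL: shuff_sched_example:
; CHECK: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3]
  ; Cross-lane shuffle: upper 128-bit lane of %vec1, lower 128-bit lane of %vec2.
  %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2,
                        <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  ; Zero-masking: compare against zero to build the k-mask, select zeros elsewhere.
  %cmp = icmp eq <8 x i32> %mask, zeroinitializer
  %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
  ret <8 x i32> %res
}

On the GENERIC model the shuffle line would carry sched: [1:1.00]; replacing %vec2 with a load folded into the shuffle would bump it to sched: [5:1.00], matching the deltas checked in this patch.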